Beispiel #1
0
    def get_selfplay_data(self, n_rounds, epsilon=0.5, gamma=0.9):
        """Run self-play rounds and collect the resulting training samples.

        A fresh ``GameEngine`` is built for every round from
        ``self.state_shape``, ``self.ai`` and ``self.verbose``; the samples
        returned by its ``start_selfplay`` call are concatenated across rounds.

        Args:
            n_rounds: Number of self-play rounds to run.
            epsilon: Forwarded unchanged to ``GameEngine.start_selfplay``
                (presumably the exploration rate -- confirm against the engine).
            gamma: Forwarded unchanged to ``GameEngine.start_selfplay``
                (presumably the discount factor -- confirm against the engine).

        Returns:
            A ``(states, action_values, next_states)`` tuple where ``states``
            is a list, ``action_values`` is a NumPy array built from the
            collected values, and ``next_states`` is a list of the same length
            as ``states`` in which entry ``i`` is ``states[i + 1]`` and the
            final entry is ``None``. All three are empty when no sample was
            collected.
        """
        states = []
        action_values = []

        if self.verbose:
            starttime = time.time()
            print("Start self-play process with rounds [{0}]:".format(n_rounds))

        for round_index in range(n_rounds):
            if self.verbose:
                print("{0}th self-play round...".format(round_index + 1))

            engine = GameEngine(
                state_shape=self.state_shape,
                ai=self.ai,
                verbose=self.verbose
            )

            round_states, round_values = engine.start_selfplay(epsilon=epsilon, gamma=gamma)
            # Keep exactly one state per action value. Indexing (rather than
            # slicing) still raises IndexError if the engine hands back fewer
            # states than action values, so the two lists cannot drift apart.
            n_samples = len(round_values)
            states.extend(round_states[k] for k in range(n_samples))
            action_values.extend(round_values[k] for k in range(n_samples))

        if self.verbose:
            endtime = time.time()
            print("End of self-play process with data size [{0}] and cost time [{1:.1f}s].".format(
                len(action_values),  (endtime - starttime)))

        # `states` is deliberately left as a plain list; only the action
        # values are converted to an array.
        action_values = np.array(action_values)

        # Shift forward by one so that next_states[i] is the successor of
        # states[i]; the last sample has no successor and gets None. With no
        # samples at all, return an empty list so the lengths still match.
        # NOTE(review): the shift runs over the concatenated list, so the last
        # state of one round is paired with the first state of the following
        # round -- confirm whether each round's final entry should be None.
        next_states = (states[1:] + [None]) if states else []

        return states, action_values, next_states