Example #1
import numpy as np

from game import Game  # project-specific game engine; import path assumed


class GameEnv(object):
    """Gym-style wrapper around the Game engine."""

    def __init__(self, level='env/level.csv'):
        self.game = Game(level)
        # Repeat each chosen action for 4 frames to speed up training.
        self.repeat_frame_skip = 4

    def reset(self):
        self.game.reset()
        state = self.game.state()
        self.agent_coord = state['coord']
        return state

    def step(self, action):
        # Apply the same action for the configured number of frames.
        for _ in range(self.repeat_frame_skip):
            self.game.step(action)

        state = self.game.state()
        dead = state['dead']
        goal = state['goal']
        coord = state['coord']

        # Reward: a -1 time penalty per step, plus horizontal progress,
        # plus a +/-100 terminal bonus for reaching the goal or dying.
        reward = -1 + (coord[0] -
                       self.agent_coord[0]) + 100 * goal - 100 * dead
        done = dead or goal
        self.agent_coord = coord
        return state, reward, done, {
            'goal': goal,
            'dead': dead,
            'distance': self.agent_coord[0]
        }

    def render(self, mode='rgb_array'):
        # The engine renders (width, height, channels); swap the first two
        # axes so the array comes back as (height, width, channels).
        pixels = self.game.render(mode)
        pixels = np.swapaxes(pixels, 0, 1)
        return pixels
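
A minimal driving loop for this wrapper could look like the sketch below. The discrete action set [0, 1, 2, 3] and the random action choice are placeholders (assumptions, not part of the original example); a real agent would sample actions from its policy instead.

import random

# Hypothetical usage sketch for GameEnv. The action set below is a
# placeholder; the real project defines its own action space.
env = GameEnv('env/level.csv')
state = env.reset()
done, total_reward = False, 0

while not done:
    action = random.choice([0, 1, 2, 3])  # placeholder actions (assumed)
    state, reward, done, info = env.step(action)
    total_reward += reward

print('distance reached:', info['distance'], '| total reward:', total_reward)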
Example #2
# RL and Game are project modules; these import paths are assumed.
from model import RL
from env import Game

if __name__ == "__main__":

    start_iter = 50000
    init_checkpoint = None
    num_epochs = 2000001
    dim_states = 52
    rl = RL(dim_states, lr_a=0.0001, lr_c=0.0001,
            init_checkpoint=init_checkpoint)

    # Fine-tune: resume from the saved iteration unless an explicit
    # checkpoint was passed to the constructor.
    if start_iter != 0 and not init_checkpoint:
        rl.load_model('rl', start_iter)

    env = Game()
    for episode in range(start_iter, num_epochs):
        env.reset()
        print()

        history_vec = []
        history_pid = []
        while True:
            pid = env.now_player_id
            # No one bid for landlord, or the game is over: record the
            # final observation and reward of every player.
            if env.landlord_count == 3 or env.winner >= 0:
                for i in range(3):
                    state, f_reward, y_reward, act_ids, dyn_vec, _, label_mask, attn_mask = env.observe(pid)
                    print('Player', pid, 'received reward', y_reward)
                    pid = (pid + 1) % 3
                    env.now_player_id = pid
                break
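
The settlement branch walks all three seats by advancing the player id modulo 3. A standalone sketch of just that rotation, with an arbitrary starting seat chosen for illustration:

# Isolated sketch of the three-seat rotation used in the settlement
# branch above: from any starting seat, (pid + 1) % 3 visits each of
# the three players exactly once.
pid = 2  # arbitrary starting seat, for illustration only
for _ in range(3):
    print('settling seat', pid)  # visits seats 2, 0, 1
    pid = (pid + 1) % 3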