Example #1
0
def run(args):
    if args.train_pg:
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        agent.train()

    if args.train_dqn:
        env_name = args.env_name or 'MsPacmanNoFrameskip-v0'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        agent.train()

    if args.test_pg:
        env = Environment('LunarLander-v2', args, test=True)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        test(agent, env, total_episodes=30)

    if args.test_dqn:
        env = Environment('MsPacmanNoFrameskip-v0',
                          args,
                          atari_wrapper=True,
                          test=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        test(agent, env, total_episodes=50)
def run(args):
    if args.train_pg:
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        agent.train()

    if args.train_pg_improve:
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        from agent_dir.agent_pg_improve import AgentPG
        agent = AgentPG(env, args)
        agent.train()

    if args.train_dqn:
        env_name = args.env_name or 'AssaultNoFrameskip-v0'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        agent.train()

    if args.train_mario:
        from agent_dir.agent_mario import AgentMario
        agent = AgentMario(None, args)
        agent.train()

    if args.test_pg:
        env = Environment('LunarLander-v2', args, test=True)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        test(agent, env, total_episodes=30)

    if args.test_pg_improve:
        env = Environment('LunarLander-v2', args, test=True)
        from agent_dir.agent_pg_improve import AgentPG
        agent = AgentPG(env, args)
        test(agent, env, total_episodes=30)

    if args.test_dqn:
        env = Environment('AssaultNoFrameskip-v0',
                          args,
                          atari_wrapper=True,
                          test=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        test(agent, env, total_episodes=100)

    if args.test_mario:
        env = Environment('SuperMarioBros-v0', args)
        from agent_dir.agent_mario import AgentMario
        agent = AgentMario(env, args)
        test(agent, env, total_episodes=10)
def run(args):

    "******Policy Gradient******"
    if args.train_pg:
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        if args.pg_type == 'pg':
            from agent_dir.agent_pg import AgentPG
            agent = AgentPG(env, args)
        elif args.pg_type == 'pg_nor':
            from agent_dir.agent_pg import AgentPG
            agent = AgentPG(env, args, normalization=True)
        elif args.pg_type == 'pg_ppo':
            from agent_dir.agent_pg_ppo import AgentPG
            agent = AgentPG(env, args)
        agent.train()

    if args.test_pg:
        env = Environment('LunarLander-v2', args, test=True)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        test(agent, env, total_episodes=30)

    "******Deep Q Learning******"
    if args.train_dqn:
        env_name = args.env_name or 'AssaultNoFrameskip-v0'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        agent.train()
    if args.test_dqn:
        env = Environment('AssaultNoFrameskip-v0',
                          args,
                          atari_wrapper=True,
                          test=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        test(agent, env, total_episodes=100)

    if args.train_a2c:
        from agent_dir.agent_a2c import AgentA2C
        agent = AgentA2C(None, args)
        agent.train()

    if args.test_a2c:
        env = Environment('SuperMarioBros-v0', args)
        from agent_dir.agent_a2c import AgentA2C
        agent = AgentA2C(env, args)
        test(agent, env, total_episodes=10)
Example #4
0
def run(args):
    if args.train_pg:
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        agent.train()

    if args.train_dqn:
        env_name = args.env_name or 'MsPacmanNoFrameskip-v0'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        agent.train()

    if args.test_pg:
        env = Environment('LunarLander-v2', args, test=True)
        from agent_dir.agent_pg import AgentPG
        agent = AgentPG(env, args)
        test(agent, env, total_episodes=30)

    if args.test_dqn:
        env = Environment('MsPacmanNoFrameskip-v0', args, atari_wrapper=True, test=True)
        from agent_dir.agent_dqn import AgentDQN
        agent = AgentDQN(env, args)
        test(agent, env, total_episodes=50)
    
    if args.plot_dqn == 1:
        import matplotlib.pyplot as plt
        env_name = args.env_name or 'MsPacmanNoFrameskip-v0'
        env = Environment(env_name, args, atari_wrapper=True)
        from agent_dqn_plot import AgentDQN
        agent = AgentDQN(env, args)

        dqn_training = agent.train()
        ret = None
        while(ret==None or ret[-1] < 25):
            window_size = 20
            ret = next(dqn_training)
            plt.plot(ret)
            plt.xlabel('number of episodes playing')
            plt.ylabel('average reward of last {} episodes'.format(window_size))
            plt.title('learning curve of dqn with pacman')
            plt.savefig('dqn-learning_curve1.png')
            plt.close()

    elif args.plot_dqn == 2:
        import matplotlib.pyplot as plt
        game_name = 'MsPacmanNoFrameskip-v0'
        #game_name = 'AtlantisNoFrameskip-v0'
        #game_name = 'UpNDownNoFrameskip-v0'
        #game_name = 'AmidarNoFrameskip-v0'
        #game_name = 'GopherNoFrameskip-v0'
        #game_name = 'SkiingNoFrameskip-v0'
        env_name = args.env_name or game_name
        env1 = Environment(env_name, args, atari_wrapper=True)
        env2 = Environment(env_name, args, atari_wrapper=True)
        env3 = Environment(env_name, args, atari_wrapper=True)
        env4 = Environment(env_name, args, atari_wrapper=True)
        from agent_dqn_plot import AgentDQN
        agent1 = AgentDQN(env1, args, target_update_freq=100000)
        agent2 = AgentDQN(env2, args, target_update_freq=10000)
        agent3 = AgentDQN(env3, args, target_update_freq=1000)
        agent4 = AgentDQN(env4, args, target_update_freq=1)

        dqn_training1 = agent1.train()
        dqn_training2 = agent2.train()
        dqn_training3 = agent3.train()
        dqn_training4 = agent4.train()

        for episode in range(6000):
            window_size = 20
            ret1 = next(dqn_training1)
            ret2 = next(dqn_training2)
            ret3 = next(dqn_training3)
            ret4 = next(dqn_training4)
            if episode % 10 == 0:
                plt.plot(ret1, label='target_update_freq=100000')
                plt.plot(ret2, label='target_update_freq=10000')
                plt.plot(ret3, label='target_update_freq=1000')
                plt.plot(ret4, label='target_update_freq=1')
                plt.xlabel('number of episodes playing')
                plt.ylabel('average reward of last {} episodes'.format(window_size))
                plt.legend()
                plt.title('learning curve of dqn with {}'.format(game_name))
                plt.savefig('dqn-learning_curve2.png')
                plt.close()
                #print(ret)

    elif args.plot_dqn == 3:
        print('Duel vs DQN')
        #print('DDQN vs DQN')
        import matplotlib.pyplot as plt
        #game_name = 'MsPacmanNoFrameskip-v0'
        #game_name = 'AlienNoFrameskip-v4'
        game_name = 'AtlantisNoFrameskip-v4'
        #game_name = 'UpNDownNoFrameskip-v0'
        #game_name = 'AsterixNoFrameskip-v0'
        #env_name = args.env_name or game_name
        env_name = game_name
        env1 = Environment(env_name, args, atari_wrapper=True)
        env2 = Environment(env_name, args, atari_wrapper=True)
        from agent_dqn_plot import AgentDQN
        from Duel import AgentDuel
        from DDQN import AgentDDQN
        agent1 = AgentDQN(env1, args)
        agent2 = AgentDuel(env2, args)
        #agent2 = AgentDDQN(env2, args)

        dqn_training1 = agent1.train()
        dqn_training2 = agent2.train()
        for episode in range(3500):
            window_size = 20
            ret1 = next(dqn_training1)
            ret2 = next(dqn_training2)
            if episode % 10 == 0:
                plt.plot(ret1, label='DQN')
                #plt.plot(ret2, label='DDQN')
                plt.plot(ret2, label='Duel')
                plt.xlabel('number of episodes playing')
                plt.ylabel('average reward of last {} episodes'.format(window_size))
                plt.legend()
                plt.title('learning curve of dqn with {}'.format(env_name))
                #plt.savefig('dqn-learning_curve_ddqn.png')
                plt.savefig(game_name + 'dqn-learning_curve_duel.png')
                plt.close()
                #print(ret)

    if args.plot_pg == 1:
        import matplotlib.pyplot as plt
        env_name = args.env_name or 'LunarLander-v2'
        env = Environment(env_name, args, atari_wrapper=False)
        from agent_pg_plot import AgentPG
        agent = AgentPG(env, args)
        print(agent.model)

        pg_training = agent.train()

        for _ in range(600):
            result = next(pg_training)
            """plot"""
            plt.plot(range(len(result)), result, label='policy gradient')
            plt.title('learning curve of pg with {}'.format(env_name))
            plt.ylabel('average reward of last 20 episodes')
            plt.xlabel('number of episodes playing')
            plt.legend()
            plt.savefig('pg-learning_curve.png')
            plt.close()
    
    if args.plot_pg == 3:
        import matplotlib.pyplot as plt
        env_name = args.env_name or 'LunarLander-v2'
        env1 = Environment(env_name, args, atari_wrapper=False)
        env2 = Environment(env_name, args, atari_wrapper=False)
        from agent_pg_plot import AgentPG
        from importance_sampling import Agent
        agent1 = AgentPG(env1, args)
        agent2 = Agent(args, env2.observation_space.shape[0], env2.action_space, env2)

        pg_training1 = agent1.train()
        pg_training2 = agent2.train()

        for _ in range(600):
            result1 = next(pg_training1)
            result2 = next(pg_training2)
            """plot"""
            plt.plot(range(len(result1)), result1, label='online-policy gradient')
            plt.plot(range(len(result2)), result2, label='offline-policy gradient by importance sampling')
            #plt.plot(result2)
            plt.title('learning curve of pg with {}'.format(env_name))
            plt.ylabel('average reward of last 20 episodes')
            plt.xlabel('number of episodes playing')
            plt.legend()
            #plt.grid(True)
            plt.savefig('reward-episodes2.png')
            plt.close()