Example no. 1
def main():
    # Tensorforce: wrap the Gazebo-backed Jaco arm environment and train with TRPO
    env = OpenAIGym('JacoArm-v0')

    agent = TRPOAgent(states_spec=env.states,
                      actions_spec=env.actions,
                      network_spec=network_spec,
                      batch_size=512)

    # agent = PPOAgent(
    # 	states_spec=env.states,
    # 	actions_spec=env.actions,
    # 	network_spec=network_spec,
    # 	batch_size=512,
    # 	step_optimizer=dict(
    # 		type='adam',
    # 		learning_rate=1e-4
    # 	)
    # )

    runner = Runner(agent=agent, environment=env)

    raw_input("hit enter when gazebo is loaded...")
    print()
    env.gym.unpause()
    env.gym.hold_init_robot_pos([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0])
    runner.run(episodes=1500,
               max_episode_timesteps=1000,
               episode_finished=episode_finished)

    # Old-fashioned way (kept for reference): step the raw gym environment manually
    # env = gym.make('JacoArm-v0')
    # print "launching the world..."
    # # Gazebo loading issues: let the user decide when to start the learning
    # raw_input("hit enter when gazebo is loaded...")
    # env.set_physics_update(0.0001, 10000)
    # raw_input("hit enter when gazebo is loaded...")

    # # env.set_goal([0.167840578046, 0.297489331432, 0.857454500127])

    # total_episodes = 100
    # action = [1,1,1,1,1,1,1,1,1,1]
    # x = 0
    # # for x in range(total_episodes):
    # while True:
    # 	# if x % 10 is 0:
    # 	action = numpy.random.rand(1, 10)[0]
    # 		# print 'new action is', action

    # 	state, reward, done, _ = env.step(action)
    # 	print reward
    # 	time.sleep(0.2)
    # 	x += 1

    write_to_csv(train_data, 'test.csv')
    env.close()
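
The snippet above leaves out its module-level setup: the Tensorforce imports and the definitions of network_spec, episode_finished, train_data and write_to_csv. Below is a minimal sketch of what they might look like for the pre-1.0 (0.3-era) Tensorforce API this snippet uses; everything in it is an illustrative assumption rather than the original author's code:

import csv

from tensorforce.agents import TRPOAgent
from tensorforce.contrib.openai_gym import OpenAIGym
from tensorforce.execution import Runner

# Assumed two-layer MLP in the list-of-dicts format Tensorforce 0.3.x expects.
network_spec = [
    dict(type='dense', size=64),
    dict(type='dense', size=64),
]

# Per-episode statistics collected during training and dumped by write_to_csv().
train_data = []

def episode_finished(runner):
    # Called by Runner after every episode; returning False would stop training early.
    train_data.append([runner.episode, runner.episode_rewards[-1]])
    return True

def write_to_csv(rows, path):
    # Hypothetical helper: write the collected episode statistics to a CSV file.
    with open(path, 'w') as f:
        csv.writer(f).writerows(rows)
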
Example no. 2
def experiment(args, env_name, base_agent="agent.json", 
        agent_folder=None, visualize=True, num_episodes=1000):
    
    seasonals = (env_name == "seasonals-v1")

    train_env = OpenAIGym(env_name) \
            if not seasonals else EnvWrap(
                    gym.make('seasonals-v1'), batched=True,
                    subep_len=252, num_subeps=5)
    test_env = OpenAIGym(env_name, monitor_video=1, 
            monitor=os.path.join(agent_folder, "monitor")) \
                if not seasonals else EnvWrap(gym.make('seasonals-v1'))

    agent = setup_agent(train_env.states, train_env.actions, args, 
            save_dir=agent_folder, base_agent_file=base_agent)

    rewards, test_episodes, test_rewards = train(
            agent, train_env, num_episodes=num_episodes, 
            test_env=train_env)

    train_env.close()
    if visualize:
        plot_rewards(rewards, 
                test_episodes=test_episodes,
                test_rewards=test_rewards,
                save_dir=agent_folder)
    reward, history = test(agent, test_env, start_index=(
        test_env.first_trading_day + 252 * 5 if seasonals else None))

    graph_episode(history, 
            save_path=os.path.join(agent_folder, "test.png"))
    test_env.close()
    agent.close()
    experiment_data = {"final_test_reward":reward,
            "test_average_last_50":np.mean(test_rewards[-10:]),
            "train_average_last_50":np.mean(rewards[-50:]),
            "test_average_last_10":np.mean(test_rewards[-2:]),
            "train_average_last_10":np.mean(rewards[-10:]),
            }
    experiment_data.update(args)
    return experiment_data
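
A hypothetical invocation of experiment follows, for orientation. The helpers it relies on (setup_agent, train, test, plot_rewards, graph_episode, EnvWrap) are assumed to be defined elsewhere in the same module, and all argument values below are purely illustrative:

# Illustrative call; args would normally come from a hyperparameter sweep.
args = {"learning_rate": 1e-4, "batch_size": 64}

results = experiment(
    args,
    env_name="CartPole-v0",        # any registered gym id, or "seasonals-v1"
    base_agent="agent.json",
    agent_folder="runs/cartpole",  # monitor output, reward plots and test.png land here
    visualize=False,
    num_episodes=500,
)
print(results["final_test_reward"])
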
Example no. 3
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    environment = OpenAIGym(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe, monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")
    agent_config.default(dict(states=environment.states, actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                raise OSError("Cannot save agent to dir {} ({})".format(save_dir, e))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    report_episodes = max(args.episodes // 1000, 1)  # guard against modulo-by-zero for short runs
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
            logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
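
The --agent-config and --network-config options above point to plain JSON files read by Configuration.from_json and from_json. Their exact schema depends on the agent type and the (pre-1.0) Tensorforce version this script targets; the sketch below only illustrates the general shape, and every key, value and file name in it is an assumption:

import json

# Assumed hyperparameters for the agent chosen with -a; valid keys vary per agent type.
agent_config = {
    "batch_size": 512,
    "discount": 0.99,
}

# Assumed layered-network definition for the network configuration file.
network_config = [
    {"type": "dense", "size": 64},
    {"type": "dense", "size": 64},
]

with open("agent.json", "w") as f:
    json.dump(agent_config, f, indent=2)
with open("network.json", "w") as f:
    json.dump(network_config, f, indent=2)

# These files would then be passed on the command line, e.g. (script name and
# agent key are assumptions; the key must exist in the imported agents mapping):
#   python openai_gym_runner.py CartPole-v0 -a TRPOAgent -c agent.json -n network.json -e 10000
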
Example no. 4
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment, e.g. ppaquette/SuperMarioBros-1-1-v0")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=100000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    parser.add_argument('-ld', '--load-demo', required=True, help="Load demos from this dir")
    parser.add_argument('-pt', '--pretrain', action='store_true', default=False, help="Pretrain agent on demos")
    parser.add_argument('-ul', '--use_lstm', action='store_true', default=False, help="Use LSTM model")
    parser.add_argument('-ls', '--lstm_size', type=int, default=256, help="LSTM size")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    environment = OpenAIGym(args.gym_id,
                            monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video)
    mode_wrapper = wrappers.SetPlayingMode('algo')
    ac_wrapper = wrappers.ToDiscrete()
    environment.gym = mode_wrapper(ac_wrapper(environment.gym))

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    agent_config.default(dict(states=environment.states,
                              actions=environment.actions,
                              network=mario_net(name='mario',
                                                lstm_size=args.lstm_size,
                                                actions=environment.actions['num_actions'],
                                                use_lstm=args.use_lstm)))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        logger.info("-" * 16)
        agent.load_model(args.load)
        logger.info("Loaded {}".format(agent))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError as e:
                raise OSError("Cannot save agent to dir {} ({})".format(save_dir, e))

    try:
        if args.load_demo:
            logger.info("-" * 16)
            logger.info("Loading demos")
            demos = demo.load(args.load_demo)
            logger.info("Importing demos")
            agent.import_demonstrations(demos)

            if args.pretrain:
                logger.info("-" * 16)
                logger.info("Pretraining {} steps".format(len(demos)))
                agent.pretrain(steps=len(demos))

        runner = Runner(
            agent=agent,
            environment=environment,
            repeat_actions=1,
            save_path=args.save,
            save_episodes=args.save_episodes
        )

        report_episodes = max(args.episodes // 1000, 1)  # guard against modulo-by-zero for short runs
        if args.debug:
            report_episodes = 1

        def episode_finished(r):
            if r.episode % report_episodes == 0:
                logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
                logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
                logger.info("Average of last 500 rewards: {}".format(sum(r.episode_rewards[-500:]) / 500))
                logger.info("Average of last 100 rewards: {}".format(sum(r.episode_rewards[-100:]) / 100))
            return True

        logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
        runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
        logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))
    except KeyboardInterrupt:
        # Save the partially trained agent before exiting.
        if args.save:
            agent.save_model(args.save)

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
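
This script, too, depends on helpers that are not shown: the demo module used to load and import recorded demonstrations, and mario_net, which builds the network definition from the command-line options. Given its call signature above, a rough sketch of mario_net might look like the following; the layer list, layer names and sizes are assumptions, not the original implementation:

def mario_net(name, lstm_size=256, actions=None, use_lstm=False):
    # Assumed convolutional torso followed by a dense layer, expressed as a
    # Tensorforce layer list; an LSTM layer is appended when use_lstm is set.
    layers = [
        dict(type='conv2d', size=32, window=8, stride=4),
        dict(type='conv2d', size=64, window=4, stride=2),
        dict(type='flatten'),
        dict(type='dense', size=512),
    ]
    if use_lstm:
        layers.append(dict(type='lstm', size=lstm_size))
    # 'actions' (the number of discrete actions) is accepted in case the caller
    # wants to size an output head here; Tensorforce normally adds that head
    # itself, so this sketch does not use it.
    return layers

Depending on the Tensorforce version, the returned list may need to be wrapped (for example with layered_network_builder in the older Configuration-based API) before being assigned to the network entry of the agent configuration.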