def main():
    """Train a TRPO agent on the JacoArm-v0 Gazebo environment via tensorforce.

    Blocks until the user confirms Gazebo has finished loading, then runs
    1500 episodes and dumps the collected training data to CSV.
    """
    env = OpenAIGym('JacoArm-v0')
    agent = TRPOAgent(
        states_spec=env.states,
        actions_spec=env.actions,
        network_spec=network_spec,  # NOTE(review): assumed module-level — confirm it is defined
        batch_size=512,
    )
    runner = Runner(agent=agent, environment=env)

    # Gazebo loads asynchronously; let the user start learning once it is up.
    # NOTE(review): raw_input is Python 2 only — under Python 3 this raises
    # NameError; switch to input() if the script targets Python 3.
    raw_input("hit enter when gazebo is loaded...")
    print()
    env.gym.unpause()
    # Pin the arm to a fixed starting pose (presumably 6 arm joints + 3
    # finger joints — verify against the JacoArm env definition).
    env.gym.hold_init_robot_pos([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0])
    runner.run(episodes=1500, max_episode_timesteps=1000,
               episode_finished=episode_finished)

    # NOTE(review): train_data is not defined in this function — assumed to be
    # a module-level accumulator populated by episode_finished; confirm.
    write_to_csv(train_data, 'test.csv')
    env.close()
def experiment(args, env_name, base_agent="agent.json", agent_folder=None, visualize=True, num_episodes=1000):
    """Run one train/evaluate cycle for a single agent configuration.

    Trains an agent on ``env_name``, optionally plots the reward curves,
    runs a final monitored test episode, and returns a dict of summary
    statistics merged with ``args``.

    Args:
        args: agent hyperparameters (dict-like; merged into the result).
        env_name: gym environment id; "seasonals-v1" gets special wrapping.
        base_agent: JSON file with the base agent spec.
        agent_folder: directory for monitor output, plots, and saves.
        visualize: if True, write reward plots to agent_folder.
        num_episodes: number of training episodes.

    Returns:
        dict of summary reward statistics, updated with ``args``.
    """
    seasonals = (env_name=="seasonals-v1")
    # seasonals-v1 needs the custom EnvWrap (batched 252-step sub-episodes);
    # every other env goes through the plain tensorforce OpenAIGym adapter.
    train_env = OpenAIGym(env_name) \
        if not seasonals else EnvWrap(
            gym.make('seasonals-v1'), batched=True, subep_len=252, num_subeps=5)
    test_env = OpenAIGym(env_name, monitor_video=1,
                         monitor=os.path.join(agent_folder, "monitor")) \
        if not seasonals else EnvWrap(gym.make('seasonals-v1'))
    agent = setup_agent(train_env.states, train_env.actions, args,
                        save_dir=agent_folder, base_agent_file=base_agent)
    # NOTE(review): train() is handed train_env as its test_env, so the
    # monitored test_env built above is only used for the final test() call
    # below — confirm this is intentional and not a typo.
    rewards, test_episodes, test_rewards = train(
        agent, train_env, num_episodes=num_episodes, test_env=train_env)
    train_env.close()
    if visualize:
        plot_rewards(rewards, test_episodes=test_episodes,
                     test_rewards=test_rewards, save_dir=agent_folder)
    # For seasonals, start the final test after the 5 training sub-episodes
    # (5 * 252 trading days); other envs start from the default index.
    reward, history = test(agent, test_env, start_index=(
        test_env.first_trading_day + 252 * 5 if seasonals else None))
    graph_episode(history, save_path=os.path.join(agent_folder, "test.png"))
    test_env.close()
    agent.close()
    # NOTE(review): the "last_50"/"last_10" test keys slice [-10:]/[-2:] —
    # presumably tests run every 5th episode so 10 test rewards span ~50
    # training episodes; verify against train()'s test schedule, otherwise
    # these keys misreport the window size.
    experiment_data = {"final_test_reward":reward,
                       "test_average_last_50":np.mean(test_rewards[-10:]),
                       "train_average_last_50":np.mean(rewards[-50:]),
                       "test_average_last_10":np.mean(test_rewards[-2:]),
                       "train_average_last_10":np.mean(rewards[-10:]),
                       }
    experiment_data.update(args)
    return experiment_data
def main():
    """CLI entry point: train a tensorforce agent on an OpenAI Gym environment.

    Parses command-line options, builds the environment/agent/runner, runs
    training with periodic progress logging, and closes the environment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    environment = OpenAIGym(args.gym_id, monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video)

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    if args.network_config:
        network = from_json(args.network_config)
    else:
        network = None
        logger.info("No network configuration provided.")

    # Fill in any config keys the user did not supply explicitly.
    agent_config.default(dict(states=environment.states,
                              actions=environment.actions, network=network))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                # BUG FIX: the old message had dangling empty parens "()".
                raise OSError("Cannot save agent to dir {}".format(save_dir))

    runner = Runner(
        agent=agent,
        environment=environment,
        repeat_actions=1,
        save_path=args.save,
        save_episodes=args.save_episodes
    )

    # BUG FIX: for fewer than 1000 episodes, `args.episodes // 1000` was 0 and
    # `r.episode % report_episodes` raised ZeroDivisionError. Clamp to >= 1.
    report_episodes = max(1, args.episodes // 1000)
    if args.debug:
        report_episodes = 1

    def episode_finished(r):
        # Periodic progress report; returning True keeps the runner going.
        if r.episode % report_episodes == 0:
            sps = r.total_timesteps / (time.time() - r.start_time)
            logger.info("Finished episode {ep} after {ts} timesteps. Steps Per Second {sps}".format(
                ep=r.episode, ts=r.timestep, sps=sps))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            # BUG FIX: divide by the actual window length rather than a fixed
            # 500/100, so early-run averages are not understated.
            last_500 = r.episode_rewards[-500:]
            logger.info("Average of last 500 rewards: {}".format(sum(last_500) / len(last_500)))
            last_100 = r.episode_rewards[-100:]
            logger.info("Average of last 100 rewards: {}".format(sum(last_100) / len(last_100)))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()
def main():
    """CLI entry point: train an agent on a Super Mario Bros gym environment.

    Supports loading recorded demonstrations, optional pretraining on them,
    and saving the model on Ctrl-C before exiting cleanly.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('gym_id', help="ID of the gym environment, i.e. ppaquette/SuperMarioBros-1-1-v0")
    parser.add_argument('-a', '--agent', help='Agent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=100000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")
    parser.add_argument('-ld', '--load-demo', required=True, help="Load demos from this dir")
    parser.add_argument('-pt', '--pretrain', action='store_true', default=False, help="Pretrain agent on demos")
    parser.add_argument('-ul', '--use_lstm', action='store_true', default=False, help="Use LSTM model")
    parser.add_argument('-ls', '--lstm_size', type=int, default=256, help="LSTM size")
    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    environment = OpenAIGym(args.gym_id, monitor=args.monitor,
                            monitor_safe=args.monitor_safe,
                            monitor_video=args.monitor_video)
    # Wrap the raw gym env: force 'algo' playing mode and discretize the
    # Mario action space before the agent sees it.
    mode_wrapper = wrappers.SetPlayingMode('algo')
    ac_wrapper = wrappers.ToDiscrete()
    environment.gym = mode_wrapper(ac_wrapper(environment.gym))

    if args.agent_config:
        agent_config = Configuration.from_json(args.agent_config)
    else:
        agent_config = Configuration()
        logger.info("No agent configuration provided.")

    # Fill in any config keys the user did not supply explicitly.
    agent_config.default(dict(states=environment.states,
                              actions=environment.actions,
                              network=mario_net(name='mario',
                                                lstm_size=args.lstm_size,
                                                actions=environment.actions['num_actions'],
                                                use_lstm=args.use_lstm)))
    agent = agents[args.agent](config=agent_config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        logger.info("-" * 16)
        agent.load_model(args.load)
        logger.info("Loaded {}".format(agent))

    if args.debug:
        logger.info("-" * 16)
        logger.info("Configuration:")
        logger.info(agent_config)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                # BUG FIX: the old message had dangling empty parens "()".
                raise OSError("Cannot save agent to dir {}".format(save_dir))

    try:
        if args.load_demo:
            logger.info("-" * 16)
            logger.info("Loading demos")
            demos = demo.load(args.load_demo)
            logger.info("Importing demos")
            agent.import_demonstrations(demos)
            if args.pretrain:
                logger.info("-" * 16)
                logger.info("Pretraining {} steps".format(len(demos)))
                agent.pretrain(steps=len(demos))

        runner = Runner(
            agent=agent,
            environment=environment,
            repeat_actions=1,
            save_path=args.save,
            save_episodes=args.save_episodes
        )

        # BUG FIX: for fewer than 1000 episodes, `args.episodes // 1000` was 0
        # and `r.episode % report_episodes` raised ZeroDivisionError.
        report_episodes = max(1, args.episodes // 1000)
        if args.debug:
            report_episodes = 1

        def episode_finished(r):
            # Periodic progress report; returning True keeps the runner going.
            if r.episode % report_episodes == 0:
                logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode, ts=r.timestep))
                logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
                # BUG FIX: divide by the actual window length rather than a
                # fixed 500/100, so early-run averages are not understated.
                last_500 = r.episode_rewards[-500:]
                logger.info("Average of last 500 rewards: {}".format(sum(last_500) / len(last_500)))
                last_100 = r.episode_rewards[-100:]
                logger.info("Average of last 100 rewards: {}".format(sum(last_100) / len(last_100)))
            return True

        logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=environment))
        runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
        logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode))
    except KeyboardInterrupt:
        # BUG FIX: only attempt a save when a save path was given; the old
        # code called save_model(args.save) unconditionally, which fails
        # when --save was omitted.
        if args.save:
            agent.save_model(args.save)

    if args.monitor:
        environment.gym.monitor.close()
    environment.close()