def get_default_config(agent_type):
    """
    Get the default configurations for an agent and its model by providing the agent type
    as a string parameter.

    :param agent_type: String containing the agent type
    :return: Tuple of agent and model default Config objects
    """
    agent_class = agents.get(agent_type)
    if not agent_class:
        raise TensorForceValueError("No such agent: {}".format(agent_type))

    return Config(agent_class.default_config), Config(agent_class.model_ref.default_config)
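# Hedged usage sketch (not part of the original module): assuming the `agents` registry
# maps type names such as 'DQNAgent' to agent classes, the helper yields two Config
# objects that can be inspected or overridden before an agent is instantiated.
#
#   agent_config, model_config = get_default_config('DQNAgent')
#   agent_config.batch_size = 32  # hypothetical override of a default value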
def main():
    gym_id = 'CartPole-v0'
    max_episodes = 10000
    max_timesteps = 1000

    env = OpenAIGymEnvironment(gym_id, monitor=False, monitor_video=False)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'exploration': 'constant',
        'exploration_args': [0.1]
    })

    agent = SimpleQAgent(config, "simpleq")
    runner = Runner(agent, env)

    def episode_finished(r):
        if r.episode % 10 == 0:
            print("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            print("Episode reward: {}".format(r.episode_rewards[-1]))
            print("Average of last 10 rewards: {}".format(np.mean(r.episode_rewards[-10:])))
        return True

    print("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(max_episodes, max_timesteps, episode_finished=episode_finished)
    print("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))
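# Hedged variant of the callback above (an addition, not in the original example): the
# callbacks in these scripts return True to keep training, which suggests the Runner
# stops once a callback returns False; under that assumption, the run can end early as
# soon as CartPole-v0 counts as solved (mean reward >= 195 over the last 100 episodes).
def solved_episode_finished(r):
    return np.mean(r.episode_rewards[-100:]) < 195.0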
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file")
    parser.add_argument('-n', '--network-config', help="Network configuration file")
    parser.add_argument('-e', '--episodes', type=int, default=50000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this directory")
    parser.add_argument('-ms', '--monitor-safe', action='store_true', default=False, help="Do not overwrite previous results")
    parser.add_argument('-mv', '--monitor-video', type=int, default=0, help="Save video every x steps (0 = disabled)")
    parser.add_argument('-s', '--save', help="Save agent to this dir")
    parser.add_argument('-se', '--save-episodes', type=int, default=100, help="Save agent every x episodes")
    parser.add_argument('-l', '--load', help="Load agent from this dir")
    parser.add_argument('-D', '--debug', action='store_true', default=False, help="Show debug outputs")

    args = parser.parse_args()

    env = OpenAIGymEnvironment(args.gym_id, monitor=args.monitor, monitor_safe=args.monitor_safe,
                               monitor_video=args.monitor_video)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape,
        'max_episode_length': args.max_timesteps
    })

    if args.agent_config:
        config.read_json(args.agent_config)
    if args.network_config:
        config.read_json(args.network_config)

    logger = logging.getLogger(__name__)
    logger.setLevel(log_levels[config.loglevel])

    preprocessing_config = config.get('preprocessing')
    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    if args.debug:
        logger.info("-" * 16)
        logger.info("File configuration:")
        logger.info(config)

    agent = create_agent(args.agent, config)

    if args.load:
        load_dir = os.path.dirname(args.load)
        if not os.path.isdir(load_dir):
            raise OSError("Could not load agent from {}: No such directory.".format(load_dir))
        agent.load_model(args.load)

    if args.debug:
        logger.info("-" * 16)
        logger.info("Agent configuration:")
        logger.info(agent.config)
        if agent.model:
            logger.info("Model configuration:")
            logger.info(agent.model.config)

    runner = Runner(agent, env, preprocessor=stack, repeat_actions=config.repeat_actions)

    if args.save:
        save_dir = os.path.dirname(args.save)
        if not os.path.isdir(save_dir):
            try:
                os.mkdir(save_dir, 0o755)
            except OSError:
                raise OSError("Cannot save agent to dir {}".format(save_dir))
        runner.save_model(args.save, args.save_episodes)

    report_episodes = args.episodes // 1000
    if args.debug:
        report_episodes = 10

    def episode_finished(r):
        if r.episode % report_episodes == 0:
            logger.info("Finished episode {ep} after {ts} timesteps".format(ep=r.episode + 1, ts=r.timestep + 1))
            logger.info("Episode reward: {}".format(r.episode_rewards[-1]))
            logger.info("Average of last 500 rewards: {}".format(np.mean(r.episode_rewards[-500:])))
            logger.info("Average of last 100 rewards: {}".format(np.mean(r.episode_rewards[-100:])))
        return True

    logger.info("Starting {agent} for Environment '{env}'".format(agent=agent, env=env))
    runner.run(args.episodes, args.max_timesteps, episode_finished=episode_finished)
    logger.info("Learning finished. Total episodes: {ep}".format(ep=runner.episode + 1))

    if args.monitor:
        env.gym.monitor.close()
    env.close()
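# Example invocation (a sketch: the script path and config files below are assumptions
# mirroring the defaults used by the other examples, not files guaranteed to exist here):
#
#   python examples/openai_gym.py CartPole-v0 -a DQNAgent \
#       -c examples/configs/dqn_agent.json -n examples/configs/dqn_network.json \
#       -e 50000 -t 2000 -s saved_models/agent -se 100 -D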
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('gym_id', help="ID of the gym environment")
    # Currently does not do anything since we don't have the distributed API for all models yet
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file",
                        default='examples/configs/dqn_agent.json')
    parser.add_argument('-n', '--network-config', help="Network configuration file",
                        default='examples/configs/dqn_network.json')
    parser.add_argument('-e', '--global-steps', type=int, default=1000000, help="Total number of steps")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-l', '--local-steps', type=int, default=20, help="Maximum number of local steps before update")
    parser.add_argument('-w', '--num-workers', type=int, default=1, help="Number of worker agents")
    parser.add_argument('-r', '--repeat-actions', type=int, default=1, help="Number of times to repeat each action")
    parser.add_argument('-m', '--monitor', help="Save results to this file")
    parser.add_argument('-M', '--mode', choices=['tmux', 'child'], default='tmux', help="Starter mode")
    parser.add_argument('-L', '--logdir', default='logs_async', help="Log directory")
    parser.add_argument('-C', '--is-child', action='store_true', default=False)
    parser.add_argument('-i', '--task-index', type=int, default=0, help="Task index")
    parser.add_argument('-p', '--is-ps', type=int, default=0, help="Is param server")
    parser.add_argument('-K', '--kill', action='store_true', default=False, help="Kill runners")

    args = parser.parse_args()

    logger = logging.getLogger(__name__)
    logger.setLevel(logging.DEBUG)

    session_name = 'openai_async'
    shell = '/bin/bash'

    kill_cmds = [
        "kill $( lsof -i:12222-{} -t ) > /dev/null 2>&1".format(12222 + args.num_workers),
        "tmux kill-session -t {}".format(session_name),
    ]
    if args.kill:
        os.system("\n".join(kill_cmds))
        return 0

    if not args.is_child:
        # start up child processes
        target_script = os.path.abspath(inspect.stack()[0][1])

        def wrap_cmd(session, name, cmd):
            if isinstance(cmd, list):
                cmd = ' '.join(shlex_quote(str(arg)) for arg in cmd)
            if args.mode == 'tmux':
                return 'tmux send-keys -t {}:{} {} Enter'.format(session, name, shlex_quote(cmd))
            elif args.mode == 'child':
                return '{} > {}/{}.{}.out 2>&1 & echo kill $! >> {}/kill.sh'.format(
                    cmd, args.logdir, session, name, args.logdir)

        def build_cmd(index, parameter_server):
            cmd_args = [
                'CUDA_VISIBLE_DEVICES=', sys.executable, target_script,
                args.gym_id,
                '--is-child',
                '--agent-config', os.path.join(os.getcwd(), args.agent_config),
                '--network-config', os.path.join(os.getcwd(), args.network_config),
                '--num-workers', args.num_workers,
                '--task-index', index,
                '--is-ps', parameter_server
            ]
            return cmd_args

        if args.mode == 'tmux':
            cmds = kill_cmds + ['tmux new-session -d -s {} -n ps'.format(session_name)]
        elif args.mode == 'child':
            cmds = [
                'mkdir -p {}'.format(args.logdir),
                'rm -f {}/kill.sh'.format(args.logdir),
                'echo "#!/bin/bash" > {}/kill.sh'.format(args.logdir),
                'chmod +x {}/kill.sh'.format(args.logdir)
            ]

        cmds.append(wrap_cmd(session_name, 'ps', build_cmd(0, 1)))

        for i in xrange(args.num_workers):
            name = 'w_{}'.format(i)
            if args.mode == 'tmux':
                cmds.append('tmux new-window -t {} -n {} -d {}'.format(session_name, name, shell))
            cmds.append(wrap_cmd(session_name, name, build_cmd(i, 0)))

        # add one PS call
        # cmds.append('tmux new-window -t {} -n ps -d {}'.format(session_name, shell))

        print("\n".join(cmds))
        os.system("\n".join(cmds))
        return 0

    env = OpenAIGymEnvironment(args.gym_id)

    config = Config({
        'repeat_actions': 1,
        'actions': env.actions,
        'action_shape': env.action_shape,
        'state_shape': env.state_shape
    })

    if args.agent_config:
        config.read_json(args.agent_config)
    if args.network_config:
        config.read_json(args.network_config)

    preprocessing_config = config.get('preprocessing')
    if preprocessing_config:
        stack = build_preprocessing_stack(preprocessing_config)
        config.state_shape = stack.shape(config.state_shape)
    else:
        stack = None

    logger.info("Starting distributed agent for OpenAI Gym '{gym_id}'".format(gym_id=args.gym_id))
    logger.info("Config:")
    logger.info(config)

    runner = DistributedRunner(agent_type=args.agent, agent_config=config, n_agents=args.num_workers,
                               n_param_servers=1, environment=env, global_steps=args.global_steps,
                               max_episode_steps=args.max_timesteps, preprocessor=stack,
                               repeat_actions=args.repeat_actions, local_steps=args.local_steps,
                               task_index=args.task_index, is_ps=(args.is_ps == 1))
    runner.run()
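# Example invocations (sketches; 'examples/openai_gym_async.py' is an assumed filename):
#
#   # launch one parameter server and 4 workers inside a tmux session named 'openai_async'
#   python examples/openai_gym_async.py CartPole-v0 -w 4 -M tmux
#
#   # the same as plain child processes, logging to logs_async/ and writing a kill.sh there
#   python examples/openai_gym_async.py CartPole-v0 -w 4 -M child -L logs_async
#
#   # tear down the tmux session and the processes bound to the worker ports
#   python examples/openai_gym_async.py CartPole-v0 -K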
def test_memoryagent_update_frequency():
    """
    Test MemoryAgent update frequency for SGD and target network updates.
    """
    update_steps = np.random.randint(1, 10)
    target_update_steps = np.random.randint(20, 200)
    state_shape = list(np.random.randint(2, 8, size=3))
    min_replay_size = np.random.randint(int(1e2), int(2e2))
    memory_capacity = np.random.randint(int(5e2), int(1e3))

    config = Config({
        'loglevel': 'debug',
        'actions': np.random.randint(2, 10),
        'batch_size': np.random.randint(2, 32),
        'update_rate': 1.0 / update_steps,
        'target_network_update_rate': 1.0 / target_update_steps,
        'min_replay_size': min_replay_size,
        'deterministic_mode': False,
        'use_target_network': True,
        'memory_capacity': memory_capacity,
        'state_shape': state_shape,
        'action_shape': []
    })

    agent = MemoryAgent(config, scope="memoryagent")
    model = MemoryAgentTestModel(config)

    # Set value function manually
    agent.model = model

    # Assert config values
    assert agent.batch_size == config['batch_size']
    assert agent.update_steps == update_steps
    assert agent.target_update_steps == target_update_steps
    assert agent.min_replay_size == config['min_replay_size']
    assert agent.use_target_network == config['use_target_network']

    max_steps = np.random.randint(int(5e3), int(1e4))

    print("Testing MemoryAgent for {} steps.".format(max_steps))
    print("Memory capacity: {}".format(config['memory_capacity']))
    print("Min replay size: {}".format(config['min_replay_size']))
    print("Batch size: {}".format(config['batch_size']))
    print("Update steps: {}".format(update_steps))
    print("Target steps: {}".format(target_update_steps))
    print("State shape: {}".format(state_shape))
    print("Actions: {}".format(config['actions']))
    print("-" * 16)

    step_count = 0
    history = []
    history_sums = []

    for step_count in xrange(max_steps):
        while True:
            state = np.random.randint(0, 255, size=state_shape)
            action = agent.get_action(state)
            reward = float(np.random.randint(0, 100) // 80)  # reward = 1 with probability 0.2
            terminal = bool(np.random.randint(0, 100) // 95)  # terminal with probability 0.05
            sumsq = np.sum(np.square(state))

            # avoid duplicate experiences
            if sumsq not in history_sums:
                break

        agent.add_observation(state, action, reward, terminal)
        history.append((state.astype(np.float32), action, reward, terminal))
        history_sums.append(sumsq)

    # All steps - steps before min_replay_size + possible step if min_replay_size is a step itself
    expected_updates = (step_count + 1) // update_steps \
        - min_replay_size // update_steps \
        + int(min_replay_size % update_steps == 0)
    expected_target_updates = (step_count + 1) // target_update_steps \
        - min_replay_size // target_update_steps \
        + int(min_replay_size % target_update_steps == 0)

    print("Took {} steps.".format(step_count + 1))
    print("Observed {} updates (expected {})".format(model.count_updates, expected_updates))
    print("Observed {} target updates (expected {})".format(model.count_target_updates, expected_target_updates))
    print("Memory has size {}".format(agent.memory.size))

    assert model.count_updates == expected_updates
    assert model.count_target_updates == expected_target_updates
    assert memory_capacity == agent.memory.size
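# Hypothetical helper (not part of the original test) that restates the expected-update
# arithmetic above in one place: it counts the multiples of update_steps that fall in
# [min_replay_size, steps_taken], i.e. the steps at which an SGD update should fire once
# the replay buffer holds enough experiences.
def expected_update_count(steps_taken, update_steps, min_replay_size):
    return steps_taken // update_steps - min_replay_size // update_steps \
        + int(min_replay_size % update_steps == 0)

# Worked instance: expected_update_count(1000, 4, 100) == 1000 // 4 - 100 // 4 + 1 == 226,
# corresponding to updates at steps 100, 104, ..., 1000.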
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('env_id', nargs='?', help="ID of the universe environment", default='HarvestDay-v0')
    parser.add_argument('-a', '--agent', default='DQNAgent')
    parser.add_argument('-c', '--agent-config', help="Agent configuration file",
                        default='examples/configs/dqn_agent.json')
    parser.add_argument('-n', '--network-config', help="Network configuration file",
                        default='examples/configs/dqn_network.json')
    parser.add_argument('-e', '--episodes', type=int, default=10000, help="Number of episodes")
    parser.add_argument('-t', '--max-timesteps', type=int, default=2000, help="Maximum number of timesteps per episode")
    parser.add_argument('-m', '--monitor', help="Save results to this file")

    args = parser.parse_args()

    env_id = args.env_id
    episodes = args.episodes
    report_episodes = episodes // 100
    max_timesteps = args.max_timesteps

    environment = OpenAIUniverseEnvironment(env_id)

    config = Config({
        'actions': environment.actions,
        'action_shape': environment.action_shape,
        'state_shape': environment.state_shape
    })

    if args.agent_config:
        config.read_json(args.agent_config)
    if args.network_config:
        config.read_json(args.network_config)

    state_wrapper = None
    if config.state_wrapper:
        state_wrapper = create_wrapper(config.state_wrapper, config.state_wrapper_param)
        config.state_shape = state_wrapper.state_shape(config.state_shape)

    agent = create_agent(args.agent, config)

    if args.monitor:
        environment.env.monitor.start(args.monitor)
        environment.env.monitor.configure(video_callable=lambda count: False)  # count % 500 == 0

    print("Starting {agent_type} for OpenAI Universe environment '{env_id}'".format(
        agent_type=args.agent, env_id=env_id))

    total_states = 0
    repeat_actions = config.get('repeat_actions', 4)
    episode_rewards = []

    for i in xrange(episodes):
        state = environment.reset()
        episode_reward = 0
        repeat_action_count = 0

        for j in xrange(max_timesteps):
            if state_wrapper:
                full_state = state_wrapper.get_full_state(state)
            else:
                full_state = state

            if repeat_action_count <= 0:
                action = agent.get_action(full_state, i, total_states)
                repeat_action_count = repeat_actions - 1
            else:
                repeat_action_count -= 1

            result = environment.execute_action(action)
            episode_reward += result['reward']
            agent.add_observation(full_state, action, result['reward'], result['terminal_state'])

            state = result['state']
            total_states += 1

            if result['terminal_state']:
                break

        episode_rewards.append(episode_reward)

        if i % report_episodes == 0:
            print("Finished episode {ep} after {ts} timesteps".format(ep=i + 1, ts=j + 1))
            print("Total reward: {}".format(episode_reward))
            print("Average of last 500 rewards: {}".format(np.mean(episode_rewards[-500:])))
            print("Average of last 100 rewards: {}".format(np.mean(episode_rewards[-100:])))

    if args.monitor:
        environment.env.monitor.close()

    print("Learning finished. Total episodes: {ep}".format(ep=i + 1))
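# Example invocation (a sketch; 'examples/openai_universe.py' is an assumed filename, and
# Universe environments additionally need their Docker/VNC backend running):
#
#   python examples/openai_universe.py HarvestDay-v0 -a DQNAgent \
#       -c examples/configs/dqn_agent.json -n examples/configs/dqn_network.json \
#       -e 10000 -t 2000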