def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. # TODO: Change back to 1e6 memory = Memory(limit=int(1e2), state_shape=env.state_space.shape, action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() kwargs.pop('state_shape') training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run_baselines(env, seed, log_dir): ''' Create baselines model and training. Replace the ddpg and its training with the algorithm you want to run. :param env: Environment of the task. :param seed: Random seed for the trial. :param log_dir: Log dir path. :return ''' rank = MPI.COMM_WORLD.Get_rank() seed = seed + 1000000 * rank set_global_seeds(seed) env.seed(seed) # Set up logger for baselines configure(dir=log_dir, format_strs=['stdout', 'log', 'csv', 'tensorboard']) baselines_logger.info('rank {}: seed={}, logdir={}'.format( rank, seed, baselines_logger.get_dir())) # Set up params for baselines ddpg nb_actions = env.action_space.shape[-1] layer_norm = False action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(params['sigma']) * np.ones(nb_actions)) memory = Memory(limit=params['replay_buffer_size'], action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) training.train(env=env, eval_env=None, param_noise=None, action_noise=action_noise, actor=actor, critic=critic, memory=memory, nb_epochs=params['n_epochs'], nb_epoch_cycles=params['n_epoch_cycles'], render_eval=False, reward_scale=1., render=False, normalize_returns=False, normalize_observations=False, critic_l2_reg=0, actor_lr=params['policy_lr'], critic_lr=params['qf_lr'], popart=False, gamma=params['discount'], clip_norm=None, nb_train_steps=params['n_train_steps'], nb_rollout_steps=params['n_rollout_steps'], nb_eval_steps=100, batch_size=64) return osp.join(log_dir, 'progress.csv')
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) logger.configure(dir='/home/vaisakhs_shaj/Desktop/DeepReinforcementLearning/5_Deep_Deterministic_Policy_Gradients/LOGS/OSIM') # Create envs. env = ProstheticsEnv(visualize=True) env.change_model(model = '2D', difficulty = 0, prosthetic = True, seed=seed) #env.seed(seed) #env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(2e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 2000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def main(): args = parse_args() # create the environment env = gym.make("kuka-v0") # <-- this we need to create env.init_bullet(render=True) # create the learning agent # model = deepq.models.mlp([16, 16]) ob_dim = env.observation_space.shape[0] ac_dim = env.action_space.n # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] noise_type = 'adaptive-param_0.2' for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # policy = GaussianMlpPolicy(ob_dim, ac_dim) # vf = NeuralNetValueFunction(ob_dim, ac_dim) # learn(env, policy=policy, vf=vf, # gamma=0.99, lam=0.97, timesteps_per_batch=2500, # desired_kl=0.002, # num_timesteps=1000, animate=False) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=False) actor = Actor(nb_actions, layer_norm=False) training.train(env=env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **args) env.close()
def run(env_id, seed, noise_type, layer_norm, evaluation, render, **kwargs): disabled = True # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = LearningEnvironment(num_particles=PARTICLES, disable_render=not render) env = StackedEnvWrapper(env, state_history_len=4) gym.logger.setLevel(logging.WARN) if evaluation and rank==0: eval_env = LearningEnvironment(num_particles=PARTICLES, disable_render=not render) eval_env = StackedEnvWrapper(eval_env, state_history_len=4) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'collision_eval')) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(5e4), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() #set_global_seeds(seed) #env.seed(seed) #if eval_env is not None: # eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, render=render, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): logging.basicConfig(filename='noGazebo_ddpg.log', level=logging.DEBUG, filemode="w") logging.getLogger().addHandler(logging.StreamHandler()) # Configure logger for the process with rank 0 (main-process?) # MPI = Message Passing Interface, for parallel computing; rank = process identifier within a group of processes rank = MPI.COMM_WORLD.Get_rank() if rank != 0: # Disable logging for rank != 0 to avoid noise. logging.debug( "I'm MPI worker {} and I guess I just log nothing".format(rank)) logger.set_level(logger.DISABLED) logging.disable(logging.CRITICAL) logging.info( "********************************************* Starting RL algorithm *********************************************" ) now = datetime.datetime.now() logging.info(now.isoformat()) # Create envs. env = gym.make(env_id) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)), allow_early_resets=True) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[0] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. (initialize memory, critic & actor objects) logging.info("action space of env: {}".format(env.action_space)) # Box(2,) logging.info("observation space of env: {}".format( env.observation_space)) # Box(51200,) memory = Memory(limit=int(1e4), action_shape=(env.action_space.shape[0], ), observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Train the RL algorithm start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) # Training is done env.close() if eval_env is not None: eval_env.close() logger.info('total runtime: {}s'.format(time.time() - start_time)) now = datetime.datetime.now() logging.info(now.isoformat()) logging.info( "********************************************* End of RL algorithm *********************************************" ) return True
def run(env_id, seed, noise_type, layer_norm, evaluation, outdir, no_hyp, **kwargs): params = locals() # Configure things. # rank = MPI.COMM_WORLD.Get_rank() # if rank != 0: logger.set_level(logger.DISABLED) rank = 0 # Create envs. env = make_env(env_id) weight_file = kwargs.pop('weight_file') if not weight_file: outdir = exp_utils.prepare_exp_dirs(params, outdir, env_id) else: outdir = exp_utils.prepare_exp_dirs(params, outdir, env_id, 'eval') logger.configure(outdir) os.makedirs(outdir, exist_ok=True) env = bench.Monitor(env, os.path.join(outdir, "%i.monitor.json" % rank)) gym.logger.setLevel(logging.WARN) logger.info('Output directory:{}, env:{}, no_hyp:{}'.format( outdir, env_id, no_hyp)) if evaluation: eval_env = make_env(env_id) eval_env.seed(42) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'), allow_early_resets=True) # env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e5), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) # critic = models.ConvCritic(layer_norm=layer_norm) # actor = models.ConvActor(nb_actions, layer_norm=layer_norm, no_hyp=no_hyp) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm, no_hyp=no_hyp) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() # set_global_seeds(seed) # env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() if weight_file: evaluate( env, nb_episodes=kwargs.get('nb_epochs', 100), reward_scale=kwargs.get('reward_scale'), render=kwargs.get('render'), param_noise=None, action_noise=None, actor=actor, critic=critic, critic_l2_reg=kwargs.get('critic_l2_reg'), memory=memory, weight_file=weight_file, ) else: training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, outdir=outdir, no_hyp=no_hyp, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank==0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None hier = env.env.hier obs_shape = env.env.observation_space.spaces[1].shape if hier else env.observation_space.shape # Parse noise_type action_noise = None param_noise = None try: nb_actions = env.action_space.shape[-1] except: assert isinstance(env.action_space, spaces.Discrete) action_shape = nb_actions = np.asarray((env.action_space.n)) for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. if hier: memory = HierMemory(tokens = env.get_all_tokens(), limit=int(1e6), action_shape=env.action_space.shape, observation_shape=obs_shape) else: memory = Memory(limit=int(1e6), action_shape=action_shape, observation_shape=obs_shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, hier=hier, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. if env_id == 'navigate': env = NavigateEnv(use_camera=False, continuous_actions=True, neg_reward=False, max_steps=500) elif env_id == 'toy': #env = continuous_gridworld.ContinuousGridworld('', max_steps=1000, obstacle_mode=continuous_gridworld.NO_OBJECTS) from toy_environment import room_obstacle_list env = gridworld.Gridworld(room_obstacle_list.obstacle_list, step_size=0.2) elif env_id == 'arm2pos': env = Arm2PosEnv(continuous=True, max_steps=500, neg_reward=False) elif env_id == 'pick-and-place': env = PickAndPlaceEnv(max_steps=500) else: env = gym.make(env_id) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) # env = gym.wrappers.Monitor(env, '/tmp/ddpg/', force=True) gym.logger.setLevel(logging.WARN) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() del kwargs['tb_dir'] del kwargs['save_path'] hindsight_mode = kwargs['hindsight_mode'] del kwargs['hindsight_mode'] training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, hindsight_mode=hindsight_mode, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): """ run the training of DDPG :param env_id: (str) the environment ID :param seed: (int) the initial random seed :param noise_type: (str) the wanted noises ('adaptive-param', 'normal' or 'ou'), can use multiple noise type by seperating them with commas :param layer_norm: (bool) use layer normalization :param evaluation: (bool) enable evaluation of DDPG training :param kwargs: (dict) extra keywords for the training.train function """ # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mean=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mean=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. # env = CartpoleSwingupEnvX() env = experiments[env_id]['env_call']() if experiments[env_id]['normalize_env']: env = normalize(env) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) # env = gym.wrappers.Monitor(env, log_dir, video_callable=False, # force=True) gym.logger.setLevel(logging.WARN) if evaluation and rank == 0: eval_env = experiments[env_id]['env_call']() if experiments[env_id]['normalize_env']: eval_env = normalize(eval_env) eval_env = bench.Monitor( eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory( limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info( 'rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) # env.seed(seed) # if eval_env is not None: # eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): print( "********************************************* Starting RL algorithm *********************************************" ) # Configure logger for the process with rank 0 (main-process?) # MPI = Message Passing Interface, for parallel computing; rank = process identifier within a group of processes rank = MPI.COMM_WORLD.Get_rank() if rank != 0: # Disable logging for rank != 0 to avoid noise. logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[ -1] - 2 # 4 action-dimensions (we keep roll & pitch angle of the robot arm fixed) for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. (initialize memory, critic & actor objects) # print(env.action_space) # Box(6,) # print(env.observation_space) # Box(220, 300) # print(env.observation_space.shape) # (220, 300) # print(env.observation_space.shape[0]) # 220 memory = Memory(limit=int(1e3), action_shape=(env.action_space.shape[0] - 2, ), observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Train the RL algorithm if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) # Training is done env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time)) return True
def run(env_id, seed, noise_type, num_cpu, layer_norm, logdir, gym_monitor, evaluation, bind_to_core, **kwargs): kwargs['logdir'] = logdir whoami = mpi_fork(num_cpu, bind_to_core=bind_to_core) if whoami == 'parent': sys.exit(0) # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: # Write to temp directory for all non-master workers. actual_dir = None Logger.CURRENT.close() Logger.CURRENT = Logger(dir=mkdtemp(), output_formats=[]) logger.set_level(logger.DISABLED) # Create envs. if rank == 0: env = gym.make(env_id) if gym_monitor and logdir: env = gym.wrappers.Monitor(env, os.path.join(logdir, 'gym_train'), force=True) env = SimpleMonitor(env) if evaluation: eval_env = gym.make(env_id) if gym_monitor and logdir: eval_env = gym.wrappers.Monitor(eval_env, os.path.join(logdir, 'gym_eval'), force=True) eval_env = SimpleMonitor(eval_env) else: eval_env = None else: env = gym.make(env_id) if evaluation: eval_env = gym.make(env_id) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() Logger.CURRENT.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
nb_epochs = num_timesteps//(nb_rollout_steps*nb_epoch_cycles) # Just train training.train(env=env, session=sess, nb_epochs=nb_epochs, nb_epoch_cycles=nb_epoch_cycles, nb_rollout_steps=nb_rollout_steps, render_eval=args.render_eval, reward_scale=args.reward_scale, render=args.render, normalize_returns=args.normalize_returns, normalize_observations=args.normalize_observations, critic_l2_reg=args.critic_l2_reg, actor_lr=args.actor_lr, critic_lr=args.critic_lr, popart=args.popart, gamma=args.gamma, clip_norm=args.clip_norm, nb_train_steps=args.nb_train_steps, # nb_eval_steps=args.nb_eval_steps, batch_size=args.batch_size, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, job_id="", outdir="/tmp/experiments/"+str(args.environment)+"/DDPG/") env.close() if eval_env is not None:
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), "%i.monitor.json"%rank)) gym.logger.setLevel(logging.WARN) if evaluation and rank==0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, logger.get_dir() and os.path.join(logger.get_dir(), "%i.monitor.json"%rank)) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) activation_map = { "relu" : tf.nn.relu, "leaky_relu" : U.lrelu, "tanh" :tf.nn.tanh} critic = Critic(layer_norm=layer_norm, hidden_sizes=kwargs["vf_size"], activation=activation_map[kwargs["activation_vf"]]) actor = Actor(nb_actions, layer_norm=layer_norm, hidden_sizes=kwargs["policy_size"], activation=activation_map[kwargs["activation_policy"]]) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(args, seed, noise_type, layer_norm, evaluation, **kwargs): import time import os import baselines.ddpg.training as training from baselines.ddpg.models import Actor, Critic from baselines.ddpg.memory import Memory from baselines.ddpg.noise import AdaptiveParamNoiseSpec, NormalActionNoise, OrnsteinUhlenbeckActionNoise import tensorflow as tf # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = common.make_env(args) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank==0: eval_env = common.make_env(args) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, custom_log_dir, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) train_recording_path = os.path.join( custom_log_dir, env_id, 'train', datetime.now().strftime("%Y-%m-%d-%H-%M-%S")) os.makedirs(train_recording_path) # Create envs. env = gym.make(env_id) env = TraceRecordingWrapper(env, directory=train_recording_path, buffer_batch_size=10) logger.info('TraceRecordingWrapper dir: {}'.format(env.directory)) # env = bench.Monitor(env, os.path.join(train_recording_path, 'log')) if evaluation and rank == 0: eval_recording_path = os.path.join( custom_log_dir, env_id, 'eval', datetime.now().strftime("%Y-%m-%d-%H-%M-%S")) os.makedirs(eval_recording_path) eval_env = gym.make(env_id) eval_env = TraceRecordingWrapper(eval_env, directory=eval_recording_path, buffer_batch_size=10) logger.info('TraceRecordingWrapper eval dir: {}'.format( eval_env.directory)) # eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) # env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('DDPG: rank {}: seed={}, logdir={}'.format( rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) ######################################### DEFAULT DATA ####################################### history, abbreviation = read_stock_history(filepath='utils/datasets/stocks_history_target.h5') history = history[:, :, :4] history[:, 1:, 0] = history[:, 0:-1, 3] # correct opens target_stocks = abbreviation num_training_time = 1095 # get target history target_history = np.empty(shape=(len(target_stocks), num_training_time, history.shape[2])) for i, stock in enumerate(target_stocks): target_history[i] = history[abbreviation.index(stock), :num_training_time, :] print("target:", target_history.shape) testing_stocks = abbreviation test_history = np.empty(shape=(len(testing_stocks), history.shape[1] - num_training_time, history.shape[2])) for i, stock in enumerate(testing_stocks): test_history[i] = history[abbreviation.index(stock), num_training_time:, :] print("test:", test_history.shape) window_length = kwargs['window_length'] max_rollout_steps = kwargs['nb_rollout_steps'] ############################################################################################### train_env = PortfolioEnv(target_history, target_stocks, steps=min(max_rollout_steps, target_history.shape[1]-window_length-2), window_length=window_length) infer_train_env = PortfolioEnv(target_history, target_stocks, steps=target_history.shape[1]-window_length-2, window_length=window_length) infer_test_env = PortfolioEnv(test_history, testing_stocks, steps=test_history.shape[1]-window_length-2, window_length=window_length) kwargs['nb_eval_steps'] = infer_train_env.steps kwargs['nb_eval_test_steps'] = infer_test_env.steps print("SPACE:", train_env.observation_space.shape) # Parse noise_type action_noise = None param_noise = None nb_actions = train_env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=train_env.action_space.shape, observation_shape=train_env.observation_space.shape) critic = Critic(nb_actions, layer_norm=layer_norm, asset_features_shape=train_env.asset_features_shape) actor = Actor(nb_actions, layer_norm=layer_norm, asset_features_shape=train_env.asset_features_shape) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) train_env.seed(seed) infer_train_env.seed(seed) infer_test_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=train_env, train_eval_env=infer_train_env, test_eval_env=infer_test_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) train_env.close() infer_train_env.close() infer_test_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, perform, use_expert, expert_dir, use_trpo_expert, expert_limit, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) gym.logger.setLevel(logging.WARN) if evaluation and perform: perform = False if evaluation and rank == 0 or perform: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) # env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) if use_expert: expert = Expert(limit=expert_limit, env=env) if expert_dir is None: expert_dir = os.path.join('./expert', env.env.spec.id) + '/expert.pkl' expert.load_file(expert_dir) elif use_trpo_expert: assert expert_dir is not None expert = Expert(limit=expert_limit, env=env) expert.load_file_trpo(expert_dir) else: expert = None # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, perform=perform, expert=expert, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor( env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. #memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, memory = ESMemoryAdapter(limit=int(kwargs['buffer_size']), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape, forgetting_factor=kwargs['gamma'], overwrite_policy=kwargs['buffer_overwrite'], sample_policy=kwargs['buffer_sample'], batch_size=kwargs['batch_size']) del kwargs['buffer_size'] del kwargs['buffer_overwrite'] del kwargs['buffer_sample'] # observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. trial = int(logger.get_dir()[-3:]) seed = seed + 1000000 * rank + 10000 * trial logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
# Just train training.train( env=env, session=sess, nb_epochs=nb_epochs, nb_epoch_cycles=nb_epoch_cycles, nb_rollout_steps=nb_rollout_steps, render_eval=args.render_eval, reward_scale=args.reward_scale, render=args.render, normalize_returns=args.normalize_returns, normalize_observations=args.normalize_observations, critic_l2_reg=args.critic_l2_reg, actor_lr=args.actor_lr, critic_lr=args.critic_lr, popart=args.popart, gamma=args.gamma, clip_norm=args.clip_norm, nb_train_steps=args.nb_train_steps, # nb_eval_steps=args.nb_eval_steps, batch_size=args.batch_size, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, job_id="", outdir="/tmp/experiments/" + str(args.environment) + "/DDPG/") env.close()
def run(seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create the opensim env. train_env = prosthetics_env.Wrapper( osim_env.ProstheticsEnv(visualize=kwargs['render']), frameskip=kwargs['frameskip'], reward_shaping=kwargs['reward_shaping'], reward_shaping_x=kwargs['reward_shaping_x'], feature_embellishment=kwargs['feature_embellishment'], relative_x_pos=kwargs['relative_x_pos'], relative_z_pos=kwargs['relative_z_pos']) train_env.change_model(model=kwargs['model'].upper(), prosthetic=kwargs['prosthetic'], difficulty=kwargs['difficulty'], seed=seed) if rank == 0: train_env = bench.Monitor(train_env, None) else: train_env = bench.Monitor( train_env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation: eval_env = prosthetics_env.EvaluationWrapper( osim_env.ProstheticsEnv(visualize=kwargs['render_eval']), frameskip=kwargs['eval_frameskip'], reward_shaping=kwargs['reward_shaping'], reward_shaping_x=kwargs['reward_shaping_x'], feature_embellishment=kwargs['feature_embellishment'], relative_x_pos=kwargs['relative_x_pos'], relative_z_pos=kwargs['relative_z_pos']) eval_env.change_model(model=kwargs['model'].upper(), prosthetic=kwargs['prosthetic'], difficulty=kwargs['difficulty'], seed=seed) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) else: eval_env = None # training.train() doesn't like the extra keyword args added for controlling the prosthetics env, so remove them. del kwargs['model'] del kwargs['prosthetic'] del kwargs['difficulty'] del kwargs['reward_shaping_x'] del kwargs['frameskip'] del kwargs['eval_frameskip'] del kwargs['crowdai_submit'] del kwargs['eval_only'] # Parse noise_type action_noise = None param_noise = None nb_actions = train_env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) actor_layer_sizes = [ int(x) for x in kwargs['actor_layer_sizes'].replace('[', '').replace( ']', '').split(',') ] critic_layer_sizes = [ int(x) for x in kwargs['critic_layer_sizes'].replace('[', '').replace( ']', '').split(',') ] del kwargs['actor_layer_sizes'] del kwargs['critic_layer_sizes'] logger.info('actor_layer_sizes', actor_layer_sizes) logger.info('critic_layer_sizes', critic_layer_sizes) # Configure components. memory = Memory(limit=int(1e6), action_shape=train_env.action_space.shape, observation_shape=train_env.observation_space.shape) critic = Critic(layer_norm=layer_norm, activation=kwargs['activation'], layer_sizes=critic_layer_sizes) actor = Actor(nb_actions, layer_norm=layer_norm, activation=kwargs['activation'], layer_sizes=actor_layer_sizes) del kwargs['activation'] # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) train_env.seed(seed) if eval_env: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=train_env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) train_env.close() if eval_env: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. # env = gym.make(env_id) # env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank == 0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None #dc = TestContainer(num_assets=3, num_samples=20000) dc = BitcoinTestContainer(csv_file_name='../../../data/csvs/output.csv') env = TradingStateModel(datacontainer=dc, episode_length=kwargs['nb_rollout_steps'], is_training=True, commission_percentage=COMMISSION_PERCENTAGE) # Parse noise_type action_noise = None param_noise = None # nb_actions = env.action_space.shape[-1] nb_actions = env.datacontainer.num_assets for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec( initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise( mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError( 'unknown noise type "{}"'.format(current_noise_type)) # Configure components. # memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(num_asset_features=env.datacontainer.total_asset_features, num_actions=env.datacontainer.num_assets, asset_features_shape=env.asset_features_shape, portfolio_features_shape=env.portfolio_features_shape, layer_norm=layer_norm) actor = Actor(nb_actions, num_asset_features=env.datacontainer.total_asset_features, num_actions=env.datacontainer.num_assets, asset_features_shape=env.asset_features_shape, portfolio_features_shape=env.portfolio_features_shape, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) # env.seed(seed) # if eval_env is not None: # eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, tensorboard_directory='./tensorboard_' + str(COMMISSION_PERCENTAGE), infer_directory='./infer_ims_' + str(COMMISSION_PERCENTAGE), **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs): # Configure things. rank = MPI.COMM_WORLD.Get_rank() if rank != 0: logger.set_level(logger.DISABLED) # Create envs. env = gym.make(env_id) env = bench.Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank))) if evaluation and rank==0: eval_env = gym.make(env_id) eval_env = bench.Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval')) env = bench.Monitor(env, None) else: eval_env = None # Parse noise_type action_noise = None param_noise = None nb_actions = env.action_space.shape[-1] for current_noise_type in noise_type.split(','): current_noise_type = current_noise_type.strip() if current_noise_type == 'none': pass elif 'adaptive-param' in current_noise_type: _, stddev = current_noise_type.split('_') param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev)) elif 'normal' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) elif 'ou' in current_noise_type: _, stddev = current_noise_type.split('_') action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions)) else: raise RuntimeError('unknown noise type "{}"'.format(current_noise_type)) # Configure components. memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape) critic = Critic(layer_norm=layer_norm) actor = Actor(nb_actions, layer_norm=layer_norm) # Seed everything to make things reproducible. seed = seed + 1000000 * rank logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir())) tf.reset_default_graph() set_global_seeds(seed) env.seed(seed) if eval_env is not None: eval_env.seed(seed) # Disable logging for rank != 0 to avoid noise. if rank == 0: start_time = time.time() training.train(env=env, eval_env=eval_env, param_noise=param_noise, action_noise=action_noise, actor=actor, critic=critic, memory=memory, **kwargs) env.close() if eval_env is not None: eval_env.close() if rank == 0: logger.info('total runtime: {}s'.format(time.time() - start_time))