def run(env_id, seed, evaluation, **kwargs):
    """Restore a trained model and run evaluation episodes on `env_id`.

    kwargs must provide: 'skillset', 'restore_dir', 'render_eval',
    'commit_for', 'nb_eval_episodes'.
    """
    # Build the environment and log its description.
    env = gym.make(env_id)
    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    # Load the skill library when one is requested; otherwise run skill-free.
    skills = None
    if kwargs['skillset']:
        skill_module = __import__("HER.skills.%s" % kwargs['skillset'], fromlist=[''])
        skills = SkillSet(skill_module.skillset)

    # Make the rollout reproducible.
    set_global_seeds(seed)
    env.seed(seed)

    checkpoint = os.path.join(kwargs['restore_dir'], "model")
    testing.testing(env, checkpoint, skills, kwargs['render_eval'],
                    kwargs['commit_for'], kwargs['nb_eval_episodes'])
    env.close()
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Train DDPG on `env_id` with one MPI worker per rank.

    `noise_type` is a comma-separated spec, e.g. "ou_0.2" or
    "adaptive-param_0.2,epsnorm_0.2_0.1". Rank 0 optionally builds an
    evaluation environment and owns all logging.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    env = gym.make(env_id)
    logger.debug("Env info")
    logger.debug(env.__doc__)
    logger.debug("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        eval_env = gym.make(kwargs['eval_env_id'] if kwargs['eval_env_id'] else env_id)
        # Drop the key so it is not forwarded to training.train via **kwargs.
        del kwargs['eval_env_id']
    else:
        # NOTE(review): in this branch 'eval_env_id' stays inside kwargs and
        # is forwarded to training.train — confirm train() tolerates it.
        eval_env = None

    # Translate the comma-separated noise spec into noise objects.
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for spec in noise_type.split(','):
        spec = spec.strip()
        if spec == 'none':
            continue
        if 'adaptive-param' in spec:
            _, stddev = spec.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in spec:
            _, stddev = spec.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in spec:
            _, stddev = spec.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'epsnorm' in spec:
            _, stddev, epsilon = spec.split('_')
            action_noise = EpsilonNormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions),
                epsilon=float(epsilon))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(spec))

    # Replay buffer and networks.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Per-rank seed offset keeps MPI workers decorrelated but reproducible.
    seed = seed + 1000000 * rank
    tf.reset_default_graph()

    # Load the skill library only when skill look-ahead is enabled.
    if kwargs['look_ahead'] and kwargs['skillset']:
        skill_module = __import__("HER.skills.%s" % kwargs['skillset'], fromlist=[''])
        my_skill_set = SkillSet(skill_module.skillset)
    else:
        my_skill_set = None

    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    if rank == 0:
        logger.info('rank {}: seed={}, logdir={}'.format(
            rank, seed, logger.get_dir()))

    start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, my_skill_set=my_skill_set, **kwargs)

    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Train baselines-style DDPG on `env_id`, one MPI worker per rank."""
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Training env, wrapped with a per-rank monitor when a logdir exists.
    env = gym.make(env_id)
    env = bench.Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    gym.logger.setLevel(logging.WARN)

    # Only rank 0 evaluates.
    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        eval_env = bench.Monitor(eval_env,
                                 os.path.join(logger.get_dir(), 'gym_eval'))
    else:
        eval_env = None

    # Parse the comma-separated noise specification.
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for spec in noise_type.split(','):
        spec = spec.strip()
        if spec == 'none':
            continue
        if 'adaptive-param' in spec:
            _, stddev = spec.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in spec:
            _, stddev = spec.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in spec:
            _, stddev = spec.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(spec))

    # Replay buffer and networks.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Per-rank seed offset keeps workers decorrelated but reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    if rank == 0:
        start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Train hierarchical DDPG (discrete skill choice + continuous skill
    parameters) on `env_id`, one MPI worker per rank.

    `noise_type` is a comma-separated spec; `pepsnorm_<std>_<eps>` selects the
    parameterized epsilon-normal noise sized by the skillset.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    env = gym.make(env_id)
    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-" * 20)
    gym.logger.setLevel(logging.WARN)

    if evaluation and rank == 0:
        if kwargs['eval_env_id']:
            eval_env_id = kwargs['eval_env_id']
        else:
            eval_env_id = env_id
        eval_env = gym.make(eval_env_id)
        # Remove so it is not forwarded to training.train via **kwargs.
        del kwargs['eval_env_id']
    else:
        eval_env = None

    action_noise = None
    param_noise = None
    tf.reset_default_graph()

    # HACK (kept from original): the skillset must be imported here because
    # the action dimensionality depends on it.
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        # One discrete slot per skill plus the continuous skill parameters.
        nb_actions = my_skill_set.params + my_skill_set.len
    else:
        nb_actions = env.action_space.shape[-1]

    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'pepsnorm' in current_noise_type:
            # BUGFIX: 'pepsnorm' must be tested BEFORE 'epsnorm'. The string
            # 'epsnorm' is a substring of 'pepsnorm', so the old order made
            # this branch unreachable and silently routed parameterized specs
            # to EpsilonNormalActionNoise with the wrong dimensionality.
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalParameterizedActionNoise(
                mu=np.zeros(my_skill_set.num_params),
                sigma=float(stddev) * np.ones(my_skill_set.num_params),
                epsilon=float(epsilon),
                discrete_actions_dim=my_skill_set.len)
        elif 'epsnorm' in current_noise_type:
            _, stddev, epsilon = current_noise_type.split('_')
            action_noise = EpsilonNormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions),
                epsilon=float(epsilon))
        else:
            raise RuntimeError(
                'unknown noise type "{}"'.format(current_noise_type))

    # Replay buffer sized to the (skill-augmented) action vector.
    memory = Memory(limit=int(1e6), action_shape=(nb_actions, ),
                    observation_shape=env.observation_space.shape)

    if kwargs['newarch']:
        critic = Critic(layer_norm=layer_norm, hidden_unit_list=[400, 300])
    elif kwargs['newcritic']:
        critic = NewCritic(layer_norm=layer_norm)
    else:
        critic = Critic(layer_norm=layer_norm)

    # NOTE(review): this branch tests `is None` while the import above tests
    # truthiness — a skillset of '' would reach the else branch with
    # `my_skill_set` undefined. Confirm callers never pass an empty string.
    if kwargs['skillset'] is None:
        if kwargs['newarch']:
            actor = Actor(discrete_action_size=env.env.discrete_action_size,
                          cts_action_size=nb_actions - env.env.discrete_action_size,
                          layer_norm=layer_norm,
                          hidden_unit_list=[400, 300])
        else:
            actor = Actor(discrete_action_size=env.env.discrete_action_size,
                          cts_action_size=nb_actions - env.env.discrete_action_size,
                          layer_norm=layer_norm)
        my_skill_set = None
    else:
        # Actor chooses among skills and emits their continuous parameters.
        if kwargs['newarch']:
            actor = Actor(discrete_action_size=my_skill_set.len,
                          cts_action_size=nb_actions - my_skill_set.len,
                          layer_norm=layer_norm,
                          hidden_unit_list=[400, 300])
        else:
            actor = Actor(discrete_action_size=my_skill_set.len,
                          cts_action_size=nb_actions - my_skill_set.len,
                          layer_norm=layer_norm)

    # Per-rank seed offset keeps MPI workers decorrelated but reproducible.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    if rank == 0:
        start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, my_skill_set=my_skill_set, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, render, log_dir, restore_dir, commit_for, train_epoch,
        batch_size=32, lr=1e-3, seed=0, dataset_size=2000):
    """Roll out a restored skill policy to generate a prediction dataset for
    `env_id`.

    The model-fitting tail after generate_data() is currently disabled by the
    exit(1) call and is kept only for reference.
    """
    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]

    # The regressor maps full observations to observations minus the goal.
    global in_size, out_size
    in_size = observation_shape
    out_size = observation_shape - 3  # assumes a 3-d target-location goal

    set_global_seeds(seed)
    env.seed(seed)

    with U.single_threaded_session() as sess:
        actor_model = DDPGSkill(observation_shape=(observation_shape, ),
                                skill_name="skill",
                                nb_actions=env.action_space.shape[-1],
                                restore_path=restore_dir)
        print("Assumption: Goal is 3d target location")
        pred_model = regressor(in_shape=in_size, out_shape=out_size,
                               name="suc_pred_model", sess=sess,
                               log_dir=log_dir)

        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        # Load the pretrained skill weights before generating rollouts.
        actor_model.restore_skill(
            path=get_home_path(osp.expanduser(restore_dir)), sess=sess)

        generate_data(env, env_id, log_dir, actor_model, dataset_size,
                      commit_for, render)
        exit(1)

        # NOTE(review): everything below is unreachable because of the
        # exit(1) above — retained for when data generation is split out.
        csv_filename = osp.join(log_dir, "%s.csv" % env_id)
        base_dataset = pd.read_csv(csv_filename)
        train, test = train_test_split(base_dataset, test_size=0.2)

        # Whiten with training-set statistics and persist them for inference.
        train_mean = np.mean(train, axis=0)
        train_std = np.std(train, axis=0)
        statistics = np.concatenate((train_mean, train_std))
        with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
            np.save(f, statistics)

        train_dataset = ((train - train_mean) / train_std)
        test_dataset = ((test - train_mean) / train_std)
        test_dataset = test_dataset.values
        test_dataset = [test_dataset[:, :in_size], test_dataset[:, in_size:]]

        # print(train_dataset.shape, test_dataset[0].shape)
        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
def run(env_id, seed, noise_type, layer_norm, evaluation, memory_size,
        factor, **kwargs):
    """Evaluate a (possibly image-based) DDPG agent on `env_id`.

    Single-process variant: rank is fixed at 0, so the MPI-style guards
    below are inert but kept for symmetry with the training launcher.
    """
    rank = 0
    if rank != 0:
        logger.set_level(logger.DISABLED)
    # Accessed eagerly so a missing key fails fast with a KeyError.
    dologging = kwargs["dologging"]

    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)
    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
    else:
        eval_env = None

    # Parse the comma-separated noise specification.
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for spec in noise_type.split(','):
        spec = spec.strip()
        if spec == 'none':
            continue
        if 'adaptive-param' in spec:
            _, stddev = spec.split('_')
            param_noise = AdaptiveParamNoiseSpec(
                initial_stddev=float(stddev),
                desired_action_stddev=float(stddev))
        elif 'normal' in spec:
            _, stddev = spec.split('_')
            action_noise = NormalActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in spec:
            _, stddev = spec.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(nb_actions),
                sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(spec))

    # Tuple observation spaces (image + state) lack a flat .shape, so one is
    # patched on from the component spaces.
    single_train = False
    ospace = env.observation_space
    has_image = (not hasattr(ospace, 'shape')) or (not ospace.shape)
    if has_image:
        assert isinstance(env.observation_space, gym.spaces.Tuple)
        env.observation_space.shape = [
            x.shape for x in env.observation_space.spaces
        ]

    if rank == 0 or not single_train:
        memory = Memory(limit=memory_size,
                        action_shape=env.action_space.shape,
                        observation_shape=env.observation_space.shape)
    else:
        memory = None

    # Pick networks matching the observation modality.
    if has_image:
        ignore = False
        if ignore:
            critic = IgnoreDepthCritic(layer_norm=layer_norm)
            actor = IgnoreDepthActor(nb_actions, layer_norm=layer_norm)
        else:
            critic = DepthCritic(layer_norm=layer_norm)
            if factor:
                actor = FactoredDepthActor(nb_actions, layer_norm=layer_norm)
            else:
                actor = DepthActor(nb_actions, layer_norm=layer_norm)
    else:
        critic = Critic(layer_norm=layer_norm)
        actor = Actor(nb_actions, layer_norm=layer_norm)

    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        # NOTE(review): eval env is pinned to seed 6, not the run seed —
        # confirm this is intentional.
        eval_env.seed(6)

    if rank == 0:
        start_time = time.time()
    testing.test(env=env, eval_env=eval_env, param_noise=param_noise,
                 action_noise=action_noise, actor=actor, critic=critic,
                 memory=memory, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, render, log_dir, train_epoch, batch_size=32, lr=1e-3,
        seed=0, whiten=False):
    """Fit the state regressor on the CSV dataset logged for `env_id`.

    When `whiten` is set, features are normalised with training-set
    statistics (guarded by `eps`) and the statistics are saved for inference.
    """
    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]

    global in_size, out_size
    in_size = observation_shape
    out_size = observation_shape - 3  # output drops the 3-d goal

    set_global_seeds(seed)
    # env.seed(seed)

    with U.single_threaded_session() as sess:
        csv_filename = osp.join(log_dir, "%s.csv" % env_id)
        base_dataset = np.loadtxt(csv_filename, delimiter=',')
        train, test = train_test_split(base_dataset, test_size=0.2)

        # Nearest-neighbour baseline error, printed for reference.
        nn_error = get_nn_error(train, test, in_size)
        print("memory based nn error", nn_error)

        if whiten:
            train_feat_mean = np.mean(train, axis=0)
            train_feat_std = np.std(train, axis=0)
            # Persist the statistics so inference can reuse them.
            statistics = np.concatenate((train_feat_mean, train_feat_std))
            with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
                np.save(f, statistics)

            train_dataset = pd.DataFrame(
                (train - train_feat_mean) / (train_feat_std + eps))
            test_dataset = ((test - train_feat_mean) / (train_feat_std + eps))
            # print(train_dataset.shape, test_dataset[0].shape)
            whiten_data = [train_feat_mean[in_size:], train_feat_std[in_size:]]
        else:
            train_dataset = pd.DataFrame(train)
            test_dataset = test
            whiten_data = None

        pred_model = regressor(in_shape=in_size, out_shape=out_size,
                               name="succmodel", sess=sess, log_dir=log_dir,
                               whiten_data=whiten_data)
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
def run(env_id, render, log_dir, train_epoch, batch_size=32, lr=1e-3,
        seed=0, whiten=False):
    """Fit the binary success classifier on the labelled CSV data for
    `env_id`. The last CSV column is the label; the rest are features."""
    env = gym.make(env_id)
    observation_shape = env.observation_space.shape[-1]

    global in_size, out_size
    in_size = observation_shape
    out_size = 1  # single success/failure output

    set_global_seeds(seed)
    env.seed(seed)

    with U.single_threaded_session() as sess:
        pred_model = classifier(in_shape=in_size, out_shape=out_size,
                                name="suc_pred_model", sess=sess,
                                log_dir=log_dir)
        init_op = tf.group(
            tf.global_variables_initializer(),
            tf.local_variables_initializer(),
        )
        sess.run(init_op)

        csv_filename = osp.join(log_dir, "%s_data.csv" % env_id)
        base_dataset = np.loadtxt(csv_filename, delimiter=',')
        train, test = train_test_split(base_dataset, test_size=0.2)
        train_feat = train[:, :-1]
        train_labels = train[:, -1]

        if whiten:
            train_feat_mean = np.mean(train_feat, axis=0)
            train_feat_std = np.std(train_feat, axis=0)
            # Persist statistics so inference can apply the same normalisation.
            statistics = np.concatenate((train_feat_mean, train_feat_std))
            with open(osp.join(log_dir, "%s_stat.npy" % env_id), 'wb') as f:
                np.save(f, statistics)

            # NOTE(review): unlike the regressor path, there is no epsilon
            # guard on the std here — a constant feature would divide by
            # zero. Confirm whether that is acceptable for this dataset.
            train_feat_dataset = ((train_feat - train_feat_mean) /
                                  train_feat_std)
            print(train_feat_dataset.shape, train_labels[:, np.newaxis].shape)
            train_dataset = pd.DataFrame(
                np.concatenate(
                    (train_feat_dataset, train_labels[:, np.newaxis]),
                    axis=1))
            test_feat_dataset = ((test[:, :-1] - train_feat_mean) /
                                 train_feat_std)
            test_dataset = [test_feat_dataset, test[:, [-1]]]
        else:
            train_dataset = pd.DataFrame(
                np.concatenate((train_feat, train_labels[:, np.newaxis]),
                               axis=1))
            test_dataset = [test[:, :-1], test[:, [-1]]]

        pred_model.train(train_epoch, batch_size, lr, train_dataset,
                         test_dataset)
        pred_model.save()
def run(env_id, seed, evaluation, **kwargs):
    """Train DQN on `env_id`, optionally over a skillset.

    kwargs must provide the hyperparameters forwarded to training.train
    ('lr', 'num_timesteps', 'batch_size', 'save_freq', 'prioritized_replay',
    'gamma', 'log_dir', 'num_eval_episodes', 'render', 'render_eval',
    'commit_for') plus 'skillset' and 'eval_env_id'.
    """
    env = gym.make(env_id)
    logger.info("Env info")
    logger.info(env.__doc__)
    logger.info("-"*20)
    gym.logger.setLevel(logging.WARN)

    if evaluation:
        if kwargs['eval_env_id']:
            eval_env_id = kwargs['eval_env_id']
        else:
            eval_env_id = env_id
        eval_env = gym.make(eval_env_id)
        # Drop the key so it is not forwarded to training.train.
        del kwargs['eval_env_id']
    else:
        eval_env = None

    # BUGFIX: my_skill_set was previously assigned only inside the `if`
    # branch, so running without a skillset raised NameError at the
    # training.train(..., my_skill_set=my_skill_set, ...) call below.
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
    else:
        my_skill_set = None

    model = models.mlp([64])

    # Seed everything to make things reproducible.
    logger.info('seed={}, logdir={}'.format(seed, logger.get_dir()))
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    start_time = time.time()
    training.train(
        env=env,
        eval_env=eval_env,
        q_func=model,
        lr=kwargs['lr'],
        max_timesteps=kwargs['num_timesteps'],
        buffer_size=50000,
        exploration_fraction=0.1,
        exploration_final_eps=0.002,
        train_freq=1,
        batch_size=kwargs['batch_size'],
        print_freq=100,
        checkpoint_freq=kwargs['save_freq'],
        # Do not start learning before one full batch can be sampled.
        learning_starts=max(50, kwargs['batch_size']),
        target_network_update_freq=100,
        prioritized_replay=kwargs['prioritized_replay'],
        prioritized_replay_alpha=0.6,
        prioritized_replay_beta0=0.4,
        prioritized_replay_beta_iters=None,
        prioritized_replay_eps=1e-6,
        param_noise=False,
        gamma=kwargs['gamma'],
        log_dir=kwargs['log_dir'],
        my_skill_set=my_skill_set,
        num_eval_episodes=kwargs['num_eval_episodes'],
        render=kwargs['render'],
        render_eval=kwargs['render_eval'],
        commit_for=kwargs['commit_for']
    )
    env.close()
    if eval_env is not None:
        eval_env.close()
    logger.info('total runtime: {}s'.format(time.time() - start_time))
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Evaluate a hierarchical DDPG agent (skill choice + skill parameters)
    on `env_id`. Single-process: rank is fixed at 0."""
    rank = 0
    if rank != 0:
        logger.set_level(logger.DISABLED)
    # Accessed eagerly so a missing key fails fast with a KeyError.
    dologging = kwargs["dologging"]

    env = gym.make(env_id)
    gym.logger.setLevel(logging.WARN)
    eval_env = gym.make(env_id) if (evaluation and rank == 0) else None

    tf.reset_default_graph()

    # Action size is skill-count + skill-parameter dims when a skillset is
    # loaded, otherwise the raw environment action size.
    if kwargs['skillset']:
        skillset_file = __import__("HER.skills.%s" % kwargs['skillset'],
                                   fromlist=[''])
        my_skill_set = SkillSet(skillset_file.skillset)
        nb_actions = my_skill_set.params + my_skill_set.len
    else:
        nb_actions = env.action_space.shape[-1]

    # NOTE(review): memory is sized from env.action_space.shape even when a
    # skillset redefines nb_actions — the training-time counterpart uses
    # (nb_actions,). Confirm the buffer is unused at test time.
    memory = Memory(limit=int(1e6),
                    action_shape=env.action_space.shape,
                    observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)

    if kwargs['skillset'] is None:
        actor = Actor(discrete_action_size=env.env.discrete_action_size,
                      cts_action_size=nb_actions - env.env.discrete_action_size,
                      layer_norm=layer_norm)
        my_skill_set = None
    else:
        # Actor chooses among skills and emits their continuous parameters.
        actor = Actor(discrete_action_size=my_skill_set.len,
                      cts_action_size=nb_actions - my_skill_set.len,
                      layer_norm=layer_norm)

    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed,
                                                     logger.get_dir()))
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    if rank == 0:
        start_time = time.time()
    testing.test(env=env, eval_env=eval_env, param_noise=None,
                 action_noise=None, actor=actor, critic=critic,
                 memory=memory, my_skill_set=my_skill_set, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))