def run_batch_job(params, LLcluster=True):
    import os
    exp_name_with_params = get_formatted_name(params)
    # Save the experiment parameters so the batch job can reload them later.
    np.save("params/" + exp_name_with_params + "_params.npy", params)
    filename = write_batch_job(exp_name_with_params, LLcluster=LLcluster)
    if LLcluster:
        # Submit through the LLSC scheduler; otherwise run the script locally.
        os.system("LLsub " + filename)
    else:
        os.system("./" + filename)
def run_alg(params,
            iters=2,
            hyperparam_file=None,
            LLcluster=True,
            exp_number=None,
            use_sfa=False,
            use_auto=False):
    exp_name = get_formatted_name(params)
    if hyperparam_file is None:
        hyperparam_file = "hyperparams/" + exp_name + "best_hyperparams.npy"
    # Unpack the pickled hyperparameter dict (stored as a 0-d object array).
    hyperparams = np.load(hyperparam_file, allow_pickle=True,
                          encoding="latin1").all()
    params["encoder"] = {}
    if use_sfa:
        params["encoder"]["forces"] = make_sfa_node(SAVE_DIR + "force_states.npy")
    if use_auto:
        params["encoder"]["im"] = (SAVE_DIR + "models/encoderside.h5",
                                   SAVE_DIR + "models/encodertop.h5")
    args = {"sample_config_best": hyperparams}
    # Overwrite the tuned values with this experiment's noise settings.
    args["obs_noise_std"] = params["obs_noise_std"]
    args["rew_noise_std"] = params["rew_noise_std"]
    args["action_noise_std"] = params["action_noise_std"]
    args["goal_radius"] = params["goal_radius"]
    trainable = make_class(params)(args)
    info_data = {}
    test_success_rates = []
    SAVE_INTERVAL = 2
    # On the cluster, distinguish array-job replicas by their SLURM task ID.
    if LLcluster and exp_number is None:
        exp_number = os.environ["SLURM_ARRAY_TASK_ID"]
    for i in range(int(iters)):
        print("on iter #", i)
        test_res = trainable._train()
        test_success_rates.append(test_res['success_rate'])
        infos = test_res['infos']
        for info in infos.keys():
            if info in info_data.keys():
                info_data[info].append(infos[info])
            else:
                info_data[info] = [infos[info]]
        if i % SAVE_INTERVAL == 0:
            print(" last success rate", test_res["success_rate"])
            for info in info_data.keys():
                np.save(
                    "run_results/" + exp_name + info + "_" + str(exp_number) +
                    ".npy", info_data[info])
            np.save(
                "run_results/" + exp_name + "test_success_rates_" +
                str(exp_number) + ".npy", test_success_rates)
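# A minimal sketch (not in the original code) of reading back the arrays that
# run_alg saves; the helper name is an assumption, and the path format simply
# mirrors the np.save call above.
def load_success_rates(exp_name, exp_number=0):
    # Returns the per-iteration test success rates for one replica.
    return np.load("run_results/" + exp_name + "test_success_rates_" +
                   str(exp_number) + ".npy")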
def _train(self):
    self.local_variables['update'] = self.nupdates
    print("nupdates", self.alg, self.nupdates, " of ", self.nupdates_total)
    _, tmp_var, infos = self.alg_module.learn_iter(**self.local_variables)
    # Evaluate with fewer rollouts on the slow manipulation environments.
    if self.env_name == "StirEnv-v0" or self.env_name == "ScoopEnv-v0":
        num_tests = 5
    else:
        num_tests = 17
    test_success_rate = self._test(n_test_rollouts=num_tests)['success_rate']
    if np.isnan(test_success_rate):
        # Drop into the debugger if evaluation ever produces a NaN.
        import ipdb
        ipdb.set_trace()
    with self.lock:
        if test_success_rate > self.best_success_rate:
            self.best_success_rates.append(test_success_rate)
            self.best_success_rate = test_success_rate
            # Checkpoint the hyperparameters behind every new best success rate.
            np.save(
                SAVE_DIR + "hyperparams/" + get_formatted_name(self.params) +
                "best_params_so_far.npy", self.sample_config_bound)
            np.save(
                SAVE_DIR + "hyperparams/" + get_formatted_name(self.params) +
                "_best_success_rates.npy", self.best_success_rates)
    self.nupdates += 1
    self.mean_reward_over_samples.append(test_success_rate)
    return {
        'done': self.nupdates > self.nupdates_total or test_success_rate > 0.95,
        'success_rate': test_success_rate,
        'infos': infos,
        'episode_reward_mean': test_success_rate
    }
def best_hyperparams_for_config(params, exp_name, smoke_test=False):
    import ray
    ray.init()
    res = run_async_hyperband(expname=exp_name,
                              smoke_test=smoke_test,
                              params=params)
    best_params = pick_params(res, exp_name)
    if not smoke_test:
        np.save(
            "hyperparams/" + get_formatted_name(params) +
            "best_hyperparams.npy", best_params)
    ray.shutdown()
    return best_params
def test_write_batch_job():
    default_params = {
        'env_name': "Pendulum-v0",
        'exp_name': "test",
        'obs_noise_std': 0,
        'action_noise_std': 0,
        'alg': 'naf',
        'goal_radius': 0.05
    }
    write_batch_job(default_params)
    with open("batch_scripts/batch_job/" +
              get_formatted_name(default_params) + ".sh") as f:
        print(f.read())
def run_action_noise_experiment(num_samples,
                                param_set,
                                exp_name,
                                env_name,
                                LLcluster=True,
                                smoke_test=False):
    # Create an experiment file based on the params, then run it with LLsub,
    # alg by alg and parameter setting by parameter setting.
    default_params = {
        'env_name': env_name,
        'exp_name': exp_name,
        'obs_noise_std': 0,
        'action_noise_std': 0,
        'goal_radius': 0.3,
        'rew_noise_std': 0.0,
        "encoder": {}
    }
    use_auto = True
    use_sfa = False
    for alg in algs:
        default_params['alg'] = alg
        if use_sfa:
            default_params["encoder"]["forces"] = make_sfa_node(
                SAVE_DIR + "force_states.npy")
        if use_auto:
            default_params["encoder"]["im"] = SAVE_DIR + "models/encoder.h5"
        # Earlier sweeps used noise values such as {0, 0.01, 0.1}; the current
        # sweep value is applied to goal_radius rather than action_noise_std.
        sample_space = {0.3}
        for sweep_value in sample_space:
            params = default_params.copy()
            params['goal_radius'] = sweep_value
            hyperparam_file = get_formatted_name(params) + "best_hyperparams.npy"
            if hyperparam_file not in os.listdir("hyperparams") or smoke_test:
                optimize_hyperparams(params, smoke_test=smoke_test)
            else:
                print("Already found the hyperparams")
            run_batch_job(params, LLcluster=LLcluster)
def _setup(self, arg):
    # `params` is assumed to be captured from the enclosing make_class closure.
    env_name = params['env_name']
    self.env_name = env_name
    self.exp_name = params['exp_name']
    self.save_dir = SAVE_DIR + "tune_run_results/" + get_short_form_name(params)
    if not os.path.isdir(self.save_dir):
        os.mkdir(self.save_dir)
    self.params = params
    self.alg = params['alg']
    self.lock = threading.Lock()
    self.best_success_rates = []
    date_object = datetime.now()
    self.exp_start_time = date_object.strftime('%Y-%m-%d')
    self.best_success_rate = -np.inf
    self.alg_module = alg_to_module(self.alg)
    config = tf.ConfigProto(allow_soft_placement=True,
                            intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1)
    config.gpu_options.allow_growth = True
    get_session(config=config)
    # HER needs dict observations; every other algorithm gets flat ones.
    force_flat = True
    if self.alg == "her":
        force_flat = False
    sample_config, fixed_config, env_config, cont_space = alg_to_config(
        params['alg'], env_name, force_flat=force_flat)
    if 'sample_config_best' not in arg.keys():
        # Tuning mode: sample hyperparameters from the search space.
        sample_config_sample = {ky: arg[ky] for ky in sample_config.keys()}
        sample_config_bound = sample_config_sample
        learn_params = {**sample_config_sample, **fixed_config}
        total_iters = tune_alg_to_iters[self.alg]
    else:
        # Training mode: reuse the best hyperparameters found during tuning.
        learn_params = {**arg['sample_config_best'], **fixed_config}
        sample_config_bound = arg['sample_config_best']
        total_iters = train_alg_to_iters[self.alg]
    self.sample_config_bound = sample_config_bound
    action_noise_std = params['action_noise_std']
    obs_noise_std = params['obs_noise_std']
    rew_noise_std = params['rew_noise_std']
    goal_radius = params['goal_radius']
    reward_scale = 1.0
    if "reward_scale" in arg.keys():
        reward_scale = arg["reward_scale"]
    self.nupdates_total = total_iters
    print("total num updates", self.nupdates_total)
    self.nupdates = 1
    encoder_option = params["encoder"]
    if len(encoder_option.keys()) == 0:
        encoder_option = None
    else:
        # Encoders are currently unsupported in this code path.
        assert False
        assert force_flat
    env = make_vec_env(env_name,
                       "mujoco",
                       env_config['num_env'] or 1,
                       None,
                       reward_scale=reward_scale,
                       flatten_dict_observations=force_flat,
                       rew_noise_std=rew_noise_std,
                       action_noise_std=action_noise_std,
                       obs_noise_std=obs_noise_std,
                       distance_threshold=goal_radius,
                       encoder=encoder_option)
    if self.alg == "ppo2":
        learn_params["nupdates"] = self.nupdates_total
    if 'env' not in learn_params.keys():
        learn_params['env'] = env
    learn_params["exp_name"] = get_formatted_name(self.params)
    self.local_variables = self.alg_module.learn_setup(**learn_params)
    self.mean_reward_over_samples = []
    # Only the Fetch environments report a binary success signal.
    if env_name in ["FetchPush-v1", "FetchReach-v1"]:
        self.local_variables["success_only"] = True
    else:
        print(env_name)
        self.local_variables["success_only"] = False
def optimize_hyperparams(params, smoke_test=False):
    for alg in algs:
        # The original loop never varied the alg between iterations; set it
        # explicitly so each algorithm is actually tuned.
        params['alg'] = alg
        exp_name = get_formatted_name(params)
        best_hyperparams_for_config(params, exp_name, smoke_test=smoke_test)
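# A minimal end-to-end usage sketch, not part of the original pipeline: tune
# hyperparameters for one configuration, then launch the training run locally.
# The parameter values below are illustrative assumptions.
if __name__ == "__main__":
    example_params = {
        'env_name': "FetchReach-v1",
        'exp_name': "demo",
        'obs_noise_std': 0,
        'action_noise_std': 0,
        'rew_noise_std': 0.0,
        'goal_radius': 0.05,
        'alg': 'ppo2',
        'encoder': {}
    }
    # smoke_test=True keeps the hyperparameter search short for a dry run.
    optimize_hyperparams(example_params, smoke_test=True)
    # LLcluster=False executes the generated batch script directly.
    run_batch_job(example_params, LLcluster=False)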