コード例 #1
0
def run_batch_job(params, LLcluster=True):
    """Persist ``params`` and launch the batch script generated for them.

    The parameters are saved to ``params/<formatted name>_params.npy``, a
    batch script is produced by ``write_batch_job``, and the script is then
    started through the shell: submitted with ``LLsub`` when ``LLcluster``
    is true, executed directly from the current directory otherwise.

    Args:
        params: experiment parameters; passed to ``get_formatted_name`` and
            saved with ``np.save``.
        LLcluster: choose cluster submission (``LLsub``) over local
            execution.
    """
    import os
    job_name = get_formatted_name(params)
    params_path = "params/" + job_name + "_params.npy"
    np.save(params_path, params)
    script = write_batch_job(job_name, LLcluster=LLcluster)
    # Only the command prefix differs between the cluster and local paths.
    launcher = "LLsub " if LLcluster else "./"
    os.system(launcher + script)
コード例 #2
0
def run_alg(params,
            iters=2,
            hyperparam_file=None,
            LLcluster=True,
            exp_number=None,
            use_sfa=False,
            use_auto=False):
    """Train with previously tuned hyperparameters and save the results.

    Loads the best hyperparameters for ``params``, builds the trainable via
    ``make_class(params)(args)``, calls its ``_train`` method ``iters``
    times, and periodically writes the collected success rates and info
    values to ``run_results/``.

    Args:
        params: experiment parameter dict. Its ``"encoder"`` entry is
            replaced in place; the noise and goal-radius entries are
            forwarded to the trainable.
        iters: number of ``_train`` calls (converted with ``int``).
        hyperparam_file: path of the ``.npy`` file holding the tuned
            hyperparameters; defaults to
            ``hyperparams/<formatted name>best_hyperparams.npy``.
        LLcluster: when true and ``exp_number`` is None, the experiment
            number is read from the ``SLURM_ARRAY_TASK_ID`` environment
            variable.
        exp_number: suffix used in the result file names.
        use_sfa: add a ``"forces"`` encoder built by ``make_sfa_node``.
        use_auto: add an ``"im"`` encoder given as a pair of model paths.

    Raises:
        KeyError: if ``SLURM_ARRAY_TASK_ID`` is needed but not set.
    """
    exp_name = get_formatted_name(params)
    if hyperparam_file is None:
        hyperparam_file = "hyperparams/" + exp_name + "best_hyperparams.npy"
    # np.save stores a dict as a 0-d object array; .item() unwraps it.
    # (.all() only happened to return the dict on older NumPy and yields a
    # bool for object arrays on NumPy >= 2.0.)
    # NOTE(review): allow_pickle=True executes pickled code -- only load
    # hyperparameter files produced by this pipeline.
    hyperparams = np.load(hyperparam_file,
                          allow_pickle=True,
                          encoding="latin1").item()
    params["encoder"] = {}
    if use_sfa:
        params["encoder"]["forces"] = make_sfa_node(SAVE_DIR +
                                                    "force_states.npy")
    if use_auto:
        params["encoder"]["im"] = (SAVE_DIR + "models/encoderside.h5",
                                   SAVE_DIR + "models/encodertop.h5")
    # The noise / goal settings of this run are passed alongside the tuned
    # hyperparameters.
    args = {
        "sample_config_best": hyperparams,
        "obs_noise_std": params["obs_noise_std"],
        "rew_noise_std": params["rew_noise_std"],
        "action_noise_std": params["action_noise_std"],
        "goal_radius": params["goal_radius"],
    }
    trainable = make_class(params)(args)
    info_data = {}
    test_success_rates = []
    SAVE_INTERVAL = 2
    if LLcluster and exp_number is None:
        exp_number = os.environ["SLURM_ARRAY_TASK_ID"]
    n_iters = int(iters)
    for i in range(n_iters):
        print("on iter #", i)
        test_res = trainable._train()
        test_success_rates.append(test_res['success_rate'])
        for key, value in test_res['infos'].items():
            info_data.setdefault(key, []).append(value)

        # Also save after the final iteration; otherwise its results are
        # dropped whenever the last index is not a multiple of SAVE_INTERVAL.
        if i % SAVE_INTERVAL == 0 or i == n_iters - 1:
            print(" last success rate", test_res["success_rate"])
            for key, values in info_data.items():
                np.save(
                    "run_results/" + exp_name + key + "_" + str(exp_number) +
                    ".npy", values)
            np.save(
                "run_results/" + exp_name + "test_success_rates_" +
                str(exp_number) + ".npy", test_success_rates)
コード例 #3
0
        def _train(self):
            """Run one learning update, evaluate it, and report the result.

            Calls ``learn_iter`` on the algorithm module with the stored
            local variables, measures the success rate with ``self._test``,
            and, whenever that rate beats the best one seen so far, saves
            the current hyperparameters and the history of best rates under
            ``SAVE_DIR + "hyperparams/"``.

            Returns:
                dict with ``done`` (update counter past ``nupdates_total``
                or success rate above 0.95), ``success_rate``, ``infos``
                (third value returned by ``learn_iter``) and
                ``episode_reward_mean`` (same value as ``success_rate``).
            """
            self.local_variables['update'] = self.nupdates
            print("nupdates", self.alg, self.nupdates, " of ",
                  self.nupdates_total)
            _, _, infos = self.alg_module.learn_iter(**self.local_variables)
            # These two environments are evaluated with fewer rollouts.
            if self.env_name in ("StirEnv-v0", "ScoopEnv-v0"):
                num_tests = 5
            else:
                num_tests = 17
            test_success_rate = self._test(
                n_test_rollouts=num_tests)['success_rate']
            if np.isnan(test_success_rate):
                # NOTE(review): interactive breakpoint kept from the original;
                # it will stall a non-interactive (batch) run.
                import ipdb
                ipdb.set_trace()
            # The context manager releases the lock even if np.save raises.
            with self.lock:
                if test_success_rate > self.best_success_rate:
                    self.best_success_rates.append(test_success_rate)
                    self.best_success_rate = test_success_rate
                    prefix = (SAVE_DIR + "hyperparams/" +
                              get_formatted_name(self.params))
                    np.save(prefix + "best_params_so_far.npy",
                            self.sample_config_bound)
                    np.save(prefix + "_best_success_rates.npy",
                            self.best_success_rates)

            self.nupdates += 1
            self.mean_reward_over_samples.append(test_success_rate)
            return {
                'done': self.nupdates > self.nupdates_total
                or test_success_rate > 0.95,
                'success_rate': test_success_rate,
                "infos": infos,
                'episode_reward_mean': test_success_rate
            }
コード例 #4
0
def best_hyperparams_for_config(params, exp_name, smoke_test=False):
    """Search for the best hyperparameters of one configuration using ray.

    Starts ray, runs ``run_async_hyperband`` and selects the winner with
    ``pick_params``. Unless ``smoke_test`` is set, the winner is saved to
    ``hyperparams/<formatted name>best_hyperparams.npy``.

    Args:
        params: experiment parameter dict forwarded to the search.
        exp_name: experiment name forwarded to the search and to
            ``pick_params``.
        smoke_test: forwarded to the search; also suppresses saving.

    Returns:
        The value returned by ``pick_params``.
    """
    import ray
    ray.init()
    try:
        res = run_async_hyperband(expname=exp_name,
                                  smoke_test=smoke_test,
                                  params=params)
        best_params = pick_params(res, exp_name)
        if not smoke_test:
            np.save(
                "hyperparams/" + get_formatted_name(params) +
                "best_hyperparams.npy", best_params)
    finally:
        # Always shut ray down, so a failed search does not leave it
        # initialized for the next call to ray.init().
        ray.shutdown()
    return best_params
コード例 #5
0
def test_write_batch_job():
    """Write a batch job for a sample configuration and print the script.

    Serves as a manual smoke check: the generated file is read back from
    ``batch_scripts/batch_job/<formatted name>.sh`` and echoed to stdout.
    """
    default_params = {
        'env_name': "Pendulum-v0",
        'exp_name': "test",
        'obs_noise_std': 0,
        'action_noise_std': 0,
        'alg': 'naf',
        'goal_radius': 0.05
    }
    # NOTE(review): run_batch_job passes the formatted name (a string) to
    # write_batch_job, while this passes the dict -- confirm which is expected.
    write_batch_job(default_params)
    script_path = ("batch_scripts/batch_job/" +
                   get_formatted_name(default_params) + ".sh")
    # The context manager closes the file even if reading or printing fails.
    with open(script_path) as f:
        print(f.read())
コード例 #6
0
def run_action_noise_experiment(num_samples,
                                param_set,
                                exp_name,
                                env_name,
                                LLcluster=True,
                                smoke_test=False):
    """Tune (if needed) and launch a batch job per algorithm and setting.

    For every algorithm in ``algs`` and every value in the sample space, a
    parameter dict is built, hyperparameters are optimized unless a matching
    file already exists in ``hyperparams`` (always when ``smoke_test`` is
    set), and a batch job is started.

    Note that the swept value is written to ``params['goal_radius']``.
    ``num_samples`` and ``param_set`` are accepted but not used here.
    """
    base_params = dict([
        ('env_name', env_name),
        ('exp_name', exp_name),
        ('obs_noise_std', 0),
        ('action_noise_std', 0),
        ('goal_radius', 0.3),
        ('rew_noise_std', 0.0),
        ("encoder", {}),
    ])
    with_autoencoder = True
    with_sfa = False
    for alg in algs:
        base_params['alg'] = alg
        encoders = base_params["encoder"]
        if with_sfa:
            encoders["forces"] = make_sfa_node(SAVE_DIR + "force_states.npy")
        if with_autoencoder:
            encoders["im"] = SAVE_DIR + "models/encoder.h5"

        swept_values = {0.3}
        for radius in swept_values:
            # Shallow copy: the "encoder" dict stays shared with base_params.
            params = dict(base_params)
            params['goal_radius'] = radius
            wanted_file = get_formatted_name(params) + "best_hyperparams.npy"
            already_tuned = wanted_file in os.listdir("hyperparams")
            if smoke_test or not already_tuned:
                optimize_hyperparams(params, smoke_test=smoke_test)
            else:
                print("Already found the hyperparams")
            run_batch_job(params, LLcluster=LLcluster)
コード例 #7
0
        def _setup(self, arg):
            """Prepare the environment and algorithm state for this trainable.

            ``arg`` is a dict. When it contains ``'sample_config_best'`` those
            hyperparameters are used together with the iteration budget from
            ``train_alg_to_iters``; otherwise one value per key of the
            algorithm's sample config is read from ``arg`` and the budget comes
            from ``tune_alg_to_iters``. An optional ``"reward_scale"`` entry
            overrides the default of 1.0.

            ``params`` is not a parameter of this method; it is resolved from
            an enclosing scope that is not visible in this block.
            """
            #self.alg_module = arg["alg_module"]
            env_name = params['env_name']
            self.env_name = env_name
            self.exp_name = params['exp_name']
            self.save_dir = SAVE_DIR + "tune_run_results/" + get_short_form_name(
                params)
            # os.mkdir creates only the last path component.
            if not os.path.isdir(self.save_dir):
                os.mkdir(self.save_dir)
            self.params = params
            self.alg = params['alg']
            # Held by _train while it updates and saves the best results.
            self.lock = threading.Lock()
            self.best_success_rates = []
            date_object = datetime.now()
            self.exp_start_time = date_object.strftime('%Y-%m-%d')
            # Start at -inf so the first measured success rate counts as best.
            self.best_success_rate = -np.inf
            self.alg_module = alg_to_module(self.alg)
            # TensorFlow session: one thread per op pool, GPU memory on demand.
            config = tf.ConfigProto(allow_soft_placement=True,
                                    intra_op_parallelism_threads=1,
                                    inter_op_parallelism_threads=1)
            config.gpu_options.allow_growth = True
            get_session(config=config)
            force_flat = True
            if self.alg == "her":
                force_flat = False

            sample_config, fixed_config, env_config, cont_space = alg_to_config(
                params['alg'], env_name, force_flat=force_flat)
            if 'sample_config_best' not in arg.keys():
                # No tuned hyperparameters supplied: take the sampled values
                # from arg, one per key of sample_config.
                sample_config_sample = {
                    ky: arg[ky]
                    for ky in sample_config.keys()
                }
                sample_config_bound = sample_config_sample
                learn_params = {**sample_config_sample, **fixed_config}
                total_iters = tune_alg_to_iters[self.alg]
            else:
                # Tuned hyperparameters supplied: use them as given.
                learn_params = {**arg['sample_config_best'], **fixed_config}
                sample_config_bound = arg['sample_config_best']
                total_iters = train_alg_to_iters[self.alg]
            # Saved by _train whenever a new best success rate is reached.
            self.sample_config_bound = sample_config_bound
            action_noise_std = params['action_noise_std']
            obs_noise_std = params['obs_noise_std']
            rew_noise_std = params['rew_noise_std']
            goal_radius = params['goal_radius']

            reward_scale = 1.0
            if "reward_scale" in arg.keys():
                reward_scale = arg["reward_scale"]
            self.nupdates_total = total_iters
            print("total num updates", self.nupdates_total)
            # The update counter starts at 1.
            self.nupdates = 1
            encoder_option = params["encoder"]
            # Only an empty encoder dict is accepted; anything else trips the
            # assertion below.
            if len(encoder_option.keys()) == 0:
                encoder_option = None
            else:
                assert (False)
            # NOTE(review): force_flat is False for "her", so this assertion
            # fails for that algorithm -- confirm that is intended.
            assert (force_flat)
            env = make_vec_env(env_name,
                               "mujoco",
                               env_config['num_env'] or 1,
                               None,
                               reward_scale=reward_scale,
                               flatten_dict_observations=force_flat,
                               rew_noise_std=rew_noise_std,
                               action_noise_std=action_noise_std,
                               obs_noise_std=obs_noise_std,
                               distance_threshold=goal_radius,
                               encoder=encoder_option)
            #env = make_vec_env(env_name, "mujoco", env_config['num_env'] or 1, None, reward_scale=reward_scale, flatten_dict_observations=flatten_dict_observations, action_noise_std=action_noise_std, obs_noise_std=obs_noise_std)
            if self.alg == "ppo2":
                #env = VecNormalize(env)
                learn_params["nupdates"] = self.nupdates_total
            # An 'env' entry already present in the config takes precedence
            # over the environment built above.
            if 'env' not in learn_params.keys():
                learn_params['env'] = env
            learn_params["exp_name"] = get_formatted_name(self.params)

            #learn_params["load_file"] = "ppo2ScoopEnv-v0AL83bneitherobs_0.0act_0.0rw_0.3rew_noise_std_0.0"
            # The returned variables are what _train passes to learn_iter.
            self.local_variables = self.alg_module.learn_setup(**learn_params)
            self.mean_reward_over_samples = []
            # success_only is switched on for the two Fetch environments only.
            if env_name in ["FetchPush-v1", "FetchReach-v1"]:
                self.local_variables["success_only"] = True
            else:
                print(env_name)
                self.local_variables["success_only"] = False
コード例 #8
0
def optimize_hyperparams(params, smoke_test=False):
    """Run the hyperparameter search for the configuration in ``params``.

    The experiment name is derived from ``params`` with
    ``get_formatted_name`` and the search is delegated to
    ``best_hyperparams_for_config``.

    The search runs once. The previous version looped over ``algs`` without
    using the loop variable, repeating the identical search and overwriting
    the same result file on every pass.

    Args:
        params: experiment parameter dict (the algorithm is taken from it by
            the downstream code, not from ``algs``).
        smoke_test: forwarded to ``best_hyperparams_for_config``.
    """
    exp_name = get_formatted_name(params)
    best_hyperparams_for_config(params, exp_name, smoke_test=smoke_test)