def run_simulations(args, save_result, local_mode, init_ray=True):
    """Run every repetition of the experiment remotely and aggregate the loss.

    One ``run_rep`` remote task is submitted per repetition; results are then
    collected in submission order. After each collected repetition the
    running statistics are rebuilt (and saved when ``save_result`` is set),
    so an interrupted run still leaves a usable snapshot on disk.

    Args:
        args: Run configuration. Reads ``seed``, ``n_reps``,
            ``param_grid_def`` and ``config_grid_def``.
        save_result: When true, create the result directory and save the
            statistics after every repetition and once more at the end.
        local_mode: Forwarded to ``start_ray``.
        init_ray: When false, ``start_ray`` is not called here.

    Returns:
        dict with keys ``planing_loss_avg``, ``planing_loss_std``,
        ``alg_param_grid`` and ``n_reps_finished``; an empty dict when
        ``args.n_reps`` is 0.
    """
    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}

    # ----- Run the simulations in parallel processes -----#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # Each task gets its own copy of the arguments; the call returns an
        # object id, not the result itself.
        args_r = deepcopy(args)
        loss_rep_id_lst.append(
            run_rep.remote(i_rep, alg_param_grid, args_r.config_grid_def,
                           args_r))

    # ----- Collect the results -----#
    log_every = max(n_reps // 100, 1)  # at most ~100 progress messages
    for i_rep, loss_rep_id in enumerate(loss_rep_id_lst):
        loss_rep = ray.get(loss_rep_id)
        n_finished = i_rep + 1
        if i_rep % log_every == 0:
            time_str = time.strftime(
                "%H hours, %M minutes and %S seconds",
                time.gmtime(timeit.default_timer() - start_time))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    n_finished, n_reps, time_str), args)
        planing_loss[i_rep] = loss_rep
        # Only the rows collected so far hold real data; the remaining rows
        # are still the zeros from the pre-allocation and must not enter the
        # statistics reported together with 'n_reps_finished'.
        finished_loss = planing_loss[:n_finished]
        info_dict = {
            'planing_loss_avg': finished_loss.mean(axis=0),
            'planing_loss_std': finished_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': n_finished
        }
        if save_result:
            save_run_data(args, info_dict, verbose=0)

    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args, save_result)
    return info_dict
def run_simulations(args, save_result, local_mode):
    """Run all repetitions remotely and return the aggregated loss statistics.

    Args:
        args: Run configuration. Reads ``seed``, ``n_reps``,
            ``param_grid_def`` and ``config_grid_def``; ``n_configs`` is
            written onto it.
        save_result: When true, create the result directory and save the
            final statistics.
        local_mode: Forwarded to ``start_ray``.

    Returns:
        dict with keys ``loss_avg``, ``loss_std``, ``param_val_grid`` and
        ``config_grid``.
    """
    # Keep a copy of the arguments as they were passed in, taken before this
    # function adds 'n_configs'; it is logged at the end.
    args_def = deepcopy(args)
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Submit one remote task per repetition (returns object ids) -----#
    loss_rep_id_lst = [
        run_rep.remote(i_rep, param_val_grid, config_grid, args)
        for i_rep in range(n_reps)
    ]

    # ----- Collect the results in submission order -----#
    for n_done, rep_id in enumerate(loss_rep_id_lst, start=1):
        rep_loss = ray.get(rep_id)
        write_to_log('Finished: {} out of {} reps'.format(n_done, n_reps),
                     args)
        loss_mat[n_done - 1] = rep_loss

    info_dict = dict(
        loss_avg=loss_mat.mean(axis=0),
        loss_std=loss_mat.std(axis=0),
        param_val_grid=param_val_grid,
        config_grid=config_grid,
    )
    if save_result:
        save_run_data(args, info_dict)

    elapsed = timeit.default_timer() - start_time
    runtime_str = time.strftime("%H hours, %M minutes and %S seconds",
                                time.gmtime(elapsed))
    write_to_log('Total runtime: ' + runtime_str, args)
    write_to_log(
        ['-' * 10 + 'Defined args: ',
         pretty_print_args(args_def), '-' * 20], args)
    return info_dict
def run_simulations(args, local_mode):
    """Sweep a 2-D grid of L2 factor x gamma and record the loss per point.

    For every (L2 factor, gamma) pair a copy of ``args`` is configured with a
    single-value ``L2_factor`` parameter grid and that gamma as
    ``default_gamma``, then handed to ``run_main_control`` (without saving,
    plotting, or re-initializing Ray).

    Args:
        args: Run configuration. Reads ``seed``, ``l2_grid_def`` and
            ``gam_grid_def``.
        local_mode: Forwarded to ``start_ray``.

    Returns:
        dict with keys ``l2_grid``, ``gam_grid``, ``loss_avg`` and
        ``loss_std``; the two loss arrays have shape
        ``(len(l2_grid), len(gam_grid))``.
    """
    start_ray(local_mode)
    # The result directory is created exactly once, before the first log
    # line is written (the original called create_result_dir a second time
    # after logging had already started).
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    run_idx = 0
    for i0 in range(grid_shape[0]):
        for i1 in range(grid_shape[1]):
            # Work on a copy so the per-point overrides never leak into args.
            args_run = deepcopy(args)
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i0]]
            }
            args_run.default_gamma = gam_grid[i1]

            info_dict = run_main_control(args_run,
                                         save_result=False,
                                         plot=False,
                                         init_ray=False)
            # Index 0 selects the first entry of the returned statistics.
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, loss_avg.size))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
def run_rep(i_rep, alg_param_grid, config_grid_def, args_r):
    """Run one repetition of the experiment over the full configuration grid.

    Args:
        i_rep: Repetition index; added to ``args_r.seed`` to seed this run.
        alg_param_grid: Values of the algorithm (regularization) parameter.
        config_grid_def: Definition of the configuration grid; its ``'type'``
            entry selects which setting each grid value overrides.
        args_r: Run configuration. It is modified in place for the
            ``states_actions_TV_dist_from_uniform``, ``chain_mix_time`` and
            ``n_episodes`` grid types.

    Returns:
        Array of shape ``(n_config_values, n_param_values)`` holding the mean
        absolute difference between the optimal value function and the value
        function of the learned policy.

    Raises:
        AssertionError: If the configuration grid type is not recognized.
    """
    set_random_seed(args_r.seed + i_rep)
    config_grid_vals = get_grid(config_grid_def)
    loss_rep = np.zeros((len(config_grid_vals), len(alg_param_grid)))
    gamma_eval = args_r.gammaEval
    config_type = config_grid_def['type']

    for i_config, config_val in enumerate(config_grid_vals):
        # Number of trajectories defaults to the configured value and is
        # replaced only when that is the quantity being swept.
        n_traj = args_r.n_trajectories
        if config_type == 'n_trajectories':
            n_traj = config_val
        elif config_type == 'states_actions_TV_dist_from_uniform':
            args_r.train_sampling_def = dict(
                type='Generative',
                states_TV_dist_from_uniform=config_val,
                actions_TV_dist_from_uniform=config_val,
            )
        elif config_type == 'chain_mix_time':
            args_r.train_sampling_def = dict(type='chain_mix_time',
                                             mix_time=config_val)
        elif config_type == 'n_episodes':
            args_r.n_episodes = config_val
        elif config_type != 'None':
            raise AssertionError

        # Build the MDP and compute its optimal value function.
        mdp = MDP(args_r)
        _, V_opt, _ = PolicyIteration(mdp, gamma_eval)

        for i_grid, alg_param in enumerate(alg_param_grid):
            reg_params = get_regularization_params(
                args_r, alg_param, args_r.param_grid_def['type'])
            gamma_guidance, l1_factor, l2_factor = reg_params

            # Learn a policy with this regularization setting ...
            learned_policy = run_learning_method(args_r, mdp, n_traj,
                                                 gamma_guidance, l2_factor,
                                                 l1_factor)

            # ... and evaluate it against the optimal value function.
            V_learned, _ = PolicyEvaluation(mdp, learned_policy, gamma_eval)
            loss_rep[i_config, i_grid] = np.abs(V_opt - V_learned).mean()

    return loss_rep
Beispiel #5
0
def print_status(result_dir_to_load):
    """Report the progress of the jobs saved under a result directory.

    Loads the run arguments and results from ``result_dir_to_load``, scans
    the ``*.p`` job files in its ``jobs`` sub-directory, and reports for each
    grid point how many repetitions have a final result and how many time
    steps every loaded repetition has reached.

    Args:
        result_dir_to_load: Path of the result directory to inspect.
    """
    args, info_dict = load_run_data(result_dir_to_load)
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = len(alg_param_grid)
    n_reps = args.n_reps
    print('Loaded parameters: \n', args, '\n', '-' * 20)
    if 'result_reward_mat' in info_dict:
        result_reward_mat = info_dict['result_reward_mat']
    else:
        # No final results saved yet: treat every (grid, rep) cell as missing.
        result_reward_mat = np.full(shape=(n_grid, n_reps), fill_value=np.nan)
    path = os.path.join(result_dir_to_load, 'jobs')
    all_files = glob.glob(os.path.join(path, "*.p"))
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    n_rep_finish_per_point = np.full(shape=n_grid, fill_value=0, dtype=int)
    # -1 marks (grid, rep) cells for which no job file was loaded.
    n_steps_finished = np.full(shape=(n_grid, n_reps),
                               fill_value=-1,
                               dtype=int)
    for f_path in all_files:
        # NOTE: pickle can execute arbitrary code while loading; only point
        # this at job files written by trusted runs.
        with open(f_path, "rb") as job_file:
            save_dict = pickle.load(job_file)
        job_info = save_dict['job_info']
        timesteps_snapshots = save_dict['timesteps_snapshots']
        # NOTE(review): with side='right' a value equal to grid entry k maps
        # to k + 1, and any value >= the last entry maps to len(grid) and is
        # skipped below - confirm this is the intended indexing.
        i_grid = np.searchsorted(alg_param_grid, job_info['grid_param'],
                                 'right')
        if i_grid == len(alg_param_grid):
            # The loaded param is not in our defined alg_param_grid.
            # TODO: add option to load all files, even if not in the defined
            # alg_param_grid (show_all_saved_results flag)
            continue
        i_rep = job_info['i_rep']
        if not np.isnan(result_reward_mat[i_grid, i_rep]):
            # A final result exists, so this repetition ran to completion.
            n_steps_finished[i_grid, i_rep] = args.max_timesteps
            n_rep_finish_per_point[i_grid] += 1
        else:
            n_steps_finished[i_grid, i_rep] = timesteps_snapshots[-1]

    for i_grid, grid_param in enumerate(alg_param_grid):
        write_to_log(
            'Grid point {}/{}, val: {}, Number of finished reps loaded: {}'.
            format(1 + i_grid, len(alg_param_grid), grid_param,
                   n_rep_finish_per_point[i_grid]), args)
        for i_rep in range(n_reps):
            if n_steps_finished[i_grid, i_rep] != -1:
                print('Rep: {}, Finished Time-Steps: {}'.format(
                    i_rep, n_steps_finished[i_grid, i_rep]))
def run_main_control(args,
                     save_result=True,
                     load_run_data_flag=False,
                     result_dir_to_load='',
                     save_PDF=False,
                     plot=True,
                     local_mode=False,
                     init_ray=True):
    """Run (or load) an experiment and plot loss versus the swept parameter.

    Args:
        args: Run configuration; replaced by the loaded one when
            ``load_run_data_flag`` is set.
        save_result: Forwarded to ``run_simulations``.
        load_run_data_flag: Load results from ``result_dir_to_load`` instead
            of running the simulations.
        result_dir_to_load: Directory to load when ``load_run_data_flag`` is
            set.
        save_PDF: Save the figure via ``save_fig`` (no title is added then).
        plot: Show the figure with ``plt.show()``.
        local_mode: Forwarded to ``run_simulations``.
        init_ray: Forwarded to ``run_simulations``.

    Returns:
        The results dict, with ``'result_dir'`` set to ``args.result_dir``.

    Raises:
        AssertionError: On an unrecognized sampling type, parameter grid type
            or configuration grid type while building the figure.
    """
    SetMdpArgs(args)
    if load_run_data_flag:
        args, info_dict = load_run_data(result_dir_to_load)
    else:
        info_dict = run_simulations(args,
                                    save_result,
                                    local_mode,
                                    init_ray=init_ray)
    planing_loss_avg = info_dict['planing_loss_avg']
    planing_loss_std = info_dict['planing_loss_std']
    alg_param_grid = info_dict['alg_param_grid']
    # Results without a recorded count are treated as complete runs.
    n_reps_finished = (info_dict['n_reps_finished']
                       if 'n_reps_finished' in info_dict.keys() else
                       args.n_reps)

    # ----- Plot figures -----#
    if plot or save_PDF:
        plt.figure().gca()  # open a new figure with axes

        generative_types = {
            'Generative', 'Generative_uniform', 'Generative_Stationary'
        }
        sampling_type = args.train_sampling_def['type']
        if sampling_type in generative_types or sampling_type == 'Trajectories':
            data_size_per_traj = args.depth * args.n_episodes
        elif sampling_type == 'sample_all_s_a':
            data_size_per_traj = args.nS * args.nA * args.n_episodes
        else:
            raise AssertionError

        # x-axis label and scale factor for each parameter grid type.
        x_axis_specs = {
            'L2_factor': (r'$L_2$ Regularization Factor [1e-2]', 1e2),
            'L1_factor': (r'$L_1$ Regularization Factor ', 1.),
            'gamma_guidance': (r'Guidance Discount Factor $\gamma$', 1.),
        }
        grid_type = args.param_grid_def['type']
        if grid_type not in x_axis_specs:
            raise AssertionError('Unrecognized args.grid_type')
        x_label, xscale = x_axis_specs[grid_type]
        plt.xlabel(x_label)

        # 95% confidence interval factor
        ci_factor = 1.96 / np.sqrt(n_reps_finished)

        # Legend titles for grid types whose curve label is the raw value.
        plain_legend_titles = {
            'states_actions_TV_dist_from_uniform':
            'Total-Variation from\n uniform [normalized]',
            'chain_mix_time': 'Mixing time',
            'n_episodes': 'Num. Episodes',
        }
        legend_title = ''
        config_grid_vals = get_grid(args.config_grid_def)
        # One curve per configuration value.
        for i_config, config_val in enumerate(config_grid_vals):
            config_type = args.config_grid_def['type']
            if config_type == 'n_trajectories':
                if sampling_type in generative_types:
                    legend_title = 'Num. Samples'
                    label_str = '{} '.format(config_val * data_size_per_traj)
                else:
                    legend_title = 'Num. Trajectories'
                    label_str = '{} '.format(config_val)
            elif config_type in plain_legend_titles:
                legend_title = plain_legend_titles[config_type]
                label_str = '{} '.format(config_val)
            else:
                raise AssertionError

            curve_avg = planing_loss_avg[i_config]
            plt.errorbar(alg_param_grid * xscale,
                         curve_avg,
                         yerr=planing_loss_std[i_config] * ci_factor,
                         marker='.',
                         label=label_str)
            # 'show_stars' is not defined in this function; it is read from
            # an enclosing scope.
            if show_stars:
                # Mark the lowest point of the curve.
                i_best = np.argmin(curve_avg)
                plt.scatter(alg_param_grid[i_best] * xscale,
                            curve_avg[i_best],
                            marker='*',
                            s=400)

        plt.grid(True)
        plt.ylabel('Loss')
        plt.legend(title=legend_title, loc='best', fontsize=12)
        # 'y_lim' is likewise read from an enclosing scope.
        if y_lim:
            plt.ylim(y_lim)

        if save_PDF:
            save_fig(args.run_name)
        else:
            title_text = (args.mdp_def['type'] + '  ' + args.run_name +
                          ' \n ' + args.result_dir)
            plt.title(title_text, fontsize=6)

    pretty_print_args(args)
    if plot:
        plt.show()
    print('done')
    info_dict['result_dir'] = args.result_dir
    return info_dict
Beispiel #7
0
	write_to_log('Total runtime: ' + time.strftime("%H hours, %M minutes and %S seconds", time.gmtime(stop_time - start_time)), args)
	return results_dict


# end  run_simulations

# -------------------------------------------------------------------------------------------

if __name__ == "__main__":
	# *********************************
	if run_mode == 'Load':
		args, results_dict = load_run_data(result_dir_to_load)
	# *********************************
	elif run_mode == 'New':

		hyper_grid_vals = get_grid(args.hyper_grid_def)
		create_result_dir(args)
		n_hyper_grid = len(hyper_grid_vals)
		n_reps = args.n_reps
		reg_types = args.reg_types

		# define search grids for regularization parameters
		reg_grids = dict()
		for reg_type in reg_types:
			reg_param_grid_def = args.search_grid_def[reg_type]
			reg_grids[reg_type] = get_grid(reg_param_grid_def)

		# init result matrix with nan (no result)
		loss = {reg_type: np.full((n_hyper_grid, len(reg_grids[reg_type]), n_reps), np.nan) for reg_type in reg_types}

		# run
Beispiel #8
0
    alg_param_grid = info_dict['alg_param_grid']
    result_reward_mat = info_dict['result_reward_mat']
    run_time = info_dict['run_time']
    n_grid = len(alg_param_grid)
    n_reps_per_point = np.full(shape=n_grid, fill_value=args.n_reps)
    print('Loaded parameters: \n', args, '\n', '-' * 20)

# --------------------------------------------------------------------------------------------------------------------#

elif run_mode == 'LoadSnapshot':
    print_status(result_dir_to_load)
    #  Load previous run
    args, info_dict = load_run_data(result_dir_to_load)
    print('Loaded parameters: \n', args, '\n', '-' * 20)
    args.result_dir = result_dir_to_load  # update the path, in case the result folder moved
    alg_param_grid = get_grid(args.param_grid_def)
    run_time = info_dict['run_time']
    path = os.path.join(result_dir_to_load, 'jobs')
    all_files = glob.glob(os.path.join(path, "*.p"))
    n_grid = len(alg_param_grid)
    n_reps = args.n_reps
    n_reps_per_point = np.full(shape=n_grid, fill_value=0, dtype=np.int)
    result_reward_mat = np.full(shape=(n_grid, n_reps), fill_value=np.nan)

    for f_path in all_files:
        save_dict = pickle.load(open(f_path, "rb"))
        job_info = save_dict['job_info']
        timesteps_snapshots = save_dict['timesteps_snapshots']
        evaluations = save_dict['evaluations']
        load_idx = np.searchsorted(timesteps_snapshots,
                                   int(timesteps_snapshot_to_load), 'right')