def generate_discrete_prob_TV(args,
                              n,
                              tv_dist_nrml=0.,
                              tol=1e-1,
                              max_iter=5e6):
    """
    Randomly generates a discrete distribution of dimension n at a given total-variation (TV)
    distance from the uniform distribution, using rejection sampling.

    :param args: run arguments, only passed on to write_to_log if the sampling fails
    :param n: dimension of the distribution
    :param tv_dist_nrml: desired TV distance from uniform, normalized to [0,1]
        (1 corresponds to the maximal TV distance (n - 1) / n)
    :param tol: maximal accepted absolute error of the normalized TV distance
    :param max_iter: maximal number of draws; if none of them is within tol, the failure is
        logged and the closest draw found is returned
    :return: the uniform distribution if tv_dist_nrml == 0 or n == 1, otherwise a draw of
        draw_prob_at_dist(n, l1_dist)
    """
    assert 0 <= tv_dist_nrml <= 1

    # For n == 1 the only distribution is [1.] and tv_dist_max below is 0, so the normalized
    # error would be NaN/inf and no draw could ever be accepted.
    if tv_dist_nrml == 0. or n == 1:
        return np.ones(n) / n

    tv_dist_max = 0.5 * 2 * (n - 1) / n  # note: tv_dist = 0.5 * L1 dist
    tv_dist = tv_dist_nrml * tv_dist_max  # de-normalize
    l1_dist = 2 * tv_dist  # the desired L1 dist

    best_probs = None
    best_err = float('inf')
    best_tv_dist_nrml = None
    i = 0
    while True:
        i += 1
        probs = draw_prob_at_dist(n, l1_dist)
        # Check that we indeed got the correct L1 distance from a uniform distribution
        curr_l1_dist = np.sum(np.abs(probs - 1 / n))
        curr_tv_dist_nrml = 0.5 * curr_l1_dist / tv_dist_max
        curr_err = np.abs(curr_tv_dist_nrml - tv_dist_nrml)
        if curr_err < best_err:
            best_err = curr_err
            best_probs = probs
            best_tv_dist_nrml = curr_tv_dist_nrml
        # Test for success before the iteration limit, so that a draw accepted on the very
        # last iteration is not reported as a failure.
        if curr_err < tol:
            return probs
        if i >= max_iter:
            write_to_log([
                'rejection sampling failed -',
                'desired tv_dist [normalized]: ', tv_dist_nrml,
                ', best_tv_dist_nrml: ', best_tv_dist_nrml, 'best_err: ',
                best_err
            ], args)
            return best_probs
Ejemplo n.º 2
0
def run_simulation_remote(args_run, job_name, job_info):
    """
    Runs a single job with the algorithm selected by args_run.alg.

    Only 'TD3' and 'OurDDPG' are handled (both by run_simulation_TD3); any other value
    raises an AssertionError.
    The job name is appended to args_run.run_name (in place).

    :return: tuple of (output of run_simulation_TD3, args_run)
    """
    supported_algs = {'TD3', 'OurDDPG'}
    if args_run.alg not in supported_algs:
        # note: there is currently no active branch for other algorithms (e.g. 'SAC')
        raise AssertionError

    # imported here, so the TD3 code is loaded only when it is actually used
    from TD3_Code.mainTD3 import run_simulation_TD3

    start_msg = 'Starting: {}, time: {}'.format(job_name, time_now())
    write_to_log(start_msg, args_run)
    args_run.run_name += ':' + job_name
    result = run_simulation_TD3(args_run, job_info)
    return result, args_run
Ejemplo n.º 3
0
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
    """
    Runs (with ray) all the experiments whose entry in loss is still NaN, and fills loss in place.

    For each repetition and each hyper-parameter grid value, the jobs of all regularization
    types and regularization parameters are first submitted and then gathered.
    The results so far are saved after each repetition.

    :param args: run arguments (reads args.seed, args.reg_types, args.n_reps)
    :param hyper_grid_vals: grid of hyper-parameter values, each one is set with set_hyper_param
    :param loss: dict, loss[reg_type] is indexed by [i_hyper_grid, i_reg_param, i_rep];
        NaN entries mark the experiments to run
    :param reg_grids: dict, reg_grids[reg_type] is the grid of regularization parameters
    :param local_mode: passed to start_ray
    :return: dict with the results saved after the last repetition (empty if args.n_reps is 0)
    """

    def _format_elapsed(n_seconds):
        # Formats by hand, since time.gmtime + '%H' wraps the hours at 24 for long runs
        hours, remainder = divmod(int(n_seconds), 3600)
        minutes, seconds = divmod(remainder, 60)
        return '{:02d} hours, {:02d} minutes and {:02d} seconds'.format(hours, minutes, seconds)

    start_ray(local_mode)
    write_to_log('local_mode == {}'.format(local_mode), args)
    SetMrpArgs(args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    reg_types = args.reg_types
    n_reps = args.n_reps
    results_dict = dict()

    write_to_log('***** Starting  {} reps'.format(n_reps), args)
    for i_rep in range(n_reps):

        for i_hyper_grid, hyper_grid_val in enumerate(hyper_grid_vals):
            args_run = deepcopy(args)
            set_hyper_param(args_run, hyper_grid_val)

            # Send jobs (only for the entries that do not have a result yet):
            pending_jobs = []
            for reg_type in reg_types:
                for i_reg_param, reg_param in enumerate(reg_grids[reg_type]):
                    if np.isnan(loss[reg_type][i_hyper_grid, i_reg_param, i_rep]):
                        job_id = run_exp.remote(i_rep, args_run, reg_type, reg_param)
                        pending_jobs.append((reg_type, i_reg_param, job_id))

            # Gather results (in submission order):
            for reg_type, i_reg_param, job_id in pending_jobs:
                loss[reg_type][i_hyper_grid, i_reg_param, i_rep] = ray.get(job_id)
        # end for i_hyper_grid

        # Save results so far
        results_dict = {'hyper_grid_vals': hyper_grid_vals, 'loss': loss, 'reg_grids': reg_grids, 'n_reps_finished': i_rep + 1}
        save_run_data(args, results_dict, verbose=0)
        time_str = _format_elapsed(timeit.default_timer() - start_time)
        write_to_log('Finished: {} out of {} reps, time: {}'.format(i_rep + 1, n_reps, time_str), args)
    # end for i_rep

    stop_time = timeit.default_timer()
    write_to_log('Total runtime: ' + _format_elapsed(stop_time - start_time), args)
    return results_dict
Ejemplo n.º 4
0
def print_status(result_dir_to_load):
    """
    Prints the progress of the run saved in result_dir_to_load.

    Loads the run data and all the job files ('jobs/*.p') and reports, per grid point, the
    number of finished repetitions and, per repetition that has a job file, the number of
    finished time-steps.

    :param result_dir_to_load: path of the result directory (containing the 'jobs' sub-directory)
    """
    args, info_dict = load_run_data(result_dir_to_load)
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = len(alg_param_grid)
    n_reps = args.n_reps
    print('Loaded parameters: \n', args, '\n', '-' * 20)
    if 'result_reward_mat' in info_dict:
        result_reward_mat = info_dict['result_reward_mat']
    else:
        result_reward_mat = np.full(shape=(n_grid, n_reps), fill_value=np.nan)
    path = os.path.join(result_dir_to_load, 'jobs')
    all_files = glob.glob(os.path.join(path, "*.p"))
    # note: the builtin int is used, since the np.int alias was removed from NumPy
    n_rep_finish_per_point = np.full(shape=n_grid, fill_value=0, dtype=int)
    # -1 marks a (grid point, repetition) pair for which no job file was loaded
    n_steps_finished = np.full(shape=(n_grid, n_reps),
                               fill_value=-1,
                               dtype=int)
    for f_path in all_files:
        # NOTE: pickle is safe only for trusted files (here - files written by our own runs)
        with open(f_path, "rb") as job_file:
            save_dict = pickle.load(job_file)
        job_info = save_dict['job_info']
        timesteps_snapshots = save_dict['timesteps_snapshots']
        # NOTE(review): with side='right', a value equal to alg_param_grid[k] gives k + 1
        # (and the last grid value is skipped below) - confirm this is the intended index.
        i_grid = np.searchsorted(alg_param_grid, job_info['grid_param'],
                                 'right')
        if i_grid == n_grid:
            # the loaded param is not in our defined alg_param_grid
            continue
            # TODO: add option to load all files, even if not in the defined alg_param_grid (show_all_saved_results flag)
        i_rep = job_info['i_rep']
        if not np.isnan(result_reward_mat[i_grid, i_rep]):
            # a final result exists, so this repetition ran all its time-steps
            n_steps_finished[i_grid, i_rep] = args.max_timesteps
            n_rep_finish_per_point[i_grid] += 1
        else:
            n_steps_finished[i_grid, i_rep] = timesteps_snapshots[-1]

    for i_grid, grid_param in enumerate(alg_param_grid):
        write_to_log(
            'Grid point {}/{}, val: {}, Number of finished reps loaded: {}'.
            format(1 + i_grid, n_grid, grid_param,
                   n_rep_finish_per_point[i_grid]), args)
        for i_rep in range(n_reps):
            if n_steps_finished[i_grid, i_rep] != -1:
                print('Rep: {}, Finished Time-Steps: {}'.format(
                    i_rep, n_steps_finished[i_grid, i_rep]))
def generate_prob_given_entropy(args, n, ent_nrml, tol=1e-1, max_iter=5e6):
    """
    Randomly generates a discrete distribution of dimension n with a given entropy,
    using rejection sampling over draws of sample_simplex(n).

    :param args: run arguments, only passed on to write_to_log if the sampling fails
    :param n: dimension of the distribution
    :param ent_nrml: desired entropy, normalized to [0,1] (1 corresponds to log2(n) bits)
    :param tol: maximal accepted absolute error of the normalized entropy
    :param max_iter: maximal number of draws; if none of them is within tol, the failure is
        logged and the closest draw found is returned
    :return: the uniform distribution if ent_nrml == 1 or n == 1, otherwise a draw of
        sample_simplex(n)
    """
    assert 0. <= ent_nrml <= 1.

    # For n == 1 the only distribution is [1.] and ent_max below is log2(1) = 0, so the
    # normalized entropy would be NaN and no draw could ever be accepted.
    if ent_nrml == 1. or n == 1:
        return np.ones(n) / n

    ent_max = np.log2(n)
    ent = ent_nrml * ent_max  # de-normalize

    # use rejection sampling
    best_probs = None
    best_err = float('inf')
    best_ent_nrml = None
    i = 0
    while True:
        i += 1
        probs = sample_simplex(n)
        curr_ent = sps.entropy(probs, base=2)
        curr_ent_nrml = curr_ent / ent_max
        cur_err = np.abs(ent_nrml - curr_ent_nrml)
        if cur_err < best_err:
            best_err = cur_err
            best_probs = probs
            best_ent_nrml = curr_ent_nrml
        # Test for success before the iteration limit, so that a draw accepted on the very
        # last iteration is not reported as a failure.
        if cur_err < tol:
            return probs
        if i >= max_iter:
            write_to_log([
                'rejection sampling failed -', 'desired ent: ', ent,
                'best_err: ', best_err, ', best_ent_nrml: ', best_ent_nrml
            ], args)
            return best_probs
Ejemplo n.º 6
0
def run_simulations(args, save_result, local_mode):
    """
    Runs args.n_reps repetitions in parallel (with ray) and returns the loss statistics.

    Each repetition is run by run_rep over the whole parameter grid and configurations grid,
    and is expected to give a loss array of shape (n_configs, n_grid).

    :param args: run arguments (reads seed, n_reps, param_grid_def, config_grid_def;
        sets args.n_configs)
    :param save_result: if True, a result directory is created and the results are saved
    :param local_mode: passed to start_ray
    :return: dict with the loss average and std over the repetitions, and the two grids
    """
    args_def = deepcopy(args)  # the arguments as defined, before this function changes them
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Run simulation in parallel processes ------------------------------------------#
    # each remote call returns an object id:
    loss_rep_id_lst = [
        run_rep.remote(i_rep, param_val_grid, config_grid, args)
        for i_rep in range(n_reps)
    ]
    # -----  get the results --------------------------------------------#
    for i_rep, loss_rep_id in enumerate(loss_rep_id_lst):
        loss_mat[i_rep] = ray.get(loss_rep_id)
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps),
                     args)
    # end for i_rep
    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid
    }
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    # Format by hand, since time.gmtime + '%H' wraps the hours at 24 for long runs
    hours, remainder = divmod(int(stop_time - start_time), 3600)
    minutes, seconds = divmod(remainder, 60)
    write_to_log(
        'Total runtime: ' +
        '{:02d} hours, {:02d} minutes and {:02d} seconds'.format(
            hours, minutes, seconds), args)
    write_to_log(
        ['-' * 10 + 'Defined args: ',
         pretty_print_args(args_def), '-' * 20], args)
    return info_dict
def run_simulations(args, local_mode):
    """
    Runs run_main_control for each pair in the (L2 factor x gamma) grid and saves the
    resulting loss average and std per pair.

    :param args: run arguments (reads seed, l2_grid_def, gam_grid_def); each run gets its own
        deep copy with param_grid_def and default_gamma set to the current grid point
    :param local_mode: passed to start_ray
    :return: dict with the two grids and the loss average / std arrays,
        of shape (len(l2_grid), len(gam_grid))
    """
    start_ray(local_mode)
    # note: the result dir is created once, before the first write to the log
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    run_idx = 0
    for i0, l2_factor in enumerate(l2_grid):
        for i1, gamma in enumerate(gam_grid):
            args_run = deepcopy(args)
            # a grid with a single point: the current L2 factor
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_factor]
            }
            args_run.default_gamma = gamma

            # ray was already started above, and the results are saved only once, at the end
            info_dict = run_main_control(args_run,
                                         save_result=False,
                                         plot=False,
                                         init_ray=False)
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, loss_avg.size))
        # end for
    # end for
    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    # Format by hand, since time.gmtime + '%H' wraps the hours at 24 for long runs
    hours, remainder = divmod(int(stop_time - start_time), 3600)
    minutes, seconds = divmod(remainder, 60)
    write_to_log(
        'Total runtime: ' +
        '{:02d} hours, {:02d} minutes and {:02d} seconds'.format(
            hours, minutes, seconds), args)
    return grid_results_dict
def run_simulations(args, save_result, local_mode, init_ray=True):
    """
    Runs args.n_reps repetitions in parallel (with ray) and returns the planing loss statistics.

    Each repetition is run by run_rep and is expected to give a loss array of shape
    (n_config_grid, n_grid). The statistics are updated (and saved, if save_result is True)
    after each gathered repetition, using only the repetitions gathered so far.

    :param args: run arguments (reads seed, n_reps, param_grid_def, config_grid_def)
    :param save_result: if True, a result directory is created and the results are saved
    :param local_mode: passed to start_ray
    :param init_ray: if False, ray is assumed to be already started by the caller
    :return: dict with the planing loss average and std over the repetitions, the parameters
        grid and the number of finished repetitions (empty dict if args.n_reps is 0)
    """

    def _format_elapsed(n_seconds):
        # Formats by hand, since time.gmtime + '%H' wraps the hours at 24 for long runs
        hours, remainder = divmod(int(n_seconds), 3600)
        minutes, seconds = divmod(remainder, 60)
        return '{:02d} hours, {:02d} minutes and {:02d} seconds'.format(
            hours, minutes, seconds)

    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}
    # ----- Run simulation in parallel processes ------------------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # each repetition gets its own copy of the args; the remote call returns an object id
        args_r = deepcopy(args)
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid,
                                             args_r.config_grid_def, args_r)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # end for i_rep
    # -----  get the results --------------------------------------------#
    log_interval = max(n_reps // 100, 1)  # write to the log about 100 times at most
    for i_rep, loss_rep_id in enumerate(loss_rep_id_lst):
        loss_rep = ray.get(loss_rep_id)
        if i_rep % log_interval == 0:
            time_str = _format_elapsed(timeit.default_timer() - start_time)
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    i_rep + 1, n_reps, time_str), args)
        # end if
        planing_loss[i_rep] = loss_rep
        # Use only the gathered repetitions: the rows of the unfinished ones are still zero
        # and would bias the intermediate statistics (after the last repetition this is the
        # whole array).
        finished_loss = planing_loss[:i_rep + 1]
        info_dict = {
            'planing_loss_avg': finished_loss.mean(axis=0),
            'planing_loss_std': finished_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': i_rep + 1
        }
        if save_result:
            save_run_data(args, info_dict, verbose=0)
        # end if
    # end for i_rep
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log('Total runtime: ' + _format_elapsed(stop_time - start_time),
                 args, save_result)
    return info_dict
Ejemplo n.º 9
0
        else:
            n_reps_per_point[i_grid] = finished_reps_per_point[i_grid]

    # now take completed results from loaded data:
    result_reward_mat = np.full((n_grid, np.max(n_reps_per_point)), np.nan)
    for i_grid, grid_param in enumerate(new_alg_param_grid):
        if grid_param in loaded_alg_param_grid:
            load_idx = np.nonzero(loaded_alg_param_grid == grid_param)
            for i_rep in range(finished_reps_per_point[i_grid]):
                result_reward_mat[i_grid, i_rep] = loaded_result_mat[load_idx,
                                                                     i_rep]
            # end for i_rep
        # end if
    # end for i_grid
    write_to_log(
        'Continue run with new grid def {}, {}'.format(new_param_grid_def,
                                                       time_now()), args)
    write_to_log('Run parameters: \n' + str(args) + '\n' + '-' * 20, args)
    pretty_print_args(args)
    alg_param_grid = new_alg_param_grid
# --------------------------------------------------------------------------------------------------------------------#

elif run_mode == 'New':
    # Start from scratch
    run_time = 0
    create_result_dir(args)
    os.makedirs(os.path.join(args.result_dir, 'jobs'))
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = len(alg_param_grid)
    n_reps = args.n_reps
    n_reps_per_point = np.full(shape=n_grid, fill_value=n_reps, dtype=np.int)