def run_simulations(args, save_result, local_mode, init_ray=True):
    """Launch ``args.n_reps`` repetitions through Ray and aggregate their losses.

    Every repetition is submitted with ``run_rep.remote`` before any result
    is collected. Results are then gathered in submission order and the
    running statistics are refreshed (and optionally saved) after each one.

    Args:
        args: Run configuration. Reads ``seed``, ``n_reps``,
            ``param_grid_def`` and ``config_grid_def``; also handed to the
            logging / saving helpers.
        save_result: When true, a result directory is created and the
            running and final ``info_dict`` are written with
            ``save_run_data``.
        local_mode: Forwarded to ``start_ray`` and echoed in the log.
        init_ray: When false, ``start_ray`` is not called here.

    Returns:
        dict: ``planing_loss_avg`` and ``planing_loss_std`` (statistics over
        the repetition axis), ``alg_param_grid`` and ``n_reps_finished``.
        An empty dict when ``args.n_reps`` is 0.
    """
    if init_ray:
        start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    config_grid_vals = get_grid(args.config_grid_def)
    n_config_grid = len(config_grid_vals)
    # One (n_config_grid, n_grid) loss matrix per repetition.
    planing_loss = np.zeros((n_reps, n_config_grid, n_grid))
    info_dict = {}

    # ----- Submit all repetitions to run in parallel -----------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # Each job receives its own copy of the arguments.
        args_r = deepcopy(args)
        # .remote() returns an object id; the result is fetched later.
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid,
                                             args_r.config_grid_def, args_r)
        loss_rep_id_lst.append(planing_loss_rep_id)

    # ----- Collect the results ---------------------------------------------#
    # Log roughly 100 progress lines at most, whatever n_reps is.
    log_interval = max(n_reps // 100, 1)
    for i_rep, loss_rep_id in enumerate(loss_rep_id_lst):
        loss_rep = ray.get(loss_rep_id)
        n_finished = i_rep + 1
        if i_rep % log_interval == 0:
            time_str = time.strftime(
                "%H hours, %M minutes and %S seconds",
                time.gmtime(timeit.default_timer() - start_time))
            write_to_log(
                'Finished: {} out of {} reps, time: {}'.format(
                    n_finished, n_reps, time_str), args)
        planing_loss[i_rep] = loss_rep

        # Use only the rows filled so far: the remaining rows are still the
        # initial zeros and would bias the running mean / std.
        finished_loss = planing_loss[:n_finished]
        info_dict = {
            'planing_loss_avg': finished_loss.mean(axis=0),
            'planing_loss_std': finished_loss.std(axis=0),
            'alg_param_grid': alg_param_grid,
            'n_reps_finished': n_finished
        }
        if save_result:
            # Checkpoint after every repetition.
            save_run_data(args, info_dict, verbose=0)

    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args, save_result)
    return info_dict
Esempio n. 2
0
def run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode):
    """Fill the missing (NaN) entries of ``loss`` by running experiments with Ray.

    For every repetition and every hyper-parameter grid value, one
    ``run_exp`` job is submitted per regularization parameter whose entry
    ``loss[reg_type][i_hyper_grid, i_reg_param, i_rep]`` is still NaN.
    Entries that already hold a value are left untouched. ``loss`` is
    updated in place and the results so far are saved after each repetition.

    Args:
        args: Run configuration. Reads ``seed``, ``reg_types`` and
            ``n_reps``; also handed to the helper functions.
        hyper_grid_vals: Sized iterable of hyper-parameter values.
        loss: Mapping ``reg_type -> array`` indexed as
            ``[i_hyper_grid, i_reg_param, i_rep]``.
        reg_grids: Mapping ``reg_type -> sequence`` of regularization
            parameters.
        local_mode: Forwarded to ``start_ray`` and echoed in the log.

    Returns:
        dict: ``hyper_grid_vals``, ``loss``, ``reg_grids`` and
        ``n_reps_finished``. An empty dict when ``args.n_reps`` is 0.
    """
    start_ray(local_mode)
    write_to_log('local_mode == {}'.format(local_mode), args)
    SetMrpArgs(args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    reg_types = args.reg_types

    # Not used below; kept so an unsized hyper_grid_vals still fails here.
    n_hyper_grid = len(hyper_grid_vals)
    n_reps = args.n_reps
    results_dict = dict()

    write_to_log('***** Starting  {} reps'.format(n_reps), args)
    for i_rep in range(n_reps):
        n_finished = i_rep + 1

        for i_hyper_grid, hyper_grid_val in enumerate(hyper_grid_vals):
            args_run = deepcopy(args)
            set_hyper_param(args_run, hyper_grid_val)

            # Submit jobs: one slot per regularization parameter, holding
            # either a Ray object id or None when a result already exists.
            job_ids = {}
            for reg_type in reg_types:
                job_ids[reg_type] = [
                    run_exp.remote(i_rep, args_run, reg_type, reg_param)
                    if np.isnan(loss[reg_type][i_hyper_grid, i_slot, i_rep])
                    else None
                    for i_slot, reg_param in enumerate(reg_grids[reg_type])
                ]

            # Gather results of the jobs that were actually submitted.
            for reg_type in reg_types:
                for i_slot, job_id in enumerate(job_ids[reg_type]):
                    if job_id is None:
                        continue
                    loss[reg_type][i_hyper_grid, i_slot, i_rep] = ray.get(job_id)

        # Save results so far
        results_dict = {
            'hyper_grid_vals': hyper_grid_vals,
            'loss': loss,
            'reg_grids': reg_grids,
            'n_reps_finished': n_finished,
        }
        save_run_data(args, results_dict, verbose=0)
        elapsed = time.gmtime(timeit.default_timer() - start_time)
        time_str = time.strftime("%H hours, %M minutes and %S seconds", elapsed)
        write_to_log(
            'Finished: {} out of {} reps, time: {}'.format(
                n_finished, n_reps, time_str),
            args)

    stop_time = timeit.default_timer()
    total_str = time.strftime("%H hours, %M minutes and %S seconds",
                              time.gmtime(stop_time - start_time))
    write_to_log('Total runtime: ' + total_str, args)
    return results_dict
def run_simulations(args, save_result, local_mode):
    """Run ``args.n_reps`` repetitions through Ray and summarize the loss.

    All repetitions are submitted first with ``run_rep.remote``. Their
    results are then fetched in submission order into one loss array, whose
    mean and standard deviation over the repetition axis are returned.

    Args:
        args: Run configuration. Reads ``seed``, ``n_reps``,
            ``param_grid_def`` and ``config_grid_def``. ``args.n_configs``
            is set here as a side effect.
        save_result: When true, a result directory is created and the
            summary is written with ``save_run_data``.
        local_mode: Forwarded to ``start_ray`` and echoed in the log.

    Returns:
        dict: ``loss_avg``, ``loss_std``, ``param_val_grid`` and
        ``config_grid``.
    """
    # Snapshot the arguments as given, before n_configs is added below;
    # this copy is what gets printed at the end of the log.
    args_def = deepcopy(args)
    start_ray(local_mode)
    if save_result:
        create_result_dir(args)
        write_to_log('local_mode == {}'.format(local_mode), args)

    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    n_reps = args.n_reps
    param_val_grid = get_grid(args.param_grid_def)
    n_grid = param_val_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    # One (n_configs, n_grid) loss matrix per repetition.
    loss_mat = np.zeros((n_reps, n_configs, n_grid))

    # ----- Submit every repetition; each call returns a Ray object id -----#
    pending_ids = [
        run_rep.remote(i_rep, param_val_grid, config_grid, args)
        for i_rep in range(n_reps)
    ]

    # ----- Fetch the results in submission order --------------------------#
    for n_done, pending_id in enumerate(pending_ids, start=1):
        rep_loss = ray.get(pending_id)
        write_to_log('Finished: {} out of {} reps'.format(n_done, n_reps),
                     args)
        loss_mat[n_done - 1] = rep_loss

    info_dict = {
        'loss_avg': loss_mat.mean(axis=0),
        'loss_std': loss_mat.std(axis=0),
        'param_val_grid': param_val_grid,
        'config_grid': config_grid
    }
    if save_result:
        save_run_data(args, info_dict)

    stop_time = timeit.default_timer()
    runtime_str = time.strftime("%H hours, %M minutes and %S seconds",
                                time.gmtime(stop_time - start_time))
    write_to_log('Total runtime: ' + runtime_str, args)

    defined_args_msg = [
        '-' * 10 + 'Defined args: ',
        pretty_print_args(args_def),
        '-' * 20,
    ]
    write_to_log(defined_args_msg, args)
    return info_dict
def run_simulations(args, local_mode):
    """Sweep an (L2 factor, gamma) grid and record the loss at each point.

    For every pair of values from the two grids, a copy of ``args`` is
    configured with that single L2 factor and that gamma, then passed to
    ``run_main_control`` (without saving, plotting or re-initializing Ray).
    The first entry of the returned ``planing_loss_avg`` and
    ``planing_loss_std`` is stored in the corresponding grid cell.

    Args:
        args: Run configuration. Reads ``seed``, ``l2_grid_def`` and
            ``gam_grid_def``; also handed to the logging / saving helpers.
        local_mode: Forwarded to ``start_ray`` and echoed in the log.

    Returns:
        dict: ``l2_grid``, ``gam_grid`` and the ``loss_avg`` / ``loss_std``
        arrays of shape ``(len(l2_grid), len(gam_grid))``.
    """
    start_ray(local_mode)
    # Create the result directory once, before the first log write. The
    # original repeated this call after logging had already started.
    create_result_dir(args)
    write_to_log('local_mode == {}'.format(local_mode), args)
    start_time = timeit.default_timer()
    set_random_seed(args.seed)

    l2_grid = get_grid(args.l2_grid_def)
    gam_grid = get_grid(args.gam_grid_def)
    write_to_log('gamma_grid == {}'.format(gam_grid), args)
    write_to_log('l2_grid == {}'.format(l2_grid), args)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    n_runs = loss_avg.size
    run_idx = 0
    for i0 in range(grid_shape[0]):
        for i1 in range(grid_shape[1]):
            # Configure an independent copy so the sweep never alters args.
            args_run = deepcopy(args)
            # Restrict the inner parameter grid to this single L2 factor.
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i0]]
            }
            args_run.default_gamma = gam_grid[i1]

            # Ray was started above, so the inner run must not restart it.
            info_dict = run_main_control(args_run,
                                         save_result=False,
                                         plot=False,
                                         init_ray=False)
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, n_runs))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
Esempio n. 5
0
		n_hyper_grid = len(hyper_grid_vals)
		n_reps = args.n_reps
		reg_types = args.reg_types

		# define search grids for regularization parameters
		reg_grids = dict()
		for reg_type in reg_types:
			reg_param_grid_def = args.search_grid_def[reg_type]
			reg_grids[reg_type] = get_grid(reg_param_grid_def)

		# init result matrix with nan (no result)
		loss = {reg_type: np.full((n_hyper_grid, len(reg_grids[reg_type]), n_reps), np.nan) for reg_type in reg_types}

		# run
		results_dict = run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode)
		save_run_data(args, results_dict)
	# *********************************
	elif run_mode == 'Continue':

		loaded_args, loaded_results_dict = load_run_data(result_dir_to_load)
		args = loaded_args
		args.result_dir = result_dir_to_load  # update the path, in case the result folder moved
		hyper_grid_vals = loaded_results_dict['hyper_grid_vals']
		loss = loaded_results_dict['loss']
		reg_grids = loaded_results_dict['reg_grids']
		results_dict = run_simulation(args, hyper_grid_vals, loss, reg_grids, local_mode)
		save_run_data(args, results_dict)
	# *********************************
	else:
		raise AssertionError('Unrecognized run_mode')
	# *********************************
Esempio n. 6
0
                    result_reward_mat[i_grid, i_rep]):
                continue  # skip
            output, args_run = ray.get(out_id[i_grid][i_rep])
            result_reward_mat[i_grid, i_rep] = output
            # note: the final reward is an average performance on eval_episodes=10 of final policy
            write_to_log(
                f'Finished Rep: {i_rep + 1}/{n_reps_per_point[i_grid]} of Grid point {i_grid}/{len(alg_param_grid)}'
                f' ({args_run.job_name}), Reward : {output}, Time now: {time_now()}',
                args)
            # Save results so far:
            stop_time = timeit.default_timer()
            run_time += stop_time - start_time
            start_time = timeit.default_timer()
            save_run_data(args, {
                'alg_param_grid': alg_param_grid,
                'result_reward_mat': result_reward_mat,
                'run_time': run_time
            },
                          verbose=1)
    # end for i_grid
    # end for i_rep

    write_to_log(
        'Total runtime: ' + time.strftime(
            "%H hours, %M minutes and %S seconds", time.gmtime(run_time)),
        args)

# --------------------------------------------------------------------------------------------------------------------#
#  Plot results
# --------------------------------------------------------------------------------------------------------------------#

mean_reward = []