コード例 #1
0
def run_simulations(args, local_mode):
    """Sweep a 2-D grid of (L2 factor, guidance gamma) pairs and collect losses.

    For every grid point, a copy of `args` is configured with a single-value
    L2 grid and the chosen default gamma, then `run_main` is invoked once.

    Args:
        args: experiment configuration namespace (grid definitions, seed, ...).
        local_mode: forwarded to ray.init(); True runs Ray serially for debugging.

    Returns:
        dict with 'l2_grid', 'gam_grid' and the (n_l2, n_gam) arrays
        'loss_avg' / 'loss_std', also persisted via save_run_data.
    """
    import ray
    ray.init(local_mode=local_mode)
    start_time = timeit.default_timer()
    create_result_dir(args)
    set_random_seed(args.seed)

    # Grid axes, rounded for stable display/keys.
    l2_grid = np.around(get_grid(args.l2_grid_def), decimals=4)
    gam_grid = np.around(get_grid(args.gam_grid_def), decimals=4)
    n_l2, n_gam = len(l2_grid), len(gam_grid)
    loss_avg = np.zeros((n_l2, n_gam))
    loss_std = np.zeros((n_l2, n_gam))

    # Row-major walk over the 2-D grid (same order as nested loops).
    for run_idx, (i_l2, i_gam) in enumerate(np.ndindex(n_l2, n_gam), start=1):
        args_run = deepcopy(args)
        # A single-point 'list' grid pins the L2 factor for this run.
        args_run.param_grid_def = {
            'type': 'L2_factor',
            'spacing': 'list',
            'list': [l2_grid[i_l2]]
        }
        args_run.default_gamma = gam_grid[i_gam]

        info_dict = run_main(args_run, save_result=False, plot=False)
        loss_avg[i_l2, i_gam] = info_dict['planing_loss_avg'][0]
        loss_std[i_l2, i_gam] = info_dict['planing_loss_std'][0]
        print("Finished {}/{}".format(run_idx, loss_avg.size))

    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
コード例 #2
0
ファイル: main_ray.py プロジェクト: ronamit/ray
        args.param_grid_def = new_param_grid_def
    n_grid = len(new_alg_param_grid)
    mean_R = np.full(n_grid, np.nan)
    std_R = np.full(n_grid, np.nan)
    # now take completed results from loaded data:
    for i_grid, alg_param in enumerate(new_alg_param_grid):
        if alg_param in loaded_alg_param_grid:
            load_idx = np.nonzero(loaded_alg_param_grid == alg_param)
            mean_R[i_grid] = info_dict['mean_R'][load_idx]
            std_R[i_grid] = info_dict['std_R'][load_idx]
    if np.all(np.isnan(mean_R)):
        raise Warning(
            'Loaded file  {} did not complete any of the desired grid points'.
            format(result_dir_to_load))
    write_to_log(
        'Continue run with new grid def {}, {}'.format(new_param_grid_def,
                                                       time_now()), args)
    write_to_log('Run parameters: \n' + str(args) + '\n' + '-' * 20, args)
    alg_param_grid = new_alg_param_grid

else:
    # Start from scratch
    create_result_dir(args)
    alg_param_grid = np.around(get_grid(args.param_grid_def), decimals=10)
    n_gammas = len(alg_param_grid)
    mean_R = np.full(n_gammas, np.nan)
    std_R = np.full(n_gammas, np.nan)

if run_mode in {'New', 'Continue', 'ContinueNewGrid', 'ContinueAddGrid'}:
    # Run grid
    ray.init(local_mode=local_mode)
コード例 #3
0
def run_simulations(args, save_result, local_mode):
    """Run `args.n_reps` repetitions of an MRP value-estimation experiment
    in parallel via Ray and aggregate the losses across repetitions.

    Args:
        args: experiment configuration namespace (grids, algorithm type, seed, ...).
        save_result: if True, create a result dir and persist the aggregated results.
        local_mode: forwarded to ray.init(); True runs workers serially for debugging.

    Returns:
        dict with 'planing_loss_avg' / 'planing_loss_std' (mean/std over reps,
        each of shape (n_configs, n_grid)), plus 'alg_param_grid' and 'config_grid'.
    """
    import ray
    import warnings  # for the non-fatal l2_fp mismatch notice below
    ray.init(local_mode=local_mode)

    # A Ray remote function.
    # runs a single repetition of the experiment
    @ray.remote  # (num_cpus=0.2)  # specify how much resources the process needs
    def run_rep(i_rep, alg_param_grid, config_grid, args):
        # Returns a (n_configs, n_grid) loss matrix for this repetition.
        nS = args.nS
        # Initial-state distribution: point mass in the middle state, or uniform.
        if args.initial_state_distrb_type == 'middle':
            args.initial_state_distrb = np.zeros(nS)
            args.initial_state_distrb[nS // 2] = 1.
        elif args.initial_state_distrb_type == 'uniform':
            args.initial_state_distrb = np.ones(nS) / nS

        initial_state_distrb = args.initial_state_distrb
        n_grid = alg_param_grid.shape[0]
        n_configs = args.n_configs
        loss_rep = np.zeros((n_configs, n_grid))

        # default values (overridden per grid point below)
        gammaEval = args.gammaEval
        if args.default_gamma is None:
            gamma_guidance = gammaEval
        else:
            gamma_guidance = args.default_gamma
        l2_fp = 1e-5
        l2_proj = args.default_l2_proj

        for i_config in range(args.n_configs):  # grid of n_configs

            # Which experiment dimension this config axis varies.
            n_traj = args.default_n_trajectories
            if args.config_grid_def['type'] == 'n_trajectories':
                n_traj = config_grid[i_config]
            elif args.config_grid_def['type'] == 'trajectory_len':
                args.depth = config_grid[i_config]
            elif args.config_grid_def['type'] == 'p_left':
                args.mrp_def['p_left'] = config_grid[i_config]

            # Generate MDP:
            M = MRP(args)

            for i_grid, alg_param in enumerate(alg_param_grid):

                # grid values: which hyper-parameter this grid point sets.
                if args.param_grid_def['type'] == 'l2_proj':
                    l2_proj = alg_param
                elif args.param_grid_def['type'] == 'l2_fp':
                    l2_fp = alg_param
                elif args.param_grid_def['type'] == 'gamma_guidance':
                    gamma_guidance = alg_param
                elif args.param_grid_def['type'] == 'l2_factor':
                    l2_fp = alg_param
                    l2_proj = alg_param
                else:
                    raise ValueError('Unrecognized args.grid_type')

                # BUG FIX: the original did `raise Warning(...)`, which aborts
                # the whole repetition; a non-fatal notice was clearly intended.
                if args.alg_type not in ['LSTD_Nested', 'LSTD_Nested_Standard']\
                        and args.param_grid_def['type'] == 'l2_fp':
                    warnings.warn(args.alg_type + ' does not use l2_fp !!!')

                # Ground-truth value function: solve (I - gamma*P) V = R.
                V_true = np.linalg.solve((np.eye(nS) - gammaEval * M.P), M.R)

                # Generate data:
                data = M.SampleData(n_traj, args.depth, p0=initial_state_distrb, reward_std=args.reward_std,
                                    sampling_type=args.sampling_type)

                # value estimation:
                if args.alg_type == 'LSTD':
                    V_est = LSTD(data, gamma_guidance, args, l2_factor=l2_proj)
                elif args.alg_type == 'LSTD_Nested':
                    V_est = LSTD_Nested(data, gamma_guidance, args, l2_proj, l2_fp)
                elif args.alg_type == 'LSTD_Nested_Standard':
                    V_est = LSTD_Nested_Standard(data, gamma_guidance, args, l2_proj, l2_fp)
                elif args.alg_type == 'batch_TD_value_evaluation':
                    V_est = batch_TD_value_evaluation(data, gamma_guidance, args, l2_factor=l2_proj)
                else:
                    # BUG FIX: original message said 'args.grid_type', but this
                    # branch dispatches on args.alg_type.
                    raise ValueError('Unrecognized args.alg_type')
                loss_type = args.evaluation_loss_type
                pi = None
                eval_loss = evaluate_value_estimation(loss_type, V_true, V_est, M, pi, gammaEval, gamma_guidance)
                loss_rep[i_config, i_grid] = eval_loss
            # end for i_grid
        #  end for i_config
        return loss_rep
    # end run_rep

    start_time = timeit.default_timer()
    if save_result:
        create_result_dir(args)
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    planing_loss = np.zeros((n_reps, n_configs, n_grid))

    # ----- Run simulation in parallel processes ------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns objects ids:
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid, config_grid, args)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps), args)
        planing_loss[i_rep] = loss_rep
    # end for i_rep
    info_dict = {'planing_loss_avg': planing_loss.mean(axis=0), 'planing_loss_std': planing_loss.std(axis=0),
                 'alg_param_grid': alg_param_grid, 'config_grid': config_grid}
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log('Total runtime: ' +
                 time.strftime("%H hours, %M minutes and %S seconds", time.gmtime(stop_time - start_time)), args)
    return info_dict
コード例 #4
0
def run_simulation(args):
    """For each MDP size parameter k in 1..5, find the best-regularized loss.

    For every k, two 50-point sweeps are run via `run_main`: one over the
    guidance gamma and one over the L2 factor. The minimum-loss point of each
    sweep is recorded, together with the unregularized (L2 = 0.0) baseline.

    Args:
        args: experiment configuration namespace; `args.mdp_def['k']` is varied.

    Returns:
        dict of per-k arrays: best-gamma / best-L2 mean & std, and the
        no-regularization baseline mean & std; also persisted via save_run_data.
    """
    # NOTE: removed an unused `import ray` (Ray was never initialized or used
    # here) and two unused `alg_param_grid = get_grid(...)` assignments.
    start_time = timeit.default_timer()
    create_result_dir(args)
    set_random_seed(args.seed)

    k_grid = np.arange(1, 6)  # k = 1..5
    n_grid = len(k_grid)
    no_reg_err_mean = np.zeros(n_grid)
    no_reg_err_std = np.zeros(n_grid)
    best_gamma_err_mean = np.zeros(n_grid)
    best_gamma_err_std = np.zeros(n_grid)
    best_l2_err_mean = np.zeros(n_grid)
    best_l2_err_std = np.zeros(n_grid)

    for i_k, k in enumerate(k_grid):
        args_run = deepcopy(args)
        args_run.mdp_def['k'] = k

        # Run gamma grid
        args_run.param_grid_def = {
            'type': 'gamma_guidance',
            'spacing': 'linspace',
            'start': 0.1,
            'stop': 0.99,
            'num': 50
        }
        info_dict = run_main(args_run, save_result=False)
        planing_loss_avg = info_dict['planing_loss_avg']
        planing_loss_std = info_dict['planing_loss_std']
        # Mark the best gamma:
        i_best = np.argmin(planing_loss_avg[0])
        best_gamma_err_mean[i_k] = planing_loss_avg[0][i_best]
        best_gamma_err_std[i_k] = planing_loss_std[0][i_best]

        # Run L2-factor grid (starts at 0.0, i.e. no regularization)
        args_run.param_grid_def = {
            'type': 'L2_factor',
            'spacing': 'linspace',
            'start': 0.0,
            'stop': 0.01,
            'num': 50
        }
        info_dict = run_main(args_run, save_result=False)
        planing_loss_avg = info_dict['planing_loss_avg']
        planing_loss_std = info_dict['planing_loss_std']
        # Mark the best L2 factor:
        i_best = np.argmin(planing_loss_avg[0])
        best_l2_err_mean[i_k] = planing_loss_avg[0][i_best]
        best_l2_err_std[i_k] = planing_loss_std[0][i_best]

        # Unregularized baseline = first L2 grid point (L2 = 0.0).
        no_reg_err_mean[i_k] = planing_loss_avg[0][0]
        # BUG FIX: the original did `no_reg_err_std = planing_loss_std[0][0]`,
        # overwriting the whole array with a scalar instead of filling entry i_k.
        no_reg_err_std[i_k] = planing_loss_std[0][0]
    # end for
    grid_results_dict = {
        'k_grid': k_grid,
        'best_gamma_err_mean': best_gamma_err_mean,
        'best_gamma_err_std': best_gamma_err_std,
        'best_l2_err_mean': best_l2_err_mean,
        'best_l2_err_std': best_l2_err_std,
        'no_reg_err_mean': no_reg_err_mean,
        'no_reg_err_std': no_reg_err_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
コード例 #5
0
ファイル: run_net_depth.py プロジェクト: ronamit/ray
    ray.init(local_mode=local_mode)
    start_time = timeit.default_timer()

    for i_net_depth, net_depth in enumerate(net_depth_grid):
        for i_param_config, param_config in enumerate(param_configs_grid):

            if not np.isnan(mean_R[i_net_depth, i_param_config]):
                continue  # this index already completed

            set_random_seed(args.seed)
            gamma_guidance = param_config['gamma']
            l2_factor = param_config['L2']
            run_name = 'net-depth: {}, Config: {}'.format(
                net_depth, param_config['name'])
            write_to_log(
                'Starting: ' + run_name + ', time: {}'.format(time_now()),
                args)

            critic_hiddens = [400] + [300] * (net_depth - 1)

            # Training
            analysis = tune.run(
                CustomTrainer,
                name=run_name,
                num_samples=args.n_reps,
                stop={"timesteps_total": args.timesteps_total},
                config={
                    "env": args.env,
                    "num_gpus": 0.1,
                    # === Algorithm ===
                    "gamma": gamma_guidance,
コード例 #6
0
ファイル: main.py プロジェクト: incognito01231/discount_reg
    for i_grid, alg_param in enumerate(alg_param_grid):
        if not np.isnan(mean_R[i_grid]):
            continue  # this index already completed
        set_random_seed(args.seed)

        if args.param_grid_def['type'] == 'L2_factor':
            l2_factor = alg_param
            run_name = 'L2_' + str(l2_factor)
        elif args.param_grid_def['type'] == 'gamma_guidance':
            gamma_guidance = alg_param
            run_name = 'Gamma_' + str(alg_param)
        else:
            raise ValueError('Unrecognized args.grid_type')

        write_to_log('Starting: {}, time: {}'.format(run_name, time_now()),
                     args)

        for i_rep in range(args.n_reps):
            seed = args.seed + i_rep
            # Training
            analysis = tune.run(
                CustomTrainer,
                name=run_name,
                num_samples=1,
                random_state_seed=2,
                stop={"timesteps_total": args.timesteps_total},
                config={
                    "env": args.env,
                    "num_gpus": 0.1,
                    # === Algorithm ===
                    "gamma": gamma_guidance,
コード例 #7
0
def run_simulations(args, save_result, local_mode):
    """Run `args.n_reps` repetitions of a policy-learning experiment over a
    hyper-parameter grid and a trajectory-count grid, in parallel via Ray.

    Args:
        args: experiment configuration namespace (grids, method, seed, ...).
        save_result: if True, create a result dir and persist the aggregated results.
        local_mode: forwarded to ray.init(); True runs workers serially for debugging.

    Returns:
        dict with 'planing_loss_avg' / 'planing_loss_std' (mean/std over reps,
        each of shape (len(n_traj_grid), n_grid)) and 'alg_param_grid'.
    """
    import ray
    # BUG FIX: the original statement ended with a stray comma, which wrapped
    # the call in a pointless one-element tuple expression.
    ray.init(local_mode=local_mode, ignore_reinit_error=True)

    # A Ray remote function.
    # Runs a single repetition of the experiment
    @ray.remote
    def run_rep(i_rep, alg_param_grid, n_traj_grid, args_r):
        # Returns a (len(n_traj_grid), n_grid) loss matrix for this repetition.
        traj_grid_len = len(n_traj_grid)
        n_grid = len(alg_param_grid)

        loss_rep = np.zeros((traj_grid_len, n_grid))

        # default values (overridden per grid point below)
        gammaEval = args_r.gammaEval
        if args_r.default_gamma is None:
            gamma_guidance = gammaEval
        else:
            gamma_guidance = args_r.default_gamma
        l2_factor = None
        l1_factor = None

        # Generate MDP:
        M = MDP(args_r)

        # Optimal policy for the MDP (ground truth for the loss):
        pi_opt, V_opt, Q_opt = PolicyIteration(M, gammaEval)

        for i_grid, alg_param in enumerate(alg_param_grid):

            # Which hyper-parameter this grid point sets.
            if args_r.param_grid_def['type'] == 'L2_factor':
                l2_factor = alg_param
            elif args_r.param_grid_def['type'] == 'L1_factor':
                l1_factor = alg_param
            elif args_r.param_grid_def['type'] == 'gamma_guidance':
                gamma_guidance = alg_param
            else:
                raise ValueError('Unrecognized args.grid_type')

            for i_n_traj, n_traj in enumerate(
                    args_r.n_traj_grid
            ):  # grid of number of trajectories to generate
                if args_r.method == 'Expected_SARSA':
                    pi_t = ExpectedSARSA_Learning(args_r, M, n_traj,
                                                  gamma_guidance, l2_factor,
                                                  l1_factor)
                elif args_r.method == 'Model_Based':
                    pi_t = ModelBasedLearning(args_r, M, n_traj,
                                              gamma_guidance)
                elif args_r.method == 'SARSA':
                    pi_t = SARSA_Learning(args_r, M, n_traj, gamma_guidance)
                else:
                    raise ValueError('unrecognized method')
                # Evaluate performance of policy: mean abs gap to optimal values.
                V_t, _ = PolicyEvaluation(M, pi_t, gammaEval)
                loss_rep[i_n_traj, i_grid] = (np.abs(V_opt - V_t)).mean()
            # end for i_n_traj
        #  end for i_grid
        return loss_rep

    # end run_rep
    # --------------------------------------------------
    start_time = timeit.default_timer()
    if save_result:
        create_result_dir(args)
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    traj_grid_len = len(args.n_traj_grid)
    planing_loss = np.zeros((n_reps, traj_grid_len, n_grid))

    # ----- Run simulation in parallel processes ------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns objects ids:
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid,
                                             args.n_traj_grid, args)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps),
                     args)
        planing_loss[i_rep] = loss_rep
    # end for i_rep
    info_dict = {
        'planing_loss_avg': planing_loss.mean(axis=0),
        'planing_loss_std': planing_loss.std(axis=0),
        'alg_param_grid': alg_param_grid
    }
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return info_dict