Example 1
def test(model_file, test_file, device=-1):
    context = utils.Saver.load_context(model_file)
    if context.seed is not None:
        utils.set_random_seed(context.seed, device)

    test_dataset = context.loader.load(test_file, train=False, bucketing=True)
    kwargs = dict(context)
    if context.model_config is not None:
        kwargs.update(context.model_config)
    model = _build_parser(**kwargs)
    chainer.serializers.load_npz(model_file, model)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    pbar = training.listeners.ProgressBar(lambda n: tqdm(total=n))
    pbar.init(len(test_dataset))
    evaluator = Evaluator(model, context.loader.rel_map, test_file,
                          logging.getLogger())
    utils.chainer_train_off()
    for batch in test_dataset.batch(context.batch_size,
                                    colwise=True,
                                    shuffle=False):
        xs, ts = batch[:-1], batch[-1]
        ys = model.forward(*xs)
        evaluator.on_batch_end({'train': False, 'xs': xs, 'ys': ys, 'ts': ts})
        pbar.update(len(ts))
    evaluator.on_epoch_validate_end({})
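
Both this example and the next call `utils.chainer_train_off()`, a project-specific helper whose body is not shown here. A minimal sketch of what such a switch typically amounts to with Chainer's global configuration (an assumption, not the project's actual code):

import chainer

def chainer_train_off():
    # Assumed sketch: put Chainer into inference mode by disabling
    # training-time behaviour (e.g. dropout) and gradient bookkeeping.
    chainer.config.train = False
    chainer.config.enable_backprop = False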
Example 2
def test(model_file, test_file, device=-1):
    context = utils.Saver.load_context(model_file)
    if context.seed is not None:
        utils.set_random_seed(context.seed, device)

    test_dataset = context.loader.load(test_file, train=False, bucketing=True)
    model = _build_parser(**dict(context))
    chainer.serializers.load_npz(model_file, model)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)

    pbar = training.listeners.ProgressBar(lambda n: tqdm(total=n))
    pbar.init(len(test_dataset))
    evaluator = Evaluator(model, context.loader.rel_map, test_file,
                          logging.getLogger())
    utils.chainer_train_off()
    for batch in test_dataset.batch(context.batch_size,
                                    colwise=True,
                                    shuffle=False):
        xs, ts = batch[:-1], batch[-1]
        parsed = model.parse(*xs)
        evaluator.append([tokens[1:] for tokens in xs[-1]], parsed)
        pbar.update(len(ts))
    evaluator.report(show_details=False)
Example 3
def main(ckpt_path, dataset_path, data_split, batch_size, device):
    # Load model config and set random seed
    model_dir = ckpt_path.parent.parent
    cfg = load_model_config(model_dir)
    set_random_seed(cfg.random_seed)

    # Load tokenizer config
    dataset_cfg = Box.from_yaml(filename=cfg.dataset_dir / 'config.yaml')
    tokenizer_cfg = dataset_cfg.tokenizer

    # Determine dataset path. If dataset_path is not given, then the training datasets
    # will be used, and data_split specifies which set to use.
    if not dataset_path:
        dataset_path = cfg.dataset_dir / f'{data_split}.pkl'
    print(f'[-] Dataset: {dataset_path}')
    print(f'[-] Model checkpoint: {ckpt_path}\n')

    # Create data loader
    if batch_size:
        cfg.data_loader.batch_size = batch_size
    data_loader = create_data_loaders(cfg.data_loader,
                                      tokenizer_cfg,
                                      dataset_path,
                                      is_train=False)

    # Set torch device and create model
    device = get_torch_device(device, cfg.get('device'))
    cfg.net.pretrained_model_name_or_path = tokenizer_cfg.pretrained_model_name_or_path
    model = create_model(cfg, device, ckpt_path=ckpt_path)

    # Make predictions and save the results
    answers, na_probs, predictions = predict(device, data_loader, model)
    prediction_dir = model_dir / 'predictions'
    if not prediction_dir.exists():
        prediction_dir.mkdir(parents=True)
        print(f'[-] Predictions directory created at {prediction_dir}\n')
    prediction_path_prefix = f'{ckpt_path.stem}_{dataset_path.stem}'
    save_object(answers,
                prediction_dir / f'{prediction_path_prefix}_answer.json')
    save_object(predictions,
                prediction_dir / f'{prediction_path_prefix}_prediction.json')
    save_object(na_probs,
                prediction_dir / f'{prediction_path_prefix}_na_prob.json')
Example 4
def run_simulations(args, local_mode):
    import ray
    ray.init(local_mode=local_mode)
    start_time = timeit.default_timer()
    create_result_dir(args)
    set_random_seed(args.seed)

    l2_grid = np.around(get_grid(args.l2_grid_def), decimals=4)
    gam_grid = np.around(get_grid(args.gam_grid_def), decimals=4)
    grid_shape = (len(l2_grid), len(gam_grid))
    loss_avg = np.zeros(grid_shape)
    loss_std = np.zeros(grid_shape)

    run_idx = 0
    for i0 in range(grid_shape[0]):
        for i1 in range(grid_shape[1]):
            args_run = deepcopy(args)
            args_run.param_grid_def = {
                'type': 'L2_factor',
                'spacing': 'list',
                'list': [l2_grid[i0]]
            }
            args_run.default_gamma = gam_grid[i1]

            info_dict = run_main(args_run, save_result=False, plot=False)
            loss_avg[i0, i1] = info_dict['planing_loss_avg'][0]
            loss_std[i0, i1] = info_dict['planing_loss_std'][0]
            run_idx += 1
            print("Finished {}/{}".format(run_idx, loss_avg.size))
        # end for
    # end for
    grid_results_dict = {
        'l2_grid': l2_grid,
        'gam_grid': gam_grid,
        'loss_avg': loss_avg,
        'loss_std': loss_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
Example 5
def main(_run, seed, loader_name, model_name, output_path, comt, run_on,
         dsets):
    # path
    output_path = create_output_path(output_path)

    pprint('output path:', output_path)
    model_path = output_path + '/model.bin'
    # pred_path = output_path+'/pred_%s_%d.pkl' %(model_name,seed)
    pprint('create loader `%s` and model `%s`...' % (loader_name, model_name))

    ###### Daily Model and Data Prepare #########
    set_random_seed(seed)
    global day_loader
    day_loader = getattr(loaders_module, loader_name).DataLoader(dset=dsets[0])
    day_model_1 = getattr(models_module, model_name).Day_Model_1()
    super_model = getattr(models_module, model_name)
    pprint(f'''
        Day_Model_1: {count_num_params(super_model.Day_Model_1())},
        Day_Model_2: {count_num_params(super_model.Day_Model_2())}, Digger: {count_num_params(super_model.Min_Model())},
        Guider: {count_num_params(super_model.Mix_Model())}
        ''')
    pprint('load daily data...')
    day_train_set, day_valid_set, day_test_set = day_loader.load_data()

    ###### Day Model 1#######
    train_hids, valid_hids, test_hids = Day_model_1(
        output_path, day_model_1, day_train_set, day_valid_set, day_test_set)

    ###### High-freq Model and Data Prepare ########
    set_random_seed(seed)
    min_loader = getattr(loaders_module, loader_name).DataLoader(dset=dsets[1])

    pprint('load high-freq data...')
    min_train_set, min_valid_set, min_test_set = min_loader.load_data()

    ####### Min Model ######
    itera = 0
    set_random_seed(seed)
    _run = SummaryWriter(comment='_min_model_%d' % itera) if run_on else None
    min_model = getattr(models_module, model_name).Min_Model()
    min_train_day_reps, min_valid_day_reps, min_test_day_reps = Min_model(
        _run,
        output_path,
        min_model,
        min_train_set,
        min_valid_set,
        min_test_set,
        train_hids,
        valid_hids,
        itera=itera)
    ######### Day Model 2########
    set_random_seed(seed)
    _run = SummaryWriter(comment='_day_model2_min_%d' % itera) if run_on else None
    day_model_2 = getattr(models_module, model_name).Day_Model_2()
    pred_path = output_path + '/pred_%s_%d.pkl' % (model_name, itera)
    rmse_min, mae_min = Day_model_2(
        _run,
        output_path,
        day_model_2,
        day_train_set,
        day_valid_set,
        day_test_set,
        min_train_day_reps, 
        min_valid_day_reps, 
        min_test_day_reps,
        pred_path)
    
    #####Iter prep###
    pre_rmse_min = 100
    pre_mae_min = 100
    # rmse_mix = 0.0
    # mae_mix = 0.0
    #####Iter####
    while (rmse_min < pre_rmse_min) or (mae_min < pre_mae_min):
        itera += 1
        pprint('Iter:', itera)
        pre_rmse_min = rmse_min
        pre_mae_min = mae_min
        # pre_rmse_mix = rmse_mix
        # pre_mae_mix = mae_mix
        ####### Mix Model ######
        set_random_seed(seed)
        mix_model = getattr(models_module, model_name).Mix_Model()
        _run = SummaryWriter(comment='_mix_model_%d' %
                             itera) if run_on else None
        mix_train_day_reps, mix_valid_day_reps, mix_test_day_reps = Mix_model(
            _run,
            output_path,
            mix_model,
            min_train_set,
            min_valid_set,
            min_test_set,
            min_train_day_reps,
            min_valid_day_reps,
            itera=itera)

        ####### Day Model 2######
        set_random_seed(seed)
        pprint('Mix model fine tune...')
        day_model_2 = getattr(models_module, model_name).Day_Model_2()
        _run = SummaryWriter(comment='_day_model2_mix_%d' %
                             itera) if run_on else None
        rmse_mix, mae_mix = Day_model_2(
            _run,
            output_path,
            day_model_2,
            day_train_set,
            day_valid_set,
            day_test_set,
            mix_train_day_reps,
            mix_valid_day_reps,
            mix_test_day_reps)

        ####### Min Model ######
        set_random_seed(seed)
        min_model = getattr(models_module, model_name).Min_Model()
        _run = SummaryWriter(comment='_min_model_%d' %
                             itera) if run_on else None

        min_train_day_reps, min_valid_day_reps, min_test_day_reps = Min_model(
            _run,
            output_path,
            min_model,
            min_train_set,
            min_valid_set,
            min_test_set,
            mix_train_day_reps,
            mix_valid_day_reps,
            itera=itera)
        ####### Day Model 2######
        set_random_seed(seed)
        pprint('Min model fine tune...')
        pred_path = output_path + '/pred_%s_%d.pkl' % (model_name, itera)
        day_model_2 = getattr(models_module, model_name).Day_Model_2()
        _run = SummaryWriter(comment='_day_model2_min_%d' %
                             itera) if run_on else None
        rmse_min, mae_min = Day_model_2(
            _run,
            output_path,
            day_model_2,
            day_train_set,
            day_valid_set,
            day_test_set,
            min_train_day_reps,
            min_valid_day_reps,
            min_test_day_reps,
            pred_path)

        pprint(f'Iter: {itera}')
        pprint(f'pre_rmse_min: {pre_rmse_min}, rmse_min: {rmse_min}')
        pprint('###################')
Example 6
    # Start from scratch
    create_result_dir(args)
    alg_param_grid = np.around(get_grid(args.param_grid_def), decimals=10)
    n_gammas = len(alg_param_grid)
    mean_R = np.full(n_gammas, np.nan)
    std_R = np.full(n_gammas, np.nan)

    if run_mode in {'New', 'Continue', 'ContinueNewGrid', 'ContinueAddGrid'}:
        # Run grid
        ray.init(local_mode=local_mode)
        start_time = timeit.default_timer()

        for i_grid, alg_param in enumerate(alg_param_grid):
            if not np.isnan(mean_R[i_grid]):
                continue  # this index already completed
            set_random_seed(args.seed)

            if args.param_grid_def['type'] == 'L2_factor':
                l2_factor = alg_param
                run_name = 'L2_' + str(l2_factor)
            elif args.param_grid_def['type'] == 'gamma_guidance':
                gamma_guidance = alg_param
                run_name = 'Gamma_' + str(alg_param)
            else:
                raise ValueError('Unrecognized args.grid_type')

            write_to_log('Starting: {}, time: {}'.format(run_name, time_now()),
                         args)

            # Training
            analysis = tune.run(
Example 7
def run_simulations(args, save_result, local_mode):
    import ray
    ray.init(local_mode=local_mode)
    # A Ray remote function.
    # Runs a single repetition of the experiment.
    @ray.remote  # (num_cpus=0.2)  # specify how many resources the process needs
    def run_rep(i_rep, alg_param_grid, config_grid, args):
        nS = args.nS
        if args.initial_state_distrb_type == 'middle':
            args.initial_state_distrb = np.zeros(nS)
            args.initial_state_distrb[nS // 2] = 1.
        elif args.initial_state_distrb_type == 'uniform':
            args.initial_state_distrb = np.ones(nS) / nS

        initial_state_distrb = args.initial_state_distrb
        n_grid = alg_param_grid.shape[0]
        n_configs = args.n_configs
        loss_rep = np.zeros((n_configs, n_grid))

        # default values
        gammaEval = args.gammaEval
        if args.default_gamma is None:
            gamma_guidance = gammaEval
        else:
            gamma_guidance = args.default_gamma
        l2_fp = 1e-5
        l2_proj = args.default_l2_proj

        for i_config in range(args.n_configs):  # grid of n_configs

            n_traj = args.default_n_trajectories
            if args.config_grid_def['type'] == 'n_trajectories':
                n_traj = config_grid[i_config]
            elif args.config_grid_def['type'] == 'trajectory_len':
                args.depth = config_grid[i_config]
            elif args.config_grid_def['type'] == 'p_left':
                args.mrp_def['p_left'] = config_grid[i_config]

            # Generate MDP:
            M = MRP(args)

            for i_grid, alg_param in enumerate(alg_param_grid):

                # grid values:
                if args.param_grid_def['type'] == 'l2_proj':
                    l2_proj = alg_param
                elif args.param_grid_def['type'] == 'l2_fp':
                    l2_fp = alg_param
                elif args.param_grid_def['type'] == 'gamma_guidance':
                    gamma_guidance = alg_param
                elif args.param_grid_def['type'] == 'l2_factor':
                    l2_fp = alg_param
                    l2_proj = alg_param
                else:
                    raise ValueError('Unrecognized args.grid_type')

                if args.alg_type not in ['LSTD_Nested', 'LSTD_Nested_Standard']\
                        and args.param_grid_def['type'] == 'l2_fp':
                    raise Warning(args.alg_type + ' does not use l2_fp !!!')

                V_true = np.linalg.solve((np.eye(nS) - gammaEval * M.P), M.R)

                # Generate data:
                data = M.SampleData(n_traj, args.depth, p0=initial_state_distrb, reward_std=args.reward_std,
                                    sampling_type=args.sampling_type)

                # value estimation:
                if args.alg_type == 'LSTD':
                    V_est = LSTD(data, gamma_guidance, args, l2_factor=l2_proj)
                elif args.alg_type == 'LSTD_Nested':
                    V_est = LSTD_Nested(data, gamma_guidance, args, l2_proj, l2_fp)

                elif args.alg_type == 'LSTD_Nested_Standard':
                    V_est = LSTD_Nested_Standard(data, gamma_guidance, args, l2_proj, l2_fp)
                elif args.alg_type == 'batch_TD_value_evaluation':
                    V_est = batch_TD_value_evaluation(data, gamma_guidance, args, l2_factor=l2_proj)
                else:
                    raise ValueError('Unrecognized args.grid_type')
                loss_type = args.evaluation_loss_type
                pi = None
                eval_loss = evaluate_value_estimation(loss_type, V_true, V_est, M, pi, gammaEval, gamma_guidance)
                loss_rep[i_config, i_grid] = eval_loss
            # end for i_grid
        #  end for i_config
        return loss_rep
    # end run_rep

    start_time = timeit.default_timer()
    if save_result:
        create_result_dir(args)
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]

    config_grid = get_grid(args.config_grid_def)
    n_configs = len(config_grid)
    args.n_configs = n_configs

    planing_loss = np.zeros((n_reps, n_configs, n_grid))

    # ----- Run simulation in parallel processes --------------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns object IDs:
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid, config_grid, args)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps), args)
        planing_loss[i_rep] = loss_rep
    # end for i_rep
    info_dict = {'planing_loss_avg': planing_loss.mean(axis=0), 'planing_loss_std': planing_loss.std(axis=0),
                 'alg_param_grid': alg_param_grid, 'config_grid': config_grid}
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log('Total runtime: ' +
                 time.strftime("%H hours, %M minutes and %S seconds", time.gmtime(stop_time - start_time)), args)
    return info_dict
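
Example 7 above and Example 10 below share the same Ray structure: decorate the per-repetition function with `@ray.remote`, launch every repetition up front to collect object IDs, then `ray.get` each result. A stripped-down sketch of that fan-out/gather pattern, with a hypothetical placeholder task standing in for `run_rep`:

import ray
import numpy as np

ray.init(ignore_reinit_error=True)

@ray.remote
def one_rep(i_rep):
    # Placeholder body for a single repetition of the experiment.
    rng = np.random.default_rng(i_rep)
    return rng.normal()

# Fan out: launch all repetitions and keep the returned object IDs.
result_ids = [one_rep.remote(i_rep) for i_rep in range(8)]
# Gather: ray.get blocks until each result is ready.
results = [ray.get(rid) for rid in result_ids]
print(np.mean(results), np.std(results))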
Example 8
def run_simulation(args):
    import ray
    start_time = timeit.default_timer()
    create_result_dir(args)
    set_random_seed(args.seed)

    k_grid = np.arange(1, 6)
    n_grid = len(k_grid)
    no_reg_err_mean = np.zeros(n_grid)
    no_reg_err_std = np.zeros(n_grid)
    best_gamma_err_mean = np.zeros(n_grid)
    best_gamma_err_std = np.zeros(n_grid)
    best_l2_err_mean = np.zeros(n_grid)
    best_l2_err_std = np.zeros(n_grid)

    for i_k, k in enumerate(k_grid):
        args_run = deepcopy(args)
        args_run.mdp_def['k'] = k

        # Run gamma grid
        args_run.param_grid_def = {
            'type': 'gamma_guidance',
            'spacing': 'linspace',
            'start': 0.1,
            'stop': 0.99,
            'num': 50
        }
        alg_param_grid = get_grid(args_run.param_grid_def)
        info_dict = run_main(args_run, save_result=False)
        planing_loss_avg = info_dict['planing_loss_avg']
        planing_loss_std = info_dict['planing_loss_std']
        # Mark the best gamma:
        i_best = np.argmin(planing_loss_avg[0])
        best_gamma_err_mean[i_k] = planing_loss_avg[0][i_best]
        best_gamma_err_std[i_k] = planing_loss_std[0][i_best]

        args_run.param_grid_def = {
            'type': 'L2_factor',
            'spacing': 'linspace',
            'start': 0.0,
            'stop': 0.01,
            'num': 50
        }
        alg_param_grid = get_grid(args_run.param_grid_def)
        info_dict = run_main(args_run, save_result=False)
        planing_loss_avg = info_dict['planing_loss_avg']
        planing_loss_std = info_dict['planing_loss_std']
        # Mark the best L2 factor:
        i_best = np.argmin(planing_loss_avg[0])
        best_l2_err_mean[i_k] = planing_loss_avg[0][i_best]
        best_l2_err_std[i_k] = planing_loss_std[0][i_best]

        no_reg_err_mean[i_k] = planing_loss_avg[0][0]
        no_reg_err_std[i_k] = planing_loss_std[0][0]
    # end for
    grid_results_dict = {
        'k_grid': k_grid,
        'best_gamma_err_mean': best_gamma_err_mean,
        'best_gamma_err_std': best_gamma_err_std,
        'best_l2_err_mean': best_l2_err_mean,
        'best_l2_err_std': best_l2_err_std,
        'no_reg_err_mean': no_reg_err_mean,
        'no_reg_err_std': no_reg_err_std
    }
    save_run_data(args, grid_results_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return grid_results_dict
Example 9
def train(train_file,
          test_file=None,
          embed_file=None,
          n_epoch=20,
          batch_size=5000,
          lr=2e-3,
          model_config=None,
          device=-1,
          save_dir=None,
          seed=None,
          cache_dir='',
          refresh_cache=False):
    if seed is not None:
        utils.set_random_seed(seed, device)
    logger = logging.getLogger()
    assert isinstance(logger, logging.AppLogger)
    if model_config is None:
        model_config = {}

    loader = dataset.DataLoader.build(input_file=train_file,
                                      word_embed_file=embed_file,
                                      refresh_cache=refresh_cache,
                                      extra_ids=(git.hash(), ),
                                      cache_options=dict(dir=cache_dir,
                                                         mkdir=True,
                                                         logger=logger))
    train_dataset = loader.load(train_file,
                                train=True,
                                bucketing=True,
                                refresh_cache=refresh_cache)
    test_dataset = None
    if test_file is not None:
        test_dataset = loader.load(test_file,
                                   train=False,
                                   bucketing=True,
                                   refresh_cache=refresh_cache)

    model = _build_parser(loader, **model_config)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu(device)
    optimizer = chainer.optimizers.Adam(alpha=lr,
                                        beta1=0.9,
                                        beta2=0.9,
                                        eps=1e-12)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.0))
    optimizer.add_hook(
        optimizers.ExponentialDecayAnnealing(initial_lr=lr,
                                             decay_rate=0.75,
                                             decay_step=5000,
                                             lr_key='alpha'))

    def _report(y, t):
        arc_accuracy, rel_accuracy = model.compute_accuracy(y, t)
        training.report({
            'arc_accuracy': arc_accuracy,
            'rel_accuracy': rel_accuracy
        })

    trainer = training.Trainer(optimizer, model, loss_func=model.compute_loss)
    trainer.configure(utils.training_config)
    trainer.add_listener(
        training.listeners.ProgressBar(lambda n: tqdm(total=n)), priority=200)
    trainer.add_hook(training.BATCH_END,
                     lambda data: _report(data['ys'], data['ts']))
    if test_dataset:
        evaluator = Evaluator(model, loader.rel_map, test_file, logger)
        trainer.add_listener(evaluator, priority=128)
        if save_dir is not None:
            accessid = logger.accessid
            date = logger.accesstime.strftime('%Y%m%d')
            trainer.add_listener(
                utils.Saver(model,
                            basename="{}-{}".format(date, accessid),
                            context=dict(App.context, loader=loader),
                            directory=save_dir,
                            logger=logger,
                            save_best=True,
                            evaluate=(lambda _: evaluator._parsed['UAS'])))
    trainer.fit(train_dataset, test_dataset, n_epoch, batch_size)
Example 10
def run_simulations(args, save_result, local_mode):
    import ray
    ray.init(local_mode=local_mode, ignore_reinit_error=True)
    # A Ray remote function.
    # Runs a single repetition of the experiment
    @ray.remote
    def run_rep(i_rep, alg_param_grid, n_traj_grid, args_r):
        traj_grid_len = len(n_traj_grid)
        n_grid = len(alg_param_grid)

        # runs a single repetition of the experiment
        loss_rep = np.zeros((traj_grid_len, n_grid))

        # default values
        gammaEval = args_r.gammaEval
        if args_r.default_gamma is None:
            gamma_guidance = gammaEval
        else:
            gamma_guidance = args_r.default_gamma
        l2_factor = None
        l1_factor = None

        # Generate MDP:
        M = MDP(args_r)

        # Optimal policy for the MDP:
        pi_opt, V_opt, Q_opt = PolicyIteration(M, gammaEval)

        for i_grid, alg_param in enumerate(alg_param_grid):

            if args_r.param_grid_def['type'] == 'L2_factor':
                l2_factor = alg_param
            elif args_r.param_grid_def['type'] == 'L1_factor':
                l1_factor = alg_param
            elif args_r.param_grid_def['type'] == 'gamma_guidance':
                gamma_guidance = alg_param
            else:
                raise ValueError('Unrecognized args.grid_type')

            for i_n_traj, n_traj in enumerate(
                    args_r.n_traj_grid
            ):  # grid of number of trajectories to generate
                if args_r.method == 'Expected_SARSA':
                    pi_t = ExpectedSARSA_Learning(args_r, M, n_traj,
                                                  gamma_guidance, l2_factor,
                                                  l1_factor)
                elif args_r.method == 'Model_Based':
                    pi_t = ModelBasedLearning(args_r, M, n_traj,
                                              gamma_guidance)
                elif args_r.method == 'SARSA':
                    pi_t = SARSA_Learning(args_r, M, n_traj, gamma_guidance)
                else:
                    raise ValueError('unrecognized method')
                # Evaluate performance of policy:
                V_t, _ = PolicyEvaluation(M, pi_t, gammaEval)
                loss_rep[i_n_traj, i_grid] = (np.abs(V_opt - V_t)).mean()
            # end for i_n_traj
        #  end for i_grid
        return loss_rep

    # end run_rep
    # --------------------------------------------------
    start_time = timeit.default_timer()
    if save_result:
        create_result_dir(args)
    set_random_seed(args.seed)

    n_reps = args.n_reps
    alg_param_grid = get_grid(args.param_grid_def)
    n_grid = alg_param_grid.shape[0]
    traj_grid_len = len(args.n_traj_grid)
    planing_loss = np.zeros((n_reps, traj_grid_len, n_grid))

    # ----- Run simulation in parallel processes --------------------------------------#
    loss_rep_id_lst = []
    for i_rep in range(n_reps):
        # returns object IDs:
        planing_loss_rep_id = run_rep.remote(i_rep, alg_param_grid,
                                             args.n_traj_grid, args)
        loss_rep_id_lst.append(planing_loss_rep_id)
    # -----  get the results --------------------------------------------#
    for i_rep in range(n_reps):
        loss_rep = ray.get(loss_rep_id_lst[i_rep])
        write_to_log('Finished: {} out of {} reps'.format(i_rep + 1, n_reps),
                     args)
        planing_loss[i_rep] = loss_rep
    # end for i_rep
    info_dict = {
        'planing_loss_avg': planing_loss.mean(axis=0),
        'planing_loss_std': planing_loss.std(axis=0),
        'alg_param_grid': alg_param_grid
    }
    if save_result:
        save_run_data(args, info_dict)
    stop_time = timeit.default_timer()
    write_to_log(
        'Total runtime: ' +
        time.strftime("%H hours, %M minutes and %S seconds",
                      time.gmtime(stop_time - start_time)), args)
    return info_dict
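
All of the examples above rely on a `set_random_seed` helper defined elsewhere in their respective projects. A minimal sketch of what such a helper commonly covers (an assumption; the real implementations may also seed framework-specific GPU generators such as CuPy or torch.cuda):

import random
import numpy as np

def set_random_seed(seed, device=-1):
    # Seed the Python and NumPy generators so repeated runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    if device >= 0:
        # On GPU runs the framework RNG would also be seeded here, e.g.
        # cupy.random.seed(seed) for Chainer or torch.cuda.manual_seed_all(seed)
        # for PyTorch (left out to keep this sketch dependency-free).
        pass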