Example #1
def create_experiment_dir(storage_name_id, config, config_unique_dict, SEEDS, root_dir, git_hashes=None):
    # Determine experiment number

    tmp_dir_tree = DirectoryTree(id=storage_name_id, alg_name=config.alg_name, task_name=config.task_name,
                                 desc=config.desc, seed=1, git_hashes=git_hashes, root=root_dir)

    experiment_num = int(tmp_dir_tree.experiment_dir.name.strip('experiment'))

    # For each seed of this experiment, creates a directory

    for seed in SEEDS:
        config.seed = seed
        config_unique_dict['seed'] = seed

        # Creates the directory tree for this seed

        dir_tree = DirectoryTree(id=storage_name_id,
                                 alg_name=config.alg_name,
                                 task_name=config.task_name,
                                 desc=config.desc,
                                 seed=config.seed,
                                 experiment_num=experiment_num,
                                 git_hashes=git_hashes,
                                 root=root_dir)

        dir_tree.create_directories()

        # Saves the config as a json file (to be run later)

        save_config_to_json(config, filename=str(dir_tree.seed_dir / 'config.json'))

        # Saves a dictionary of what makes each seed_dir unique (just for display on graphs)

        validate_config_unique(config, config_unique_dict)
        save_dict_to_json(config_unique_dict, filename=str(dir_tree.seed_dir / 'config_unique.json'))

        # Creates an empty UNHATCHED file, meaning that the experiment is ready to be run

        open(str(dir_tree.seed_dir / 'UNHATCHED'), 'w+').close()

    return dir_tree
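
The helper above relies on the project's DirectoryTree abstraction. As a rough illustration of the resulting layout (one sub-directory per seed, a saved config.json, and an empty UNHATCHED flag file marking runs that are ready to launch), here is a minimal self-contained sketch using only the standard library; the function names and directory names are illustrative assumptions, not the actual alfred API.

import json
from pathlib import Path

def sketch_create_seed_dirs(experiment_dir, config, seeds):
    """Creates one sub-directory per seed, saving its config and an empty UNHATCHED flag file."""
    experiment_dir = Path(experiment_dir)
    for seed in seeds:
        seed_dir = experiment_dir / f"seed{seed}"
        seed_dir.mkdir(parents=True, exist_ok=True)
        seed_config = dict(config, seed=seed)  # each seed gets its own copy of the config
        (seed_dir / "config.json").write_text(json.dumps(seed_config, indent=2))
        (seed_dir / "UNHATCHED").touch()  # flag file: this run is ready to be launched

def sketch_list_unhatched(experiment_dir):
    """Returns the seed directories whose runs have not been launched yet."""
    return [p.parent for p in Path(experiment_dir).glob("seed*/UNHATCHED")]

# Example usage (illustrative paths):
# sketch_create_seed_dirs("storage/experiment1", {"alg_name": "sac", "task_name": "cartpole"}, seeds=[1, 2, 3])
# print(sketch_list_unhatched("storage/experiment1"))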
Example #2
def train(config, dir_tree=None, pbar="default_pbar", logger=None):
    # Override config

    if config.overide_default_args:
        overide_args(args=config, alg_name=config.alg_name)

    dir_tree, logger, pbar = create_management_objects(dir_tree=dir_tree,
                                                       logger=logger,
                                                       pbar=pbar,
                                                       config=config)
    if pbar is not None:
        pbar.total = config.max_transitions if config.max_episodes is None else config.max_episodes

    # Manages GPU usage

    train_device, rollout_device, logger = get_computing_devices(
        use_gpu=config.use_gpu,
        torch=torch,
        do_rollouts_on_cpu=config.do_rollouts_on_cpu,
        logger=logger)
    config.train_device = train_device.type
    config.rollout_device = rollout_device.type

    # Sanity check config and save it

    config = sanity_check_args(config=config,
                               logger=logger,
                               alg_name=config.alg_name)
    config.experiment_name = str(dir_tree.experiment_dir)
    save_config_to_json(config, str(dir_tree.seed_dir / 'config.json'))

    if (dir_tree.seed_dir / "config_unique.json").exists():
        config_unique_dict = load_dict_from_json(dir_tree.seed_dir /
                                                 "config_unique.json")
        config_unique_dict.update(
            (k, config.__dict__[k])
            for k in config_unique_dict.keys() & config.__dict__.keys())
        save_dict_to_json(config_unique_dict,
                          str(dir_tree.seed_dir / 'config_unique.json'))

    # Importing wandb (or not)

    if not config.sync_wandb:
        os.environ['WANDB_MODE'] = 'dryrun'
        os.environ['WANDB_DISABLE_CODE'] = 'true'

    if config.use_wandb:
        import wandb
        os.environ["WANDB_DIR"] = str(dir_tree.seed_dir.absolute())
        wandb.init(id=dir_tree.get_run_name(),
                   project='il_without_rl',
                   entity='irl_la_forge',
                   reinit=True)
        wandb.config.update(config, allow_val_change=True)
        wandb_save_dir = Path(
            wandb.run.dir) if config.save_best_model_to_wandb else None
    else:
        wandb = NOP()
        wandb_save_dir = None

    # Create env

    env = make_env(config.task_name)
    test_env = make_env(config.task_name)
    set_seeds(config.seed, env)
    set_seeds(config.seed, test_env)
    env_dims = ml.get_env_dims(env)

    # Create learner

    learner = init_from_config(env_dims, config)

    # Creates Experience Buffer

    if config.alg_name in RL_OFF_POLICY_ALGS:
        if config.alg_name in ['sqil', 'sqil-c']:
            demo_trajectories = load_expert_trajectories(
                task_name=config.task_name,
                demos_name=config.demos_name,
                demos_folder=config.demos_folder)

            # Put expert demonstrations into buffer

            buffer = SQILBuffer(demo_trajectories=demo_trajectories,
                                max_transitions=config.replay_buffer_length,
                                obs_space=env_dims['obs_space'],
                                act_space=env_dims['act_space'])

            logger.info(
                f"Number of expert trajectories: {buffer.n_demonstration_trajectories} \n"
                f"Number of expert transitions: {buffer.n_demonstration_transitions} "
            )

            # Loading init_states from demos

            if config.task_name in POMMERMAN_TASKS:
                demos = load_expert_demos(config.demos_folder,
                                          config.demos_name)
                env.init_game_states = load_game_states_from_demos(demos,
                                                                   idx=0)
                test_env.init_game_states = env.init_game_states

        else:
            buffer = ReplayBuffer(max_transitions=config.replay_buffer_length,
                                  obs_space=env_dims['obs_space'],
                                  act_space=env_dims['act_space'])

    elif config.alg_name in RL_ON_POLICY_ALGS:
        buffer = OnPolicyBuffer(obs_space=env_dims['obs_space'])

    else:
        raise NotImplementedError

    # Creates recorders

    if config.task_name in POMMERMAN_TASKS:
        learner.metrics_to_record.add('n_woods')
        learner.metrics_to_record.add('n_enemies')

    os.makedirs(dir_tree.recorders_dir, exist_ok=True)
    train_recorder = Recorder(metrics_to_record=learner.metrics_to_record)

    # Initialize counters

    total_transitions = 0
    total_transitions_at_last_done = 0
    episode = 0
    epoch = 0
    cumul_eval_return = 0.
    eval_step = 0
    ret = 0
    best_eval_return = -float('inf')

    if config.max_episodes is not None:
        max_itr, heartbeat_ite = config.max_episodes, config.episodes_between_saves
    else:
        max_itr, heartbeat_ite = config.max_transitions, config.transitions_between_saves
    it = TrainingIterator(max_itr=max_itr, heartbeat_ite=heartbeat_ite)

    to_save = [learner]
    to_evaluate = learner
    to_watch = to_save if config.wandb_watch_models else []

    eval_perf_queue = deque(maxlen=5)
    ml.wandb_watch(wandb, to_watch)

    # Saves initial model

    ite_model_save_name = f'model_ep{episode}.pt'
    save(to_save, dir_tree.seed_dir, suffix=ite_model_save_name)

    # Initial reset

    state = env.reset()

    disp = config.render
    learner.prep_rollout(rollout_device)

    # Training loop

    while True:

        # ENVIRONMENT STEP

        if disp:
            env.render()

        # Actions are sampled from the policy (including during the off-policy warmup phase)
        action = learner.act(state, sample=True)
        next_state, reward, done, info = env.step(action)

        buffer.push(state, action, next_state, reward, ml.mask(done))

        state = next_state

        ret += reward
        total_transitions += 1
        if config.max_transitions is not None:
            it.touch()
            if it.itr <= config.warmup and config.alg_name in RL_OFF_POLICY_ALGS:
                it._heartbeat = False
            if pbar is not None:
                pbar.update()

        # episode ending

        if done:
            it.record('return', ret)
            it.record('episode_len',
                      total_transitions - total_transitions_at_last_done)

            for info_key, info_value in info.items():
                if info_key in learner.metrics_to_record:
                    it.record(info_key, info_value)

            state = env.reset()

            ret = 0
            disp = False
            total_transitions_at_last_done = total_transitions
            episode += 1
            if config.max_episodes is not None:
                it.touch()  # increments the training iterator
                if pbar is not None:
                    pbar.update()

        # TRAINING STEP

        if should_update_rl(episode=episode,
                            done=done,
                            n_transitions=buffer.n_transitions,
                            total_transitions=total_transitions,
                            config=config,
                            name=config.alg_name):

            # Samples a batch of transitions (off-policy algos) or takes all freshly collected ones (on-policy)

            if config.alg_name in RL_OFF_POLICY_ALGS:
                # we sample a batch normally
                experiences = buffer.sample(config.batch_size)
            else:
                # we flush the buffer (on-policy learning)
                experiences = buffer.flush()

            # Train the model

            learner.prep_training(train_device)  # train mode
            return_dict = learner.train_model(experiences)
            it.update(return_dict)
            learner.prep_rollout(rollout_device)  # back to eval mode

            epoch += 1

        # PLOTTING AND RECORDING

        if it.heartbeat:

            # Recording some metrics

            new_recordings = {
                'total_transitions': total_transitions,
                'epoch': epoch,
                'episode': episode,
                'eval_step': eval_step
            }

            performance_metrics = to_evaluate.evaluate_perf(
                env=test_env,
                n_episodes=config.number_of_eval_episodes,
                seed=config.validation_seed,
                sample=config.sample_in_eval,
                render=config.render_evaluate)

            eval_step += 1
            cumul_eval_return += performance_metrics['eval_return']

            performance_metrics['cumul_eval_return'] = cumul_eval_return
            performance_metrics[
                'avg_eval_return'] = cumul_eval_return / eval_step

            new_recordings.update(performance_metrics)

            means_to_log = it.pop_all_means()
            new_recordings.update(means_to_log)
            train_recorder.write_to_tape(new_recordings)
            train_recorder.save(dir_tree.recorders_dir / 'train_recorder.pkl')

            wandb.log(new_recordings, step=eval_step)

            # Saving best model

            eval_perf_queue.append(performance_metrics['eval_return'])
            running_avg_perf = np.mean(eval_perf_queue)

            if running_avg_perf >= best_eval_return:
                new_best = running_avg_perf
                best_eval_return = running_avg_perf
                save(to_save, dir_tree.seed_dir, 'model_best.pt')
                save(to_save, wandb_save_dir, 'model_best.pt')

                logger.info(
                    f"Eval step {eval_step}: Saved new best model at {str(dir_tree.seed_dir / 'model_best.pt')} "
                    f"with average perfo of {new_best}")

            # Saving current model periodically (even if not best)

            if (it.itr % (10 * heartbeat_ite)) == 0:
                remove(models=to_save,
                       directory=dir_tree.seed_dir,
                       suffix=ite_model_save_name)
                ite_model_save_name = f"model_eval_step_{eval_step}.pt"
                save(to_save, dir_tree.seed_dir, ite_model_save_name)

                logger.info(
                    f"Eval step {eval_step}: Saved model {str(dir_tree.seed_dir / f'model_eval_step_{eval_step}.pt')} "
                    f"(avg perfo {running_avg_perf})")

            # Creating and saving plots
            try:
                learner.save_training_graphs(train_recorder, dir_tree.seed_dir)
            except ImportError:
                pass
            disp = config.render

        if config.max_episodes is not None:
            if episode > config.max_episodes:
                break

        if config.max_transitions is not None:
            if total_transitions > config.max_transitions:
                break

    # Saving last model

    save(to_save, dir_tree.seed_dir, f"model_eval_step_{eval_step}.pt")

    logger.info(f"Saved last model: model_eval_step_{eval_step}.pt")
    logger.info(f"{Path(os.path.abspath(__file__)).parent.name}/{__file__}")

    # Finishes logging before exiting the training script

    wandb.join()
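
When wandb is disabled, the code above swaps the wandb module for a NOP() object so that later calls such as wandb.log(...) and wandb.join() silently do nothing. One plausible, minimal implementation of such a NOP class is sketched below; this is an assumption about its behaviour, not necessarily the project's actual class.

class NOP:
    """Stand-in object whose every attribute access and call is a silent no-op."""

    def __getattr__(self, name):
        # Any attribute lookup (e.g. NOP().log, NOP().config.update) returns the NOP itself.
        return self

    def __call__(self, *args, **kwargs):
        # Any call (e.g. NOP().log({"x": 1}, step=3)) does nothing and returns the NOP.
        return self

# Usage, mirroring the disabled-wandb branch above:
# wandb = NOP()
# wandb.log({"eval_return": 1.0}, step=3)  # no-op
# wandb.join()                             # no-op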
Example #3
def _compute_seed_scores(storage_dir, performance_metric,
                         performance_aggregation, group_key, bar_key,
                         re_run_if_exists, save_dir, logger, root_dir,
                         n_eval_runs):
    if (storage_dir / save_dir /
            f"{save_dir}_seed_scores.pkl").exists() and not re_run_if_exists:
        logger.info(
            f" SKIPPING {storage_dir} - {save_dir}_seed_scores.pkl already exists"
        )
        return

    else:
        logger.info(f"Benchmarking {storage_dir}...")

    assert group_key in [
        'task_name', 'storage_name', 'experiment_num', 'alg_name'
    ]
    assert bar_key in [
        'task_name', 'storage_name', 'experiment_num', 'alg_name'
    ]

    # Initialize container

    scores = OrderedDict()

    # Get all experiment directories

    all_experiments = DirectoryTree.get_all_experiments(
        storage_dir=storage_dir)

    for experiment_dir in all_experiments:

        # For that experiment, get all seed directories

        experiment_seeds = DirectoryTree.get_all_seeds(
            experiment_dir=experiment_dir)

        # Initialize container

        all_seeds_scores = []

        for i, seed_dir in enumerate(experiment_seeds):
            # Logs which seed directory is being processed

            logger.debug(f"{seed_dir}")

            # Loads training config

            config_dict = load_dict_from_json(str(seed_dir / "config.json"))

            # Selects how data will be identified

            keys = {
                "task_name": config_dict["task_name"],
                "storage_name": seed_dir.parents[1].name,
                "alg_name": config_dict["alg_name"],
                "experiment_num": seed_dir.parents[0].name.strip('experiment')
            }

            outer_key = keys[bar_key]
            inner_key = keys[group_key]

            # Evaluation phase

            if performance_metric == 'evaluation_runs':

                assert n_eval_runs is not None

                try:
                    from evaluate import evaluate, get_evaluation_args
                except ImportError as e:
                    raise ImportError(
                        f"{e}\nTo evaluate models based on --performance_metric='evaluation_runs' "
                        f"alfred.benchmark assumes the following structure that the working directory contains "
                        f"a file called evaluate.py containing two functions: "
                        f"\n\t1. a function evaluate() that returns a score for each evaluation run"
                        f"\n\t2. a function get_evaluation_args() that returns a Namespace of arguments for evaluate()"
                    )

                # Sets config for evaluation phase

                eval_config = get_evaluation_args(overwritten_args="")
                eval_config.storage_name = seed_dir.parents[1].name
                eval_config.experiment_num = int(
                    seed_dir.parents[0].name.strip("experiment"))
                eval_config.seed_num = int(seed_dir.name.strip("seed"))
                eval_config.render = False
                eval_config.n_episodes = n_eval_runs
                eval_config.root_dir = root_dir

                # Evaluates agent and stores the return

                performance_data = evaluate(eval_config)

            else:

                # Loads training data

                loaded_recorder = Recorder.init_from_pickle_file(
                    filename=str(seed_dir / 'recorders' /
                                 'train_recorder.pkl'))

                performance_data = loaded_recorder.tape[performance_metric]

            # Aggregation phase

            if performance_aggregation == 'min':
                score = np.min(performance_data)

            elif performance_aggregation == 'max':
                score = np.max(performance_data)

            elif performance_aggregation == 'avg':
                score = np.mean(performance_data)

            elif performance_aggregation == 'last':
                score = performance_data[-1]

            elif performance_aggregation == 'mean_on_last_20_percents':
                eighty_percent_index = int(0.8 * len(performance_data))
                score = np.mean(performance_data[eighty_percent_index:])
            else:
                raise NotImplementedError

            all_seeds_scores.append(score)

        if outer_key not in scores.keys():
            scores[outer_key] = OrderedDict()

        scores[outer_key][inner_key] = np.stack(all_seeds_scores)

    os.makedirs(storage_dir / save_dir, exist_ok=True)

    with open(storage_dir / save_dir / f"{save_dir}_seed_scores.pkl",
              "wb") as f:
        pickle.dump(scores, f)

    scores_info = {
        'n_eval_runs': n_eval_runs,
        'performance_metric': performance_metric,
        'performance_aggregation': performance_aggregation
    }

    save_dict_to_json(scores_info,
                      filename=str(storage_dir / save_dir /
                                   f"{save_dir}_seed_scores_info.json"))
Example #4
def _make_vertical_densities_figure(storage_dirs, visuals_file,
                                    additional_curves_file, make_box_plot,
                                    queried_performance_metric,
                                    queried_performance_aggregation, save_dir,
                                    load_dir, logger):
    # Initialize container

    all_means = OrderedDict()
    long_labels = OrderedDict()
    titles = OrderedDict()
    labels = OrderedDict()
    colors = OrderedDict()
    markers = OrderedDict()
    all_performance_metrics = []
    all_performance_aggregation = []

    # Gathers data

    for storage_dir in storage_dirs:
        logger.debug(storage_dir)

        # Loads the scores and scores_info saved by summarize_search

        with open(str(storage_dir / load_dir / f"{load_dir}_seed_scores.pkl"),
                  "rb") as f:
            scores = pickle.load(f)

        scores_info = load_dict_from_json(
            str(storage_dir / "summary" / f"summary_seed_scores_info.json"))
        all_performance_metrics.append(scores_info['performance_metric'])
        all_performance_aggregation.append(
            scores_info['performance_aggregation'])

        x = list(scores.keys())[0]
        storage_name = storage_dir.name

        # Adds task_name the first time it is encountered

        _, _, _, outer_key, _ = DirectoryTree.extract_info_from_storage_name(
            storage_name)
        if outer_key not in list(all_means.keys()):
            all_means[outer_key] = OrderedDict()

        # Taking the mean across evaluations and seeds

        all_means[outer_key][storage_name] = [
            array.mean() for array in scores[x].values()
        ]

        if outer_key not in long_labels.keys():
            long_labels[outer_key] = [storage_dir]
        else:
            long_labels[outer_key].append(storage_dir)

    # Sanity checks

    assert len(set(all_performance_metrics)) == 1 and len(set(all_performance_aggregation)) == 1, \
        "Error: not all seeds have scores computed with the same performance metric or performance aggregation. " \
        "You should benchmark with --re_run_if_exists=True using the desired --performance_aggregation and " \
        "--performance_metric so that all seeds that you want to compare have the same metrics."
    actual_performance_metric = all_performance_metrics.pop()
    actual_performance_aggregation = all_performance_aggregation.pop()

    assert queried_performance_metric == actual_performance_metric and \
           queried_performance_aggregation == actual_performance_aggregation, \
        "Error: The performance_metric or performance_aggregation that was queried for the vertical_densities " \
        "is not the same as what was saved by summarize_search. You should benchmark with --re_run_if_exists=True " \
        "using the desired --performance_aggregation and  --performance_metric so that all seeds that you want " \
        "to compare have the same metrics."

    # Initialize figure

    n_graphs = len(all_means.keys())

    if n_graphs == 3:
        axes_shape = (1, 3)

    elif n_graphs > 1:
        i_max = int(np.ceil(np.sqrt(len(all_means.keys()))))
        axes_shape = (int(np.ceil(len(all_means.keys()) / i_max)), i_max)
    else:
        axes_shape = (1, 1)

    # Creates figure

    gs = gridspec.GridSpec(*axes_shape)
    fig = plt.figure(figsize=(12 * axes_shape[1], 5 * axes_shape[0]))

    for i, outer_key in enumerate(all_means.keys()):

        # Selects right ax object

        if axes_shape == (1, 1):
            current_ax = fig.add_subplot(gs[0, 0])
        elif any(np.array(axes_shape) == 1):
            current_ax = fig.add_subplot(gs[0, i])
        else:
            current_ax = fig.add_subplot(gs[i // axes_shape[1],
                                            i % axes_shape[1]])

        # Collect algorithm names

        if all([
                type(long_label) is pathlib.PosixPath
                for long_label in long_labels[outer_key]
        ]):
            algs = []
            for path in long_labels[outer_key]:
                _, _, alg, _, _ = DirectoryTree.extract_info_from_storage_name(
                    path.name)
                algs.append(alg)

        # Loads visuals dictionaries

        if visuals_file is not None:
            visuals = load_dict_from_json(visuals_file)
        else:
            visuals = None

        # Loads additional curves file

        if additional_curves_file is not None:
            additional_curves = load_dict_from_json(additional_curves_file)
        else:
            additional_curves = None

        # Sets visuals

        if type(visuals) is dict and 'titles_dict' in visuals.keys():
            titles[outer_key] = visuals['titles_dict'][outer_key]
        else:
            titles[outer_key] = outer_key

        if type(visuals) is dict and 'labels_dict' in visuals.keys():
            labels[outer_key] = [visuals['labels_dict'][alg] for alg in algs]
        else:
            labels[outer_key] = long_labels[outer_key]

        if type(visuals) is dict and 'colors_dict' in visuals.keys():
            colors[outer_key] = [visuals['colors_dict'][alg] for alg in algs]
        else:
            colors[outer_key] = [None for _ in long_labels[outer_key]]

        if type(visuals) is dict and 'markers_dict' in visuals.keys():
            markers[outer_key] = [visuals['markers_dict'][alg] for alg in algs]
        else:
            markers[outer_key] = [None for _ in long_labels[outer_key]]

        logger.info(
            f"Graph for {outer_key}:\n\tlabels={labels}\n\tcolors={colors}\n\tmarkers={markers}"
        )

        if additional_curves_file is not None:
            hlines = additional_curves['hlines'][outer_key]
        else:
            hlines = None

        # Makes the plots

        plot_vertical_densities(
            ax=current_ax,
            ys=list(all_means[outer_key].values()),
            labels=labels[outer_key],
            colors=colors[outer_key],
            make_boxplot=make_box_plot,
            title=titles[outer_key].upper(),
            ylabel=
            f"{actual_performance_aggregation}-{actual_performance_metric}",
            hlines=hlines)

    # Saves the figure

    plt.tight_layout()

    filename_addon = "boxplot" if make_box_plot else ""

    for storage_dir in storage_dirs:
        os.makedirs(storage_dir / save_dir, exist_ok=True)

        fig.savefig(storage_dir / save_dir /
                    f'{save_dir}_vertical_densities_{filename_addon}.pdf',
                    bbox_inches="tight")

        save_dict_to_json([str(d) for d in storage_dirs],
                          storage_dir / save_dir /
                          f'{save_dir}_vertical_densities_sources.json')

    plt.close(fig)
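
The subplot-grid sizing rule used above (a single row of three for exactly three groups, otherwise a near-square grid) can be isolated into a tiny helper for clarity. The sketch below mirrors that logic; the function name is illustrative only.

import numpy as np

def choose_axes_shape(n_graphs):
    """Picks a (rows, cols) subplot grid: one row of three for exactly three graphs, else a near-square grid."""
    if n_graphs == 3:
        return (1, 3)
    elif n_graphs > 1:
        cols = int(np.ceil(np.sqrt(n_graphs)))
        rows = int(np.ceil(n_graphs / cols))
        return (rows, cols)
    return (1, 1)

# Examples: choose_axes_shape(1) -> (1, 1), choose_axes_shape(3) -> (1, 3), choose_axes_shape(5) -> (2, 3)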
Example #5
def _make_benchmark_performance_figure(storage_dirs,
                                       save_dir,
                                       y_error_bars,
                                       logger,
                                       normalize_with_first_model=True,
                                       sort_bars=False):
    scores, scores_means, scores_err_up, scores_err_down, sorted_inner_keys, reference_key = _gather_scores(
        storage_dirs=storage_dirs,
        save_dir=save_dir,
        y_error_bars=y_error_bars,
        logger=logger,
        normalize_with_first_model=normalize_with_first_model,
        sort_bars=sort_bars)

    # Creates the graph

    n_bars_per_group = len(scores_means.keys())
    n_groups = len(scores_means[reference_key].keys())
    fig, ax = create_fig((1, 1),
                         figsize=(n_bars_per_group * n_groups, n_groups))

    bar_chart(ax,
              scores=scores_means,
              err_up=scores_err_up,
              err_down=scores_err_down,
              group_names=scores_means[reference_key].keys(),
              title="Average Return")

    n_training_seeds = scores[reference_key][list(
        scores_means[reference_key].keys())[0]].shape[0]

    scores_info = load_dict_from_json(
        filename=str(storage_dirs[0] / save_dir /
                     f"{save_dir}_seed_scores_info.json"))

    info_str = f"{n_training_seeds} training seeds" \
               f"\nn_eval_runs={scores_info['n_eval_runs']}" \
               f"\nperformance_metric={scores_info['performance_metric']}" \
               f"\nperformance_aggregation={scores_info['performance_aggregation']}"

    ax.text(0.80,
            0.95,
            info_str,
            transform=ax.transAxes,
            fontsize=12,
            verticalalignment='top',
            bbox=dict(facecolor='gray', alpha=0.1))

    plt.tight_layout()

    # Saves storage_dirs from which the graph was created for traceability

    for storage_dir in storage_dirs:
        os.makedirs(storage_dir / save_dir, exist_ok=True)
        fig.savefig(storage_dir / save_dir / f'{save_dir}_performance.png')
        save_dict_to_json(
            {
                'sources': [str(d) for d in storage_dirs],
                'n_training_seeds': n_training_seeds,
                'n_eval_runs': scores_info['n_eval_runs'],
                'performance_metric': scores_info['performance_metric'],
                'performance_aggregation':
                scores_info['performance_aggregation']
            }, storage_dir / save_dir / f'{save_dir}_performance_sources.json')

    plt.close(fig)

    # Sanity-checks that no seed has a NaN score (to avoid drawing conclusions from it)

    expe_with_nan_scores = []
    for outer_key in scores.keys():
        for inner_key, indiv_score in scores[outer_key].items():
            if math.isnan(indiv_score.mean()):
                expe_with_nan_scores.append(outer_key + "/experiment" +
                                            inner_key)

    if len(expe_with_nan_scores) > 0:
        raise ValueError(
            'Some experiments have NaN scores. Remove them from storage and clean the summary folder to continue.\n'
            'Experiments with NaN scores:\n' +
            '\n'.join(expe_with_nan_scores))

    return sorted_inner_keys
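
The final sanity check above flags any experiment whose seed scores average to NaN. The same check, extracted into a self-contained helper with an illustrative name, looks like this:

import math
import numpy as np

def find_experiments_with_nan_scores(scores):
    """Returns '<outer_key>/experiment<inner_key>' identifiers whose seed scores average to NaN."""
    flagged = []
    for outer_key, inner_scores in scores.items():
        for inner_key, indiv_score in inner_scores.items():
            if math.isnan(np.asarray(indiv_score, dtype=float).mean()):
                flagged.append(f"{outer_key}/experiment{inner_key}")
    return flagged

# Example:
# scores = {'taskA': {'1': np.array([1., 2.]), '2': np.array([float('nan'), 3.])}}
# find_experiments_with_nan_scores(scores) -> ['taskA/experiment2']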
Example #6
def prepare_schedule(desc, schedule_file, root_dir, add_to_folder, resample, logger, ask_for_validation):
    # Infers the search_type (grid or random) from provided schedule_file

    schedule_file_path = Path(schedule_file)

    assert schedule_file_path.suffix == '.py', f"The provided --schedule_file should be a python file " \
                                               f"(see: alfred/schedule_examples). You provided " \
                                               f"'--schedule_file={schedule_file}'"

    if "grid_schedule" in schedule_file_path.name:
        search_type = 'grid'
    elif "random_schedule" in schedule_file_path.name:
        search_type = 'random'
    else:
        raise ValueError(f"Provided --schedule_file has the name '{schedule_file_path.name}'. "
                         "Only grid_schedule's and random_schedule's are supported. "
                         "The name of the provided '--schedule_file' must fit one of the following forms: "
                         "'grid_schedule_NAME.py' or 'random_schedule_NAME.py'.")

    if not schedule_file_path.exists():
        raise ValueError(f"Cannot find the provided '--schedule_file': {schedule_file_path}")

    # Gets experiments parameters

    schedule_module = re.sub(r'\.py$', '', ".".join(schedule_file.split('/')))

    if search_type == 'grid':

        VARIATIONS, ALG_NAMES, TASK_NAMES, SEEDS, experiments, varied_params, get_run_args, schedule = extract_schedule_grid(schedule_module)

    elif search_type == 'random':

        param_samples, ALG_NAMES, TASK_NAMES, SEEDS, experiments, varied_params, get_run_args, schedule = extract_schedule_random(schedule_module)

    else:
        raise NotImplementedError

    # Creates a list of alg_agent and task_name unique combinations

    if desc is not None:
        assert add_to_folder is None, "If --desc is defined, a new storage_dir folder will be created. " \
                                      "No --add_to_folder should be provided."

        desc = f"{search_type}_{desc}"
        agent_task_combinations = list(itertools.product(ALG_NAMES, TASK_NAMES))
        mode = "NEW_STORAGE"

    elif add_to_folder is not None:
        assert (get_root(root_dir) / add_to_folder).exists(), f"{add_to_folder} does not exist."
        assert desc is None, "If --add_to_folder is defined, new experiments will be added to the existing folder. " \
                             "No --desc should be provided."

        storage_name_id, git_hashes, alg_name, task_name, desc = \
            DirectoryTree.extract_info_from_storage_name(add_to_folder)

        agent_task_combinations = list(itertools.product([alg_name], [task_name]))
        mode = "EXISTING_STORAGE"

    else:
        raise NotImplementedError

    # Duplicates or resamples hyperparameters to match the number of agent_task_combinations

    n_combinations = len(agent_task_combinations)

    experiments = [experiments]
    if search_type == 'random':
        param_samples = [param_samples]

    if search_type == 'random' and resample:
        assert not add_to_folder
        for i in range(n_combinations - 1):
            param_sa, _, _, _, expe, varied_pa, get_run_args, _ = extract_schedule_random(schedule_module)
            experiments.append(expe)
            param_samples.append(param_sa)

    else:
        experiments = experiments * n_combinations
        if search_type == 'random':
            param_samples = param_samples * n_combinations

    # Printing summary of schedule_xyz.py

    info_str = f"\n\nPreparing a {search_type.upper()} search over {len(experiments)} experiments, {len(SEEDS)} seeds"
    info_str += f"\nALG_NAMES: {ALG_NAMES}"
    info_str += f"\nTASK_NAMES: {TASK_NAMES}"
    info_str += f"\nSEEDS: {SEEDS}"

    if search_type == "grid":
        info_str += f"\n\nVARIATIONS:"
        for key in VARIATIONS.keys():
            info_str += f"\n\t{key}: {VARIATIONS[key]}"
    else:
        info_str += f"\n\nParams to be varied over: {varied_params}"

    info_str += f"\n\nDefault {config_to_str(get_run_args(overwritten_cmd_line=''))}\n"

    logger.debug(info_str)

    # Asking for user validation

    if ask_for_validation:

        if mode == "NEW_STORAGE":
            git_hashes = DirectoryTree.get_git_hashes()

            string = "\n"
            for alg_name, task_name in agent_task_combinations:
                string += f"\n\tID_{git_hashes}_{alg_name}_{task_name}_{desc}"
            logger.debug(f"\n\nAbout to create {len(agent_task_combinations)} storage directories, "
                         f"each with {len(experiments)} experiments:"
                         f"{string}")

        else:
            n_existing_experiments = len([path for path in (get_root(root_dir) / add_to_folder).iterdir()
                                          if path.name.startswith('experiment')])

            logger.debug(f"\n\nAbout to add {len(experiments)} experiment folders in the following directory"
                         f" (there are currently {n_existing_experiments} in this folder):"
                         f"\n\t{add_to_folder}")

        answer = input("\nShould we proceed? [y or n]")
        if answer.lower() not in ['y', 'yes']:
            logger.debug("Aborting...")
            sys.exit()

    logger.debug("Starting...")

    # For each storage_dir to be created

    all_storage_dirs = []

    for alg_task_i, (alg_name, task_name) in enumerate(agent_task_combinations):

        # Determines storing ID (if new storage_dir)

        if mode == "NEW_STORAGE":
            tmp_dir_tree = DirectoryTree(alg_name=alg_name, task_name=task_name, desc=desc, seed=1, root=root_dir)
            storage_name_id = tmp_dir_tree.storage_dir.name.split('_')[0]

        # For each experiment...

        for param_dict in experiments[alg_task_i]:

            # Creates a dict alias to a training config object initialized with default arguments

            config = get_run_args(overwritten_cmd_line="")
            config_dict = vars(config)

            # Modifies the config for this particular experiment

            config.alg_name = alg_name
            config.task_name = task_name
            config.desc = desc

            config_unique_dict = {k: v for k, v in param_dict.items() if k in varied_params}
            config_unique_dict['alg_name'] = config.alg_name
            config_unique_dict['task_name'] = config.task_name
            config_unique_dict['seed'] = config.seed

            for param_name in param_dict.keys():
                if param_name not in config_dict.keys():
                    raise ValueError(f"'{param_name}' taken from the schedule is not a valid hyperparameter "
                                     f"i.e. it cannot be found in the Namespace returned by get_run_args().")
                else:
                    config_dict[param_name] = param_dict[param_name]

            # Create the experiment directory

            dir_tree = create_experiment_dir(storage_name_id, config, config_unique_dict, SEEDS, root_dir, git_hashes)

        all_storage_dirs.append(dir_tree.storage_dir)

        # Saves VARIATIONS in the storage directory

        first_experiment_created = int(dir_tree.current_experiment.strip('experiment')) - len(experiments[0]) + 1
        last_experiment_created = first_experiment_created + len(experiments[0]) - 1

        if search_type == 'grid':

            VARIATIONS['alg_name'] = ALG_NAMES
            VARIATIONS['task_name'] = TASK_NAMES
            VARIATIONS['seed'] = SEEDS

            key = f'{first_experiment_created}-{last_experiment_created}'

            if (dir_tree.storage_dir / 'variations.json').exists():
                variations_dict = load_dict_from_json(filename=str(dir_tree.storage_dir / 'variations.json'))
                assert key not in variations_dict.keys()
                variations_dict[key] = VARIATIONS
            else:
                variations_dict = {key: VARIATIONS}

            save_dict_to_json(variations_dict, filename=str(dir_tree.storage_dir / 'variations.json'))
            open(str(dir_tree.storage_dir / 'GRID_SEARCH'), 'w+').close()

        elif search_type == 'random':
            len_samples = len(param_samples[alg_task_i])
            fig_height = 2 * len_samples if len_samples > 0 else 2
            fig, ax = plt.subplots(len_samples, 1, figsize=(6, fig_height))
            if not hasattr(ax, '__iter__'):
                ax = [ax]

            plot_sampled_hyperparams(ax, param_samples[alg_task_i],
                                     log_params=['lr', 'tau', 'initial_alpha', 'grad_clip_value', 'lamda1', 'lamda2'])

            j = 1
            while True:
                if (dir_tree.storage_dir / f'variations{j}.png').exists():
                    j += 1
                else:
                    break
            fig.savefig(str(dir_tree.storage_dir / f'variations{j}.png'))
            plt.close(fig)

            open(str(dir_tree.storage_dir / 'RANDOM_SEARCH'), 'w+').close()

        # Printing summary

        logger.info(f'Created directories '
                    f'{str(dir_tree.storage_dir)}/experiment{first_experiment_created}-{last_experiment_created}')

    # Saving the list of created storage_dirs in a text file located with the provided schedule_file

    schedule_name = Path(schedule.__file__).parent.stem
    with open(Path(schedule.__file__).parent / f"list_searches_{schedule_name}.txt", "a+") as f:
        for storage_dir in all_storage_dirs:
            f.write(f"{storage_dir.name}\n")

    logger.info(f"\nEach of these experiments contain directories for the following seeds: {SEEDS}")
Example #7
def train(config, dir_tree=None, pbar="default_pbar", logger=None):

    irl_alg_name, rl_alg_name = config.alg_name.split("X")
    config.irl_alg_name = irl_alg_name
    config.rl_alg_name = rl_alg_name

    # Override config

    if config.overide_default_args:
        config = alg_manager.overide_args(args=config, alg_name=rl_alg_name)
        config = alg_manager.overide_args(args=config, alg_name=irl_alg_name)

    dir_tree, logger, pbar = create_management_objects(dir_tree=dir_tree,
                                                       logger=logger,
                                                       pbar=pbar,
                                                       config=config)
    if pbar is not None:
        pbar.total = config.max_transitions if config.max_episodes is None else config.max_episodes

    # Manages GPU usage

    train_device, rollout_device, logger = get_computing_devices(
        use_gpu=config.use_gpu,
        torch=torch,
        do_rollouts_on_cpu=config.do_rollouts_on_cpu,
        logger=logger)

    config.train_device = train_device.type
    config.rollout_device = rollout_device.type

    # Sanity check config and save it
    config = sanity_check_args(config=config, logger=logger)
    config.experiment_name = str(dir_tree.experiment_dir)
    save_config_to_json(config, str(dir_tree.seed_dir / 'config.json'))

    if (dir_tree.seed_dir / "config_unique.json").exists():
        config_unique_dict = load_dict_from_json(dir_tree.seed_dir /
                                                 "config_unique.json")
        config_unique_dict.update(
            (k, config.__dict__[k])
            for k in config_unique_dict.keys() & config.__dict__.keys())
        save_dict_to_json(config_unique_dict,
                          str(dir_tree.seed_dir / 'config_unique.json'))

    # Importing wandb (or not)

    if not config.sync_wandb:
        os.environ['WANDB_MODE'] = 'dryrun'
        os.environ['WANDB_DISABLE_CODE'] = 'true'

    if config.use_wandb:
        import wandb
        os.environ["WANDB_DIR"] = str(dir_tree.seed_dir.absolute())
        wandb.init(id=dir_tree.get_run_name(),
                   project='il_without_rl',
                   entity='irl_la_forge',
                   reinit=True)
        wandb.config.update(config, allow_val_change=True)
        wandb_save_dir = Path(
            wandb.run.dir) if config.save_best_model_to_wandb else None
    else:
        wandb = NOP()
        wandb_save_dir = None

    # Create env

    env = make_env(config.task_name)
    test_env = make_env(config.task_name)
    set_seeds(config.seed, env)
    set_seeds(config.seed, test_env)
    env_dims = ml.get_env_dims(env)

    # Load demonstrations

    demo_trajectories = load_expert_trajectories(
        task_name=config.task_name,
        demos_name=config.demos_name,
        demos_folder=config.demos_folder)

    # Convert demos into TrajCollection

    expert_traj_collection = TrajCollection([
        Traj(traj=demo, obs_space=env_dims['obs_space'])
        for demo in demo_trajectories
    ])

    logger.info(
        f"Number of expert trajectories: {len(expert_traj_collection)} \n"
        f"Number of expert transitions: {expert_traj_collection.n_transitions} "
    )

    get_expert_perfo(env, expert_traj_collection, logger)

    # Loading init_states from Pommerman demos

    if config.task_name in POMMERMAN_TASKS:
        demos = load_expert_demos(config.demos_folder, config.demos_name)
        env.init_game_states = load_game_states_from_demos(demos, idx=0)
        test_env.init_game_states = env.init_game_states

    # Create learner

    irl_learner = alg_manager.named_init_from_config(env_dims, config,
                                                     irl_alg_name)
    irl_learner.prep_training(train_device)
    irl_learner.add_expert_path(expert_traj_collection)
    irl_learner.prep_rollout(rollout_device)

    if rl_alg_name == '':
        rl_learner = irl_learner
    else:
        rl_learner = alg_manager.named_init_from_config(
            env_dims, config, rl_alg_name)

    # Creates a buffer to store the data according to the on-policy formulation

    buffer = OnPolicyBuffer(obs_space=env_dims['obs_space'])

    # Creates recorders

    if config.task_name in POMMERMAN_TASKS:
        irl_learner.metrics_to_record.add('n_woods')
        irl_learner.metrics_to_record.add('n_enemies')

    os.makedirs(dir_tree.recorders_dir, exist_ok=True)
    train_recorder = Recorder(metrics_to_record=irl_learner.metrics_to_record
                              | rl_learner.metrics_to_record)

    # Initialize counters

    total_transitions = 0
    total_transitions_at_last_done = 0
    episode = 0
    irl_epoch = 0
    rl_epoch = 0
    cumul_eval_return = 0.
    eval_step = 0
    ret = 0
    best_eval_return = -float('inf')

    if config.max_episodes is not None:
        max_itr, heartbeat_ite = config.max_episodes, config.episodes_between_saves
    else:
        max_itr, heartbeat_ite = config.max_transitions, config.transitions_between_saves
    it = TrainingIterator(max_itr=max_itr, heartbeat_ite=heartbeat_ite)

    to_save = [irl_learner, rl_learner
               ] if not rl_alg_name == '' else [irl_learner]
    to_evaluate = rl_learner  # the performance can only come from one model
    to_watch = to_save if config.wandb_watch_models else []

    eval_perf_queue = deque(maxlen=5)
    ml.wandb_watch(wandb, to_watch)

    # Saves initial model

    ite_model_save_name = f'model_ep{episode}.pt'
    save(to_save, dir_tree.seed_dir, suffix=ite_model_save_name)

    # Initial reset

    state = env.reset()

    disp = config.render
    rl_learner.prep_rollout(rollout_device)

    # Deals with on-policy formulation only for irl updates

    should_update_irl = alg_manager.should_update_on_policy_irl

    # Training loop

    while True:

        # ENVIRONMENT STEP

        if disp:
            env.render()

        action = rl_learner.act(state, sample=True)
        next_state, reward, done, info = env.step(action)

        buffer.push(state, action, next_state, reward, ml.mask(done))

        state = next_state

        ret += reward
        total_transitions += 1

        if config.max_transitions is not None:
            it.touch()
            if pbar is not None:
                pbar.update()

        # episode ending

        if done:
            it.record('return', ret)
            it.record('episode_len',
                      total_transitions - total_transitions_at_last_done)

            for info_key, info_value in info.items():
                if info_key in irl_learner.metrics_to_record:
                    it.record(info_key, info_value)

            state = env.reset()

            ret = 0
            disp = False
            total_transitions_at_last_done = total_transitions

            episode += 1

            if config.max_episodes is not None:
                it.touch()  # increments the training iterator
                if pbar is not None:
                    pbar.update()

        # TRAIN DISCRIMINATOR

        if should_update_irl(episode=episode,
                             done=done,
                             n_transitions=buffer.n_transitions,
                             total_transitions=total_transitions,
                             config=config):

            # get the training data

            data = buffer.get_all_current_as_np()

            # we prep_rollout because we will eval the policy and not train it
            # but we set the device to train because data is on this device
            rl_learner.prep_rollout(train_device)
            irl_learner.prep_training(train_device)

            return_dict = irl_learner.fit(
                data=data,
                batch_size=config.d_batch_size,
                policy=rl_learner.get_policy(),
                n_epochs_per_update=config.d_epochs_per_update,
                logger=logger,
                heavy_record=config.heavy_record,
                config=config)

            # updates counters

            it.update(return_dict)
            irl_epoch += 1
            if rl_alg_name == "":
                # there is no explicit RL algo so we actually just updated the policy
                rl_epoch = irl_epoch

            # set models back to roll-out mode

            rl_learner.prep_rollout(rollout_device)
            irl_learner.prep_rollout(rollout_device)

            # handles the on-policy buffers

            if rl_alg_name == "":
                # the policy changed (because we updated generator) and thus we must flush the irl_buffer
                buffer.clear()

        # TRAIN RL

        if rl_alg_name != "" and alg_manager.should_update_rl(
                total_transitions=total_transitions,
                episode=episode,
                n_transitions=buffer.n_transitions,
                done=done,
                config=config,
                name=rl_alg_name,
                did_irl_update_first=irl_epoch > rl_epoch):

            # we prep_rollout because we will eval the discriminator and not train it
            # but we set the device to train because data is on this device
            irl_learner.prep_rollout(train_device)
            rl_learner.prep_training(train_device)

            # Takes the on-policy data and flushes the buffer (because the generator is about to change)

            experiences = buffer.flush()

            # Evaluate the estimated rewards

            experiences = irl_learner.update_reward(
                experiences=experiences,
                policy=rl_learner.get_policy(),
                ent_wt=config.ent_wt)

            # Train the model

            return_dict = rl_learner.train_model(experiences)
            it.update(return_dict)

            # Updates counter

            rl_epoch += 1

            # Set models back to roll-out mode

            rl_learner.prep_rollout(rollout_device)
            irl_learner.prep_rollout(rollout_device)

        # PLOTTING AND RECORDING

        if it.heartbeat:

            # Recording some metrics

            new_recordings = {
                'total_transitions': total_transitions,
                'irl_epoch': irl_epoch,
                'rl_epoch': rl_epoch,
                'episode': episode,
                'eval_step': eval_step
            }

            performance_metrics = to_evaluate.evaluate_perf(
                env=test_env,
                n_episodes=config.number_of_eval_episodes,
                seed=config.validation_seed,
                sample=config.sample_in_eval,
                render=config.render_evaluate)

            eval_step += 1
            cumul_eval_return += performance_metrics['eval_return']

            performance_metrics['cumul_eval_return'] = cumul_eval_return
            performance_metrics[
                'avg_eval_return'] = cumul_eval_return / eval_step

            new_recordings.update(performance_metrics)

            means_to_log = it.pop_all_means()
            new_recordings.update(means_to_log)
            train_recorder.write_to_tape(new_recordings)
            train_recorder.save(dir_tree.recorders_dir / 'train_recorder.pkl')

            wandb.log(new_recordings, step=eval_step)

            # Saving best model

            eval_perf_queue.append(performance_metrics['eval_return'])
            running_avg_perf = np.mean(eval_perf_queue)

            if running_avg_perf >= best_eval_return:
                new_best = running_avg_perf
                save(to_save, dir_tree.seed_dir, 'model_best.pt')
                save(to_save, wandb_save_dir, 'model_best.pt')
                best_eval_return = new_best

                logger.info(
                    f"Eval Step {eval_step}: Saved new best model at {str(dir_tree.seed_dir / 'model_best.pt')} "
                    f"with average perfo of {new_best}")

            # Saving current model periodically (even if not best)

            if (it.itr % (10 * it.heartbeat_ite)) == 0:
                remove(models=to_save,
                       directory=dir_tree.seed_dir,
                       suffix=ite_model_save_name)
                ite_model_save_name = f"model_eval_step_{eval_step}.pt"
                save(to_save, dir_tree.seed_dir, ite_model_save_name)

                logger.info(
                    f"Eval step {eval_step}: Saved model {str(dir_tree.seed_dir / f'model_ep_{eval_step}.pt')} "
                    f"(avg perfo {running_avg_perf})")

            # Creating and saving plots
            try:
                irl_learner.save_training_graphs(train_recorder,
                                                 dir_tree.seed_dir)
            except ImportError:
                pass
            disp = config.render

        if config.max_episodes is not None:
            if episode > config.max_episodes:
                break

        if config.max_transitions is not None:
            if total_transitions > config.max_transitions:
                break

    # Saving last model

    save(to_save, dir_tree.seed_dir, f"model_ep_{eval_step}.pt")

    logger.info(f"Saved last model: model_ep_{eval_step}.pt")
    logger.info(f"{Path(os.path.abspath(__file__)).parent.name}/{__file__}")

    # Finishes logging before exiting the training script

    wandb.join()
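
Both training loops select the "best" model from a running average over the last five evaluation returns (the eval_perf_queue deque). A self-contained sketch of that bookkeeping, with an illustrative function name, is given below.

from collections import deque
import numpy as np

def sketch_best_model_tracking(eval_returns):
    """Yields the eval steps at which the 5-evaluation running average reaches a new best."""
    eval_perf_queue = deque(maxlen=5)
    best_eval_return = -float('inf')
    for eval_step, eval_return in enumerate(eval_returns, start=1):
        eval_perf_queue.append(eval_return)
        running_avg_perf = np.mean(eval_perf_queue)
        if running_avg_perf >= best_eval_return:
            best_eval_return = running_avg_perf
            yield eval_step  # here the real training loop would save 'model_best.pt'

# Example: list(sketch_best_model_tracking([1., 2., 0.5, 3.])) -> [1, 2, 4]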