Example #1
    # Parse configuration file
    batch_size = configuration['batch_size']
    seed = configuration['seed']
    n_epochs = configuration['n_epochs']

    # Set all seeds for full reproducibility
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
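    # Disabling the cuDNN autotuner and forcing deterministic kernels trades
    # speed for run-to-run reproducibility on GPU.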
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    latent_dim = configuration['Zdim']
    # Note: the offline directories below are rooted at '../experiments',
    # while this creates './experiments' relative to the working directory.
    if not os.path.exists('experiments'):
        print('mkdir experiments')
        os.mkdir('experiments')

    if configuration['encode']:
        experiment = OfflineExperiment(
            project_name="ali", workspace='timothynest',
            offline_directory='../experiments/' + configuration['experiment'])
    elif configuration['cluster']:
        experiment = OfflineExperiment(
            project_name="ali", workspace='timothynest',
            offline_directory='../experiments/' + configuration['experiment'] + '/cluster')
    # One of 'encode' or 'cluster' must be truthy here; otherwise `experiment`
    # is never defined and the call below raises a NameError.
    experiment.set_name(name=configuration['experiment'])

    experiment.log_parameters(configuration)
    experiment.add_tag(configuration['experiment'])

    # Initiate experiment
    main(datapath, configuration, experiment)
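
The snippet above reads a handful of keys from its configuration dictionary. For reference, a minimal dictionary covering every key the snippet touches might look like the sketch below; the concrete values and the experiment name are placeholders, not taken from the original source.

configuration = {
    'batch_size': 64,            # hypothetical value
    'seed': 42,                  # hypothetical value
    'n_epochs': 100,             # hypothetical value
    'Zdim': 128,                 # latent dimension read into latent_dim
    'encode': True,              # selects the plain offline directory
    'cluster': False,            # selects the '/cluster' offline directory
    'experiment': 'my_ali_run',  # placeholder experiment name
}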
Example #2
def run_experiment_iter(i, experiment, train_iter, nExp, agent_list, env,
                        video, user_seed, experiment_name, log_params, debug,
                        project_name, sps, sps_es, **kwargs):
    """
    Function used to parallelize the run_experiment calculations.

    Parameters
    ----------
    i : int
        Index of the agent being trained.

    Raises
    ------
    NotImplementedError
        Raised when Comet logging is requested, to signal that user intervention
        is required (namely, setting the api_key and the workspace).

    Returns
    -------
    rewards : array
        An array with the cumulative rewards, where each column corresponds to
        an agent (random seed), and each row to a training iteration.
    arms : array
        An array with the number of agent arms, where each column corresponds
        to an agent (random seed), and each row to a training iteration.
    agent : Agent
        The trained agent.

    """
    if debug:
        start = time.time()
        print("Experiment {0} out of {1}...".format(i + 1, nExp))
    if not user_seed:
        seed = int.from_bytes(os.urandom(4), 'big')
    else:
        seed = user_seed

    if experiment_name:
        raise NotImplementedError(
            "Before using Comet, you need to come here and set your API key")
        experiment = Experiment(api_key=None,
                                project_name=project_name,
                                workspace=None,
                                display_summary=False,
                                offline_directory="offline")
        experiment.add_tag(experiment_name)
        experiment.set_name("{0}_{1}".format(experiment_name, i))
        # Sometimes adding the tag fails
        log_params["experiment_tag"] = experiment_name
        experiment.log_parameters(log_params)

    agent = agent_list[i]
    if sps_es:  # This one overrides sps
        rewards, arms, agent = run_sps_es_experiment(agent,
                                                     env,
                                                     train_iter,
                                                     seed=seed,
                                                     video=video,
                                                     experiment=experiment,
                                                     **kwargs)
    elif sps:
        rewards, arms, agent = run_sps_experiment(agent,
                                                  env,
                                                  train_iter,
                                                  seed=seed,
                                                  video=video,
                                                  experiment=experiment,
                                                  **kwargs)
    else:
        rewards, arms, agent = run_aql_experiment(agent,
                                                  env,
                                                  train_iter,
                                                  seed=seed,
                                                  video=video,
                                                  experiment=experiment,
                                                  **kwargs)
    agent_list[i] = agent

    if experiment:
        experiment.end()

    if debug:
        end = time.time()
        elapsed = end - start
        units = "secs"
        if elapsed > 3600:
            elapsed /= 3600
            units = "hours"
        elif elapsed > 60:
            elapsed /= 60
            units = "mins"
        print("Time elapsed: {0:.02f} {1}".format(elapsed, units))

    return rewards, arms, agent
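
The docstring states that this helper exists to parallelize the run_experiment calculations. A minimal sketch of how it might be dispatched with joblib is shown below; the joblib dependency, the n_jobs value, and the surrounding variables (train_iter, agent_list, env) are assumptions for illustration, not part of the original example.

from joblib import Parallel, delayed

# Hypothetical driver: run one experiment per agent/seed in parallel,
# collecting (rewards, arms, agent) tuples from each worker.
results = Parallel(n_jobs=4)(
    delayed(run_experiment_iter)(
        i, None, train_iter, len(agent_list), agent_list, env,
        video=False, user_seed=None, experiment_name=None,
        log_params={}, debug=True, project_name="demo",
        sps=False, sps_es=False)
    for i in range(len(agent_list)))

rewards, arms, agents = zip(*results)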