Ejemplo n.º 1
0
if __name__ == '__main__':
    # Entry point for a single benchmark run: restore the pickled builders
    # found in the log directory, execute one run with them and store the
    # per-run results through the logger.

    log_dir, run_args = read_arguments_run()

    # Both builders are read from pickle files inside ``log_dir``, agent
    # first and environment second.
    # NOTE(review): this relies on the underscore-prefixed helper
    # ``BenchmarkLogger._load_pickle``; presumably the files were written by
    # the launching process -- confirm they always come from a trusted source.
    agent_builder, env_builder = (
        BenchmarkLogger._load_pickle(os.path.join(log_dir, file_name))
        for file_name in ('agent_builder.pkl', 'environment_builder.pkl')
    )

    # The log id is derived from the seed carried in the run arguments.
    log_id = f"run_{run_args['seed']!s}"

    logger = BenchmarkLogger(
        log_dir=log_dir,
        log_id=log_id,
        use_timestamp=False,
    )

    logger.info('Starting experiment.')
    result = exec_run(agent_builder, env_builder, **run_args)
    logger.info('Saving result.')

    # Read the flag before anything is saved, as the entropy series is only
    # stored when the agent builder asks for it.
    cmp_E = agent_builder.compute_policy_entropy

    # Each series is wrapped in a one-element list before being saved.
    logger.save_Js([result['Js']])
    logger.save_Rs([result['Rs']])
    logger.save_Qs([result['Qs']])
    if cmp_E:
        logger.save_policy_entropies([result['Es']])

    new_score = result['score']
    new_agent = result['builders']

    # The first three score entries are labelled J, R and Q respectively.
    stats = {
        'best_J': new_score[0],
        'best_R': new_score[1],
        'best_Q': new_score[2],
    }
Ejemplo n.º 2
0
if __name__ == '__main__':
    # Benchmark script: builds an A2C agent builder and a Gym
    # 'Pendulum-v0' environment builder, runs the experiment as 10 parallel
    # runs, logs the elapsed time and finally calls ``exp.save_plot()``.

    logger = BenchmarkLogger(log_dir='./logs', log_id='a2c_pendulum')

    agent_builder = A2CBuilder.default(actor_lr=7e-4,
                                       critic_lr=7e-4,
                                       n_features=32)

    env_name = 'Gym'
    env_params = dict(env_id='Pendulum-v0', horizon=200, gamma=.99)

    # Options forwarded to ``exp.run`` through its ``parallel`` argument.
    parallel = dict(max_concurrent_runs=10)

    env_builder = EnvironmentBuilder(env_name, env_params)
    logger.info('Environment is imported')

    exp = BenchmarkExperiment(agent_builder, env_builder, logger)
    logger.info('BenchmarkExperiment was built successfully')

    # Use a monotonic, high-resolution clock for the duration: the wall
    # clock returned by time.time() can jump (NTP sync, manual changes)
    # during a long benchmark and yield a wrong or even negative interval.
    start_time = time.perf_counter()
    exp.run(exec_type='parallel',
            n_runs=10,
            n_epochs=100,
            n_steps=30000,
            n_episodes_test=5,
            parallel=parallel)
    end_time = time.perf_counter()
    logger.info('Execution time: {} SEC'.format(end_time - start_time))

    exp.save_plot()