Esempio n. 1
0
    def _create_experiment(self, environment, environment_params, agent_name,
                           agent_builder_params):
        """
        Build the benchmark experiment for one environment/agent pair.

        Args:
            environment (str): either a plain environment name or a string
                of the form ``'<name>.<id>'``. In the second form the part
                after the ``'.'`` is handed to the environment builder as
                ``env_id``;
            environment_params (dict): parameters forwarded to
                ``EnvironmentBuilder``. The dictionary passed in is not
                modified;
            agent_name (str): name of the agent; the builder is looked up
                as ``'<agent_name>Builder'`` in
                ``mushroom_rl_benchmark.builders``;
            agent_builder_params (dict): keyword arguments passed to the
                ``default`` constructor of the agent builder.

        Returns:
            The ``BenchmarkExperiment`` created from the agent builder, the
            environment builder and a logger dedicated to this pair.

        Raises:
            AttributeError: if ``mushroom_rl_benchmark.builders`` has no
                ``'<agent_name>Builder'`` attribute.

        """
        separator = '.'
        if separator in environment:
            environment_name, environment_id = environment.split(separator)
            # Build a new dict so the caller's parameters stay untouched.
            environment_params = dict(env_id=environment_id,
                                      **environment_params)
            # The separator is replaced before the name is used in log_id.
            environment = environment.replace(separator, '_')
        else:
            environment_name = environment

        # One logger per environment/agent pair, placed under this object's
        # own logger path.
        logger = BenchmarkLogger(log_dir=self.logger.get_path(),
                                 log_id='{}/{}'.format(environment,
                                                       agent_name),
                                 use_timestamp=False)

        try:
            builder = getattr(mushroom_rl_benchmark.builders,
                              '{}Builder'.format(agent_name))
        except AttributeError as e:
            logger.exception(e)
            # Without re-raising, ``builder`` would be unbound below and the
            # real cause would be hidden behind an UnboundLocalError.
            raise

        agent_builder = builder.default(**agent_builder_params)
        env_builder = EnvironmentBuilder(environment_name, environment_params)

        exp = BenchmarkExperiment(agent_builder, env_builder, logger)

        return exp
Esempio n. 2
0
from mushroom_rl_benchmark.builders import EnvironmentBuilder, A2CBuilder

if __name__ == '__main__':
    # Example script: run an A2C benchmark on the Gym 'Pendulum-v0'
    # environment and log how long the whole run takes.

    logger = BenchmarkLogger(log_dir='./logs', log_id='a2c_pendulum')

    agent_builder = A2CBuilder.default(actor_lr=7e-4,
                                       critic_lr=7e-4,
                                       n_features=32)

    env_name = 'Gym'
    env_params = dict(env_id='Pendulum-v0', horizon=200, gamma=.99)

    # Forwarded to ``exp.run`` through its ``parallel`` argument.
    parallel = dict(max_concurrent_runs=10)

    env_builder = EnvironmentBuilder(env_name, env_params)
    logger.info('Environment is imported')

    exp = BenchmarkExperiment(agent_builder, env_builder, logger)
    logger.info('BenchmarkExperiment was built successfully')

    # perf_counter() is monotonic, so the measured duration cannot be
    # distorted by system clock adjustments during a long run.
    start_time = time.perf_counter()
    exp.run(exec_type='parallel',
            n_runs=10,
            n_epochs=100,
            n_steps=30000,
            n_episodes_test=5,
            parallel=parallel)
    end_time = time.perf_counter()
    logger.info('Execution time: {} SEC'.format(end_time - start_time))