Esempio n. 1
0
    def test_simple_cmd(self):
        """Launch a grid of ``echo`` experiments through the runner, twice.

        The same experiments are first run with the default
        ``RunDescription`` settings, then again with
        ``experiment_dirs_sf_format=False`` and custom experiment argument
        names.  The test passes when both ``run`` calls return without
        raising and the run directory can be removed afterwards.
        """
        # logging.disable() is process-wide, so it must be undone even when
        # the test body raises; otherwise later tests run with logging muted.
        logging.disable(logging.INFO)
        run_root = None
        succeeded = False
        try:
            echo_params = ParamGrid([
                ('p1', [3.14, 2.71]),
                ('p2', ['a', 'b', 'c']),
                ('p3', list(np.arange(3))),
            ])
            experiments = [
                Experiment('test_echo1', 'echo', echo_params.generate_params(randomize=True)),
                Experiment('test_echo2', 'echo', echo_params.generate_params(randomize=False)),
            ]
            train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
            root_dir_name = '__test_run__'
            run_root = join(train_dir, root_dir_name)
            rd = RunDescription(root_dir_name, experiments)

            args = runner_argparser().parse_args([])
            args.max_parallel = 8
            args.pause_between = 0
            args.train_dir = train_dir

            run(rd, args)

            rd2 = RunDescription(
                root_dir_name,
                experiments,
                experiment_dirs_sf_format=False,
                experiment_arg_name='--experiment_tst',
                experiment_dir_arg_name='--dir',
            )
            run(rd2, args)
            succeeded = True
        finally:
            logging.disable(logging.NOTSET)
            if run_root is not None:
                # On success a failing removal still fails the test (as
                # before); after an error the cleanup is best-effort so it
                # cannot mask the original exception.
                shutil.rmtree(run_root, ignore_errors=not succeeded)
Esempio n. 2
0
    def test_param_grid(self):
        """Check key coverage and non-randomized ordering of ``ParamGrid``."""
        param_grid = ParamGrid([
            ('p1', [0, 1]),
            ('p2', ['a', 'b', 'c']),
            ('p3', [None, {}]),
        ])

        # Every randomized combination must provide a value for each parameter.
        expected_keys = ('p1', 'p2', 'p3')
        for combination in param_grid.generate_params(randomize=True):
            for key in expected_keys:
                self.assertIn(key, combination)

        # Without randomization the first two and last two combinations are
        # pinned (index -> expected combination).
        ordered = list(param_grid.generate_params(randomize=False))
        expected_by_index = {
            0: {'p1': 0, 'p2': 'a', 'p3': None},
            1: {'p1': 0, 'p2': 'a', 'p3': {}},
            -2: {'p1': 1, 'p2': 'c', 'p3': None},
            -1: {'p1': 1, 'p2': 'c', 'p3': {}},
        }
        for index, expected in expected_by_index.items():
            self.assertEqual(ordered[index], expected)
Esempio n. 3
0
 def test_experiment(self):
     """Check the commands and names produced by ``Experiment.generate_experiments``.

     Each generated command must start with the base command and each
     generated name with ``0<index>_test``.
     """
     base_cmd = 'python super_rl.py'
     grid = ParamGrid([('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c'])])
     experiment = Experiment('test', base_cmd, grid.generate_params(randomize=False))
     generated = experiment.generate_experiments(
         'train_dir', customize_experiment_name=True, param_prefix='--',
     )
     for index, (command, name) in enumerate(generated):
         self.assertTrue(command.startswith(base_cmd))
         self.assertTrue(name.startswith(f'0{index}_test'))
Esempio n. 4
0
 def test_descr(self):
     """Check the tuples produced by ``RunDescription.generate_experiments``.

     Every command must mention ``--experiment`` and ``--experiments_root``,
     the last path component of the root dir must appear in the experiment
     name, and the root dir must start with the run name.
     """
     grid = ParamGrid([('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c'])])
     experiments = [
         Experiment(exp_name, exp_cmd, grid.generate_params(randomize=False))
         for exp_name, exp_cmd in (
             ('test1', 'python super_rl1.py'),
             ('test2', 'python super_rl2.py'),
         )
     ]
     run_description = RunDescription('test_run', experiments)
     generated = run_description.generate_experiments('train_dir')
     for command, name, root_dir, _env_vars in generated:
         leaf_dir = split(root_dir)[-1]
         self.assertIn('--experiment', command)
         self.assertIn('--experiments_root', command)
         self.assertIn(leaf_dir, name)
         self.assertTrue(root_dir.startswith('test_run'))
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Parameter grid: a single environment, use_cpc='True', five seeds.
_params = ParamGrid([
    ('env', ['voxel_env_multitask_Obstacles']),
    ('use_cpc', ['True']),
    ('seed', [11111, 22222, 33333, 44444, 55555]),
])

# Base command line shared by the experiments below.  Adjacent literals are
# concatenated by the parser into one single-line command string.
_cli = (
    'python -m sample_factory.algorithms.appo.train_appo'
    ' --train_for_seconds=360000000 --train_for_env_steps=10000000000'
    ' --algo=APPO --gamma=0.997'
    ' --use_rnn=True --rnn_num_layers=2'
    ' --num_workers=12 --num_envs_per_worker=2'
    ' --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048'
    ' --actor_worker_gpus 0 --num_policies=1 --with_pbt=False'
    ' --max_grad_norm=0.0'
    ' --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001'
    ' --voxel_num_simulation_threads=1 --voxel_use_vulkan=True'
    ' --policy_workers_per_policy=2 --learner_main_loop_num_cores=2'
    ' --reward_clip=30 --pbt_mix_policies_in_one_env=False'
)

# Single-agent variant: 36 envs per instance, one agent per env.
EXPERIMENT_1AGENT = Experiment(
    'voxel_env_multitask_obs',
    f'{_cli} --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_env_v115_multitask_obstacles_v55',
    experiments=[EXPERIMENT_1AGENT],
)
# Sampler benchmark settings.
NUM_WORKERS = 20  # typically num logical cores
NUM_WORKERS_VOXEL_ENV = 10  # typically num logical cores / 2, limited by the num of available Vulkan contexts
TIMEOUT_SECONDS = 180
SAMPLER_GPUS = '0'  # replace with '0 1 2 3 4 5 6 7' for 8-GPU server

# Command line for the basic (non-voxel) environments.
_basic_cli = (
    'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER'
    f' --num_workers={NUM_WORKERS} --num_envs_per_worker=1'
    ' --experiment=benchmark'
    f' --timeout_seconds={TIMEOUT_SECONDS}'
)

_params_basic_envs = ParamGrid([
    ('env', ['doom_benchmark', 'atari_breakout', 'dmlab_benchmark']),
])

_experiment_basic_envs = Experiment(
    'benchmark_basic_envs',
    _basic_cli,
    _params_basic_envs.generate_params(randomize=False),
)

# Command line for the voxel environment; it uses its own worker count and
# passes the sampler GPU list explicitly.
_voxel_env_cli = (
    'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER'
    f' --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=1'
    ' --experiment=benchmark'
    f' --sampler_worker_gpus {SAMPLER_GPUS}'
    ' --voxel_num_envs_per_instance=64 --voxel_num_agents_per_env=2'
    ' --voxel_num_simulation_threads=2'
    f' --timeout_seconds={TIMEOUT_SECONDS}'
)

# The grid covers both values of voxel_use_vulkan.
_params_voxel_env = ParamGrid([
    ('env', ['voxel_env_obstacleshard']),
    ('voxel_use_vulkan', [True, False]),
])

_experiment_voxel_env = Experiment(
    'benchmark_voxel_env',
    _voxel_env_cli,
    _params_voxel_env.generate_params(randomize=False),
)
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Sampler benchmark settings for the voxel environments.
NUM_WORKERS_VOXEL_ENV = 48  # typically num logical cores / 2, limited by the num of available Vulkan contexts
TIMEOUT_SECONDS = 180
SAMPLER_GPUS = '0 1 2 3 4 5 6 7'  # value passed to --sampler_worker_gpus; lists eight GPU indices

# Adjacent literals are concatenated by the parser into one single-line
# command string.
_voxel_env_cli = (
    'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER'
    f' --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=1'
    ' --experiment=benchmark'
    f' --sampler_worker_gpus {SAMPLER_GPUS}'
    ' --voxel_num_envs_per_instance=64 --voxel_num_agents_per_env=2'
    ' --voxel_num_simulation_threads=2'
    f' --timeout_seconds={TIMEOUT_SECONDS}'
)

# Eight voxel environments, Vulkan only.
_params_voxel_env = ParamGrid([
    ('env', [
        'voxel_env_TowerBuilding',
        'voxel_env_ObstaclesEasy',
        'voxel_env_ObstaclesHard',
        'voxel_env_Collect',
        'voxel_env_Sokoban',
        'voxel_env_HexMemory',
        'voxel_env_HexExplore',
        'voxel_env_Rearrange',
    ]),
    ('voxel_use_vulkan', [True]),
])

_experiment_voxel_env = Experiment(
    'benchmark_voxel_env_8',
    _voxel_env_cli,
    _params_voxel_env.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_bench_sampling_all_envs',
    experiments=[_experiment_voxel_env],
)
Esempio n. 8
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Five seeds; the grid's only parameter.
_params_earlystop = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444]),
])

# NOTE(review): the base command already contains `--seed 0` while the grid
# also supplies `seed`; how the runner combines the two is not visible here.
_experiment_earlystop = Experiment(
    'lunar_lander_cont',
    (
        'python -m sample_factory_examples.train_gym_env'
        ' --train_for_env_steps=500000000 --algo=APPO'
        ' --num_workers=20 --num_envs_per_worker=6'
        ' --seed 0 --gae_lambda 0.99'
        ' --experiment=lunar_lander_2 --env=gym_LunarLanderContinuous-v2'
        ' --exploration_loss_coeff=0.0 --max_grad_norm=0.0'
        ' --encoder_type=mlp --encoder_subtype=mlp_mujoco --encoder_extra_fc_layers=0'
        ' --hidden_size=128 --policy_initialization=xavier_uniform'
        ' --actor_critic_share_weights=False --adaptive_stddev=False'
        ' --recurrence=1 --use_rnn=False'
        ' --batch_size=256 --ppo_epochs=4 --with_vtrace=False'
        ' --reward_scale=0.05 --max_policy_lag=100000'
        ' --save_every_sec=15 --experiment_summaries_interval=10'
    ),
    _params_earlystop.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'lunar_lander_cont_v100',
    experiments=[_experiment_earlystop],
)