Exemple #1
0
    def test_simple_cmd(self):
        """Smoke-test the runner end to end using a grid of trivial 'echo' commands."""
        # Suppress INFO-level log output for the duration of the test.
        logging.disable(logging.INFO)

        grid = ParamGrid([
            ('p1', [3.14, 2.71]),
            ('p2', ['a', 'b', 'c']),
            ('p3', list(np.arange(3))),
        ])
        exps = [
            Experiment('test_echo1', 'echo', grid.generate_params(randomize=True)),
            Experiment('test_echo2', 'echo', grid.generate_params(randomize=False)),
        ]
        run_root = '__test_run__'
        tmp_train_dir = ensure_dir_exists(join(project_tmp_dir(), 'tests'))
        descr = RunDescription(run_root, exps)

        cli_args = runner_argparser().parse_args([])
        cli_args.max_parallel = 8
        cli_args.pause_between = 0
        cli_args.train_dir = tmp_train_dir

        run(descr, cli_args)

        # Same experiments again, exercising the alternative dir format and custom arg names.
        descr_alt = RunDescription(
            run_root, exps,
            experiment_dirs_sf_format=False,
            experiment_arg_name='--experiment_tst',
            experiment_dir_arg_name='--dir',
        )
        run(descr_alt, cli_args)

        logging.disable(logging.NOTSET)

        # Remove everything the test run produced.
        shutil.rmtree(join(tmp_train_dir, run_root))
Exemple #2
0
 def test_descr(self):
     """Check that generate_experiments() emits well-formed runner commands."""
     grid = ParamGrid([('p1', [3.14, 2.71]), ('p2', ['a', 'b', 'c'])])
     exps = [
         Experiment('test1', 'python super_rl1.py', grid.generate_params(randomize=False)),
         Experiment('test2', 'python super_rl2.py', grid.generate_params(randomize=False)),
     ]
     descr = RunDescription('test_run', exps)
     # Each generated entry is (command, name, root_dir, env_vars).
     for cmd, exp, root, _env in descr.generate_experiments('train_dir'):
         dir_name = split(root)[-1]
         self.assertIn('--experiment', cmd)
         self.assertIn('--experiments_root', cmd)
         self.assertTrue(dir_name in exp)
         self.assertTrue(root.startswith('test_run'))
Exemple #3
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Three seeds for a single flat-action Doom scenario.
_grid = ParamGrid([
    ('seed', [1111, 2222, 3333]),
    ('env', ['doom_defend_the_center_flat_actions']),
    ('num_envs_per_worker', [16]),
])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --train_for_env_steps=100000000 --algo=APPO --env_frameskip=4 --use_rnn=True --rnn_type=lstm --num_workers=72 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --wide_aspect_ratio=False --policy_workers_per_policy=3 --experiment_summaries_interval=5 --ppo_clip_value=10.0 --nonlinearity=relu'

_experiment = Experiment('basic_envs_fs4', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('paper_doom_wall_time_v97_fs4', experiments=[_experiment])
Exemple #4
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid
from swarm_rl.runs.quad_multi_mix_baseline import QUAD_BASELINE_CLI

# Single-point grid: one fixed configuration for the quadrotor swarm baseline.
_grid = ParamGrid([
    ('quads_collision_falloff_radius', [4.0]),
    ('quads_collision_reward', [5.0]),
    ('quads_collision_smooth_max_penalty', [10.0]),
    ('quads_neighbor_encoder_type', ['attention']),
    ('replay_buffer_sample_prob', [0.75]),
])

# Extend the shared baseline CLI with PBT-specific flags (8 policies).
PBT_CLI = QUAD_BASELINE_CLI + (
    ' --pbt_replace_reward_gap=0.2 --pbt_replace_reward_gap_absolute=200.0 --pbt_period_env_steps=10000000 --pbt_start_mutation=50000000 --with_pbt=True --num_policies=8'
    ' --pbt_mix_policies_in_one_env=False'
    ' --num_workers=72 --num_envs_per_worker=10')

_exp = Experiment('quad_mix_baseline-8_pbt', PBT_CLI, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription(
    'quads_multi_baseline_pbt8_v116',
    experiments=[_exp],
)
Exemple #5
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# No sweep: a single run, fully specified by the command line below.
_grid = ParamGrid([])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --env=doom_basic --train_for_env_steps=3000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --ppo_epochs=1 --rollout=32 --recurrence=32 --wide_aspect_ratio=False --num_workers=20 --num_envs_per_worker=20 --experiment=doom_basic'

_experiment = Experiment('basic', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('doom_basic', experiments=[_experiment])
Exemple #6
0
    ('quads_obstacle_num', [1]),
    ('quads_obstacle_type', ['sphere']),
    ('quads_obstacle_traj', ['mix']),
    ('quads_collision_obstacle_reward', [5.0]),
    ('quads_obstacle_obs_mode', ['absolute']),
    ('quads_collision_obst_smooth_max_penalty', [10.0]),
    ('quads_obstacle_hidden_size', [256]),
    ('replay_buffer_sample_prob', [0.0]),
    ('quads_obst_penalty_fall_off', [10.0]),
])

# Single experiment reusing the shared baseline CLI with the obstacle grid defined above.
_exp = Experiment(
    'quad_mix_baseline_obst_mix-8a',
    QUAD_BASELINE_CLI,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_mix_obst_mix_8a_v116', experiments=[_exp])

# On the Brain server, if using num_workers = 72 makes the system report "Resource temporarily unavailable",
# try running the two commands below first:
# export OMP_NUM_THREADS=1
# export USE_SIMPLE_THREADED_LEVEL3=1

# Command to use this script on server:
# xvfb-run python -m runner.run --run=quad_multi_mix_baseline_obstacle_mix --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
# Command to use this script on local machine:
# Please change num_workers to the physical cores of your local machine
# python -m runner.run --run=quad_multi_mix_baseline_obstacle_mix --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
Exemple #7
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Empty grid: exactly one run, configured entirely via the command line.
_grid = ParamGrid([])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle --train_for_env_steps=4000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --wide_aspect_ratio=False --num_workers=72 --num_envs_per_worker=32 --num_policies=8 --with_pbt=True'

_experiments = [Experiment('battle_fs4', _cli, _grid.generate_params(randomize=False))]

RUN_DESCRIPTION = RunDescription('paper_doom_battle_appo_pbt_v98_fs4', experiments=_experiments)
# GPUs used by actor workers; use '0 1 2 3 4 5 6 7' on an 8-GPU server.
ACTOR_GPUS = '0'
NUM_POLICIES = 1

# NOTE(review): NUM_WORKERS, NUM_WORKERS_VOXEL_ENV and TIMEOUT_SECONDS are expected to be
# defined earlier in this file — confirm they are in scope.
_cli_basic = f'python -m sample_factory.algorithms.appo.train_appo --train_for_seconds={TIMEOUT_SECONDS} --train_for_env_steps=20000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers={NUM_WORKERS} --num_envs_per_worker=16 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --num_policies={NUM_POLICIES} --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --policy_workers_per_policy=2 --learner_main_loop_num_cores=4 --reward_clip=30'

_grid_basic = ParamGrid([
    ('env', ['doom_benchmark', 'atari_breakout', 'dmlab_benchmark']),
])

_exp_basic = Experiment(
    'benchmark_basic_envs',
    _cli_basic,
    _grid_basic.generate_params(randomize=False),
)

_cli_voxel = f'python -m sample_factory.algorithms.appo.train_appo --train_for_seconds={TIMEOUT_SECONDS} --train_for_env_steps=20000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus {ACTOR_GPUS} --num_policies={NUM_POLICIES} --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=2 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4 --reward_clip=30 --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1 --pbt_mix_policies_in_one_env=False'

# The voxel benchmark compares Vulkan on vs. off.
_grid_voxel = ParamGrid([
    ('env', ['voxel_env_obstacleshard']),
    ('voxel_use_vulkan', [True, False]),
])

_exp_voxel = Experiment(
    'benchmark_voxel_env',
    _cli_voxel,
    _grid_voxel.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_train_benchmark',
    experiments=[_exp_basic, _exp_voxel])
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Typically half the logical cores; also limited by the number of available Vulkan contexts.
NUM_WORKERS_VOXEL_ENV = 48
TIMEOUT_SECONDS = 180
# GPUs used by sampler workers (all eight on an 8-GPU server).
SAMPLER_GPUS = '0 1 2 3 4 5 6 7'

_cli = f'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=1 --experiment=benchmark --sampler_worker_gpus {SAMPLER_GPUS} --voxel_num_envs_per_instance=64 --voxel_num_agents_per_env=2 --voxel_num_simulation_threads=2 --timeout_seconds={TIMEOUT_SECONDS}'

# Sweep over every voxel_env task, Vulkan renderer enabled.
_grid = ParamGrid([
    ('env', [
        'voxel_env_TowerBuilding', 'voxel_env_ObstaclesEasy',
        'voxel_env_ObstaclesHard', 'voxel_env_Collect', 'voxel_env_Sokoban',
        'voxel_env_HexMemory', 'voxel_env_HexExplore', 'voxel_env_Rearrange'
    ]),
    ('voxel_use_vulkan', [True]),
])

_exp = Experiment('benchmark_voxel_env_8', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('voxel_bench_sampling_all_envs', experiments=[_exp])
Exemple #10
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Two seeds x two observation representations (with and without the wall component).
_grid = ParamGrid([
    ('seed', [0000, 3333]),
    ('quads_obs_repr', ['xyz_vxyz_R_omega', 'xyz_vxyz_R_omega_wall']),
])

_cli = 'python -m swarm_rl.train --env=quadrotor_multi --train_for_env_steps=5000000000 --algo=APPO --use_rnn=False --num_workers=72 --num_envs_per_worker=4 --learning_rate=0.0001 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh --actor_critic_share_weights=False --policy_initialization=xavier_uniform --adaptive_stddev=False --hidden_size=256 --quads_neighbor_hidden_size=256 --with_vtrace=False --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=5.0 --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024 --quads_use_numba=True --quads_num_agents=8 --quads_episode_duration=15.0 --quads_mode=mix --quads_formation_size=0.0 --encoder_custom=quad_multi_encoder --with_pbt=False --quads_neighbor_encoder_type=attention --quads_collision_reward=5.0 --neighbor_obs_type=pos_vel'

_exp = Experiment('grid_search_add_wall_collision_func-8_mixed', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('quads_multi_mixed_pvg_v112', experiments=[_exp])

# On Brain server, when you use num_workers = 72, if the system reports: Resource temporarily unavailable,
# then, try to use two commands below
# export OMP_NUM_THREADS=1
# export USE_SIMPLE_THREADED_LEVEL3=1

# Command to use this script on server:
# xvfb-run python -m runner.run --run=quad_multi_mix_obs_add_walls --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
# Command to use this script on local machine:
# Please change num_workers to the physical cores of your local machine
# python -m runner.run --run=quad_multi_mix_obs_add_walls --runner=processes --max_parallel=4 --pause_between=1 --experiments_per_gpu=1 --num_gpus=4
Exemple #11
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_params = ParamGrid([
    ('ppo_epochs', [1]),
])

# Three battle configurations that differ only in actor scale
# (--num_workers / --num_envs_per_worker): 100, 400 and 800 total envs.
_experiments = [
    Experiment(
        'battle_fs4_100',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle --train_for_env_steps=1000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --num_workers=10 --num_envs_per_worker=10 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --wide_aspect_ratio=False',
        _params.generate_params(randomize=False),
    ),

    Experiment(
        'battle_fs4_400',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle --train_for_env_steps=1000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --num_workers=20 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --wide_aspect_ratio=False',
        _params.generate_params(randomize=False),
    ),

    Experiment(
        'battle_fs4_800',
        # FIX: this command was missing the 'python -m sample_factory.' prefix that the
        # sibling experiments use, so it was not a runnable command.
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle --train_for_env_steps=1000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --num_workers=20 --num_envs_per_worker=40 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --wide_aspect_ratio=False',
        _params.generate_params(randomize=False),
    ),
]


RUN_DESCRIPTION = RunDescription('paper_policy_lag_v66_fs4', experiments=_experiments)
Exemple #12
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Sweep over three memory-oriented MiniGrid tasks.
_grid = ParamGrid([
    ('env', ['MiniGrid-MemoryS7-v0', 'MiniGrid-RedBlueDoors-8x8-v0', 'MiniGrid-MemoryS17Random-v0']),
])

_exp = Experiment(
    'mem_minigrid_obs',
    'python -m train_pytorch --algo=PPO --rollout=64 --num_envs=96 --recurrence=1 --use_rnn=False --train_for_env_steps=200000000 --prior_loss_coeff=0.005 --obs_mem=True',
    _grid.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'mem_minigrid_obs_v26',
    experiments=[_exp],
    pause_between_experiments=5,
    use_gpus=2,
    experiments_per_gpu=4,
    max_parallel=12,
)
Exemple #13
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid
from swarm_rl.runs.quad_multi_mix_baseline import QUAD_BASELINE_CLI

# Baseline with the MLP neighbor encoder, repeated over four seeds.
_grid = ParamGrid([
    ('quads_neighbor_encoder_type', ['mlp']),
    ('seed', [0000, 1111, 2222, 3333]),
])

_exp = Experiment(
    'quad_mix_baseline-8_mixed_mlp',
    QUAD_BASELINE_CLI,
    _grid.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'paper_quads_multi_mix_baseline_8a_mlp_v116',
    experiments=[_exp],
)

# On Brain server, when you use num_workers = 72, if the system reports: Resource temporarily unavailable,
# then, try to use two commands below
# export OMP_NUM_THREADS=1
# export USE_SIMPLE_THREADED_LEVEL3=1

# Command to use this script on server:
# xvfb-run python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
# Command to use this script on local machine:
# Please change num_workers to the physical cores of your local machine
# python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
Exemple #14
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_cli = 'python -m swarm_rl.train --env=quadrotor_multi --train_for_env_steps=1000000000 --algo=APPO --use_rnn=False --num_workers=72 --num_envs_per_worker=4 --learning_rate=0.0001 --ppo_clip_value=5.0 --recurrence=1 --nonlinearity=tanh --actor_critic_share_weights=False --policy_initialization=xavier_uniform --adaptive_stddev=False --hidden_size=256 --with_vtrace=False --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=5.0 --exploration_loss_coeff=0.0 --rollout=128 --batch_size=1024 --extend_obs=True --quads_use_numba=True --quads_num_agents=8 --quads_episode_duration=10.0 --quads_mode=static_goal --quads_dist_between_goals=0.0 --quads_collision_reward=1.0 --encoder_custom=quad_multi_encoder --pbt_replace_reward_gap=0.1 --pbt_replace_reward_gap_absolute=200.0 --pbt_period_env_steps=1000000 --pbt_start_mutation=20000000 --with_pbt=True --num_policies=8'

_exp = Experiment('static_goal-agents_8', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('quads_multi_same_goal_pbt_v112', experiments=[_exp])

# this is just a placeholder for command line. You can run it through runner or just copy-paste to command line and run
Exemple #15
0
    '--encoder_custom=quad_multi_encoder --with_pbt=False --quads_collision_reward=5.0 '
    '--quads_neighbor_hidden_size=256 --neighbor_obs_type=pos_vel '
    '--quads_settle_reward=0.0 --quads_collision_hitbox_radius=2.0 --quads_collision_falloff_radius=4.0 '
    '--quads_local_obs=6 --quads_local_metric=dist '
    '--quads_local_coeff=1.0 --quads_num_agents=8 '
    '--quads_collision_reward=5.0 '
    '--quads_collision_smooth_max_penalty=10.0 '
    '--quads_neighbor_encoder_type=attention '
    '--replay_buffer_sample_prob=0.75 '
    '--anneal_collision_steps=300000000')

# Single baseline experiment over the parameter grid defined above.
_exp = Experiment(
    'quad_mix_baseline-8_mixed',
    QUAD_BASELINE_CLI,
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('quads_multi_mix_baseline_8a_local_v116', experiments=[_exp])

# On Brain server, when you use num_workers = 72, if the system reports: Resource temporarily unavailable,
# then, try to use two commands below
# export OMP_NUM_THREADS=1
# export USE_SIMPLE_THREADED_LEVEL3=1

# Command to use this script on server:
# xvfb-run python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
# Command to use this script on local machine:
# Please change num_workers to the physical cores of your local machine
# python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
Exemple #16
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_exp = Experiment(
    'bots_freedm_fs2',
    'python -m sample_factory.algorithms.appo.train_appo --env=doom_freedm --train_for_seconds=360000 --algo=APPO --gamma=0.995 --env_frameskip=2 --use_rnn=True --reward_scale=0.5 --num_workers=20 --num_envs_per_worker=4 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --benchmark=False --start_bot_difficulty=150',
    _grid.generate_params(randomize=False),
)

# One long-running experiment on a single GPU, no parallelism.
RUN_DESCRIPTION = RunDescription(
    'doom_freedm_v64_fs2',
    experiments=[_exp],
    pause_between_experiments=100,
    use_gpus=1,
    experiments_per_gpu=-1,
    max_parallel=1,
)
from sample_factory.runner.run_description import RunDescription
from sample_factory.runner.runs.voxel_base_experiments import EXPERIMENT_1AGENT

# Reuse the shared single-agent voxel experiment definition.
RUN_DESCRIPTION = RunDescription(
    'voxel_env_v115_single_v55',
    experiments=[EXPERIMENT_1AGENT],
)
Exemple #18
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_cli = 'python -m swarm_rl.train --env=quadrotor_single --train_for_seconds=3600000 --algo=APPO --gamma=0.99 --use_rnn=False --num_workers=72 --num_envs_per_worker=4 --num_policies=8 --ppo_epochs=1 --rollout=128 --recurrence=1 --batch_size=512 --benchmark=False --pbt_replace_reward_gap=0.1 --pbt_replace_reward_gap_absolute=200.0 --pbt_period_env_steps=1000000 --pbt_start_mutation=20000000 --with_pbt=True --adam_eps=1e-8 --nonlinearity=tanh --actor_critic_share_weights=False --policy_initialization=xavier_uniform --adaptive_stddev=False --hidden_size=64 --with_vtrace=False --max_policy_lag=100000000 --gae_lambda=1.00 --max_grad_norm=0.0 --ppo_clip_value=5.0 --exploration_loss_coeff=0.00001 --learning_rate=5e-4'

_exp = Experiment('quads_pbt', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('quads_single_pbt_v96_v2', experiments=[_exp])
Exemple #19
0
    Experiment(
        'bots_128_fs2_wide',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=True',
        _params.generate_params(randomize=False),
        dict(DOOM_DEFAULT_UDP_PORT=35300),
    ),

    Experiment(
        'bots_128_fs2_narrow',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=False',
        _params.generate_params(randomize=False),
        dict(DOOM_DEFAULT_UDP_PORT=40300),
    ),

    Experiment(
        'bots_128_fs2_wide_adam0.5',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=True --adam_beta1=0.5',
        _params.generate_params(randomize=False),
        dict(DOOM_DEFAULT_UDP_PORT=45300),
    ),

    Experiment(
        'bots_128_fs2_narrow_adam0.5',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_dwango5_bots_experimental --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --reward_scale=0.5 --num_workers=18 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=False --adam_beta1=0.5',
        _params.generate_params(randomize=False),
        dict(DOOM_DEFAULT_UDP_PORT=50300),
    ),
]

# Four-GPU sweep: one experiment per GPU, with staggered starts.
RUN_DESCRIPTION = RunDescription(
    'doom_bots_v60_sweep',
    experiments=_experiments,
    pause_between_experiments=120,
    use_gpus=4,
    experiments_per_gpu=1,
    max_parallel=4,
)
Exemple #20
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('seed', [42]),
])

_exps = [
    Experiment(
        'bots_128_fs2_narrow',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_deathmatch_bots --train_for_seconds=3600000 --algo=APPO --use_rnn=True --gamma=0.995 --env_frameskip=2 --rollout=32 --num_workers=80 --num_envs_per_worker=24 --num_policies=8 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --res_w=128 --res_h=72 --wide_aspect_ratio=False --with_pbt=True --pbt_period_env_steps=5000000',
        _grid.generate_params(randomize=False),
        # Extra environment variables passed to the experiment.
        dict(DOOM_DEFAULT_UDP_PORT=35300),
    ),
]

RUN_DESCRIPTION = RunDescription('doom_bots_v100_pbt', experiments=_exps)
Exemple #21
0
from sample_factory.runner.run_description import RunDescription
from sample_factory.runner.runs.voxel_base_experiments import EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS

# Multi-agent voxel runs: the shared 2-agent and 4-agent experiment definitions.
RUN_DESCRIPTION = RunDescription(
    'voxel_env_v115_multi_agent_v55',
    experiments=[EXPERIMENT_2AGENTS, EXPERIMENT_4AGENTS],
)
Exemple #22
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --env=doom_duel_bots --train_for_seconds=360000 --algo=APPO --gamma=0.995 --env_frameskip=2 --use_rnn=True --reward_scale=0.5 --num_workers=72 --num_envs_per_worker=32 --num_policies=8 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --benchmark=False --res_w=128 --res_h=72 --wide_aspect_ratio=False --pbt_replace_reward_gap=0.2 --pbt_replace_reward_gap_absolute=3.0 --pbt_period_env_steps=5000000 --save_milestones_sec=1800 --with_pbt=True'

_exp = Experiment('bots_ssl2_fs2', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('paper_doom_duel_bots_v98_fs2', experiments=[_exp])
Exemple #23
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_exps = [
    Experiment(
        'battle_d4_fs4_pbt',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle_d4 --train_for_env_steps=50000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --reward_scale=0.5 --num_workers=24 --num_envs_per_worker=30 --num_policies=4 --ppo_epochs=1 --rollout=32 --recurrence=32 --macro_batch=2048 --batch_size=2048 --wide_aspect_ratio=False --pbt_period_env_steps=5000000',
        _grid.generate_params(randomize=False),
    ),
]

RUN_DESCRIPTION = RunDescription(
    'doom_battle_d4_appo_v64_fs4_pbt',
    experiments=_exps,
    use_gpus=2,
    experiments_per_gpu=-1,
    max_parallel=1,
)
Exemple #24
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Three seeds of the same battle2 configuration.
_grid = ParamGrid([
    ('seed', [1111, 2222, 3333]),
])

_exps = [
    Experiment(
        'battle2_fs4',
        'python -m sample_factory.algorithms.appo.train_appo --env=doom_battle2 --train_for_env_steps=3000000000 --algo=APPO --env_frameskip=4 --use_rnn=True --reward_scale=0.5 --num_workers=20 --num_envs_per_worker=20 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --wide_aspect_ratio=False',
        _grid.generate_params(randomize=False),
    ),
]

RUN_DESCRIPTION = RunDescription('doom_battle2_appo_v1.119.0_fs4', experiments=_exps)
# GPUs used by sampler workers; use '0 1 2 3 4 5 6 7' on an 8-GPU server.
SAMPLER_GPUS = '0'

# NOTE(review): NUM_WORKERS, NUM_WORKERS_VOXEL_ENV and TIMEOUT_SECONDS are expected to be
# defined earlier in this file — confirm they are in scope.
_cli_basic = f'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER --num_workers={NUM_WORKERS} --num_envs_per_worker=1 --experiment=benchmark --timeout_seconds={TIMEOUT_SECONDS}'

_grid_basic = ParamGrid([
    ('env', ['doom_benchmark', 'atari_breakout', 'dmlab_benchmark']),
])

_exp_basic = Experiment(
    'benchmark_basic_envs',
    _cli_basic,
    _grid_basic.generate_params(randomize=False),
)

_cli_voxel = f'python -m sample_factory.run_algorithm --algo=DUMMY_SAMPLER --num_workers={NUM_WORKERS_VOXEL_ENV} --num_envs_per_worker=1 --experiment=benchmark --sampler_worker_gpus {SAMPLER_GPUS} --voxel_num_envs_per_instance=64 --voxel_num_agents_per_env=2 --voxel_num_simulation_threads=2 --timeout_seconds={TIMEOUT_SECONDS}'

# The voxel benchmark compares Vulkan on vs. off.
_grid_voxel = ParamGrid([
    ('env', ['voxel_env_obstacleshard']),
    ('voxel_use_vulkan', [True, False]),
])

_exp_voxel = Experiment(
    'benchmark_voxel_env',
    _cli_voxel,
    _grid_voxel.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_bench_sampling',
    experiments=[_exp_basic, _exp_voxel])
Exemple #26
0
# FIX: RunDescription, Experiment and ParamGrid were used below without being imported,
# which raises NameError at import time; import them like the sibling run scripts do.
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid
from swarm_rl.runs.quad_multi_mix_baseline import QUAD_BASELINE_CLI

# Ablation over four seeds: baseline minus experience replay, and baseline minus
# collision-penalty annealing.
_params = ParamGrid([
    ('seed', [0000, 1111, 2222, 3333]),
])

_experiment_no_replay = Experiment(
    'quad_mix_baseline-8_mixed_noreplay',
    QUAD_BASELINE_CLI + ' --replay_buffer_sample_prob=0.00',
    _params.generate_params(randomize=False),
)

_experiment_no_anneal = Experiment(
    'quad_mix_baseline-8_mixed_noannealing',
    QUAD_BASELINE_CLI + ' --anneal_collision_steps=0',
    _params.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription('paper_quads_multi_mix_baseline_8a_ablation_v116', experiments=[_experiment_no_replay, _experiment_no_anneal])

# On Brain server, when you use num_workers = 72, if the system reports: Resource temporarily unavailable,
# then, try to use two commands below
# export OMP_NUM_THREADS=1
# export USE_SIMPLE_THREADED_LEVEL3=1

# Command to use this script on server:
# xvfb-run python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
# Command to use this script on local machine:
# Please change num_workers to the physical cores of your local machine
# python -m runner.run --run=quad_multi_mix_baseline --runner=processes --max_parallel=3 --pause_between=1 --experiments_per_gpu=1 --num_gpus=3
Exemple #27
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Ten seeds x six basic Doom scenarios.
_grid = ParamGrid([
    ('seed', [0, 1111, 2222, 3333, 4444, 5555, 6666, 7777, 8888, 9999]),
    ('env', [
        'doom_my_way_home', 'doom_deadly_corridor', 'doom_defend_the_center',
        'doom_defend_the_line', 'doom_health_gathering',
        'doom_health_gathering_supreme'
    ]),
])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --train_for_env_steps=500000000 --algo=APPO --env_frameskip=4 --use_rnn=True --num_workers=36 --num_envs_per_worker=8 --num_policies=1 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --wide_aspect_ratio=False'

_exps = [Experiment('basic_envs_fs4', _cli, _grid.generate_params(randomize=False))]

RUN_DESCRIPTION = RunDescription('paper_doom_basic_envs_appo_v97_fs4', experiments=_exps)
Exemple #28
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Cross of two environments, RNN on/off, and two memory sizes.
_grid = ParamGrid([
    ('env', ['doom_two_colors_easy', 'doom_two_colors_hard']),
    ('use_rnn', [True, False]),
    ('mem_size', [4, 0]),
])

_exp = Experiment(
    'mem_doom',
    'python -m train_pytorch --algo=PPO --train_for_env_steps=1000000000 --prior_loss_coeff=0.005 --reward_scale=0.5',
    _grid.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'mem_doom_v39',
    experiments=[_exp],
    pause_between_experiments=10,
    use_gpus=2,
    experiments_per_gpu=2,
    max_parallel=4,
)
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

# Multitask Obstacles with use_cpc enabled, five seeds.
_grid = ParamGrid([
    ('env', ['voxel_env_multitask_Obstacles']),
    ('use_cpc', ['True']),
    ('seed', [11111, 22222, 33333, 44444, 55555]),
])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --train_for_seconds=360000000 --train_for_env_steps=10000000000 --algo=APPO --gamma=0.997 --use_rnn=True --rnn_num_layers=2 --num_workers=12 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 --num_policies=1 --with_pbt=False --max_grad_norm=0.0 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --voxel_num_simulation_threads=1 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=2 --reward_clip=30 --pbt_mix_policies_in_one_env=False'

# Exported name: other run scripts import this experiment definition.
EXPERIMENT_1AGENT = Experiment(
    'voxel_env_multitask_obs',
    _cli + ' --voxel_num_envs_per_instance=36 --voxel_num_agents_per_env=1',
    _grid.generate_params(randomize=False),
)

RUN_DESCRIPTION = RunDescription(
    'voxel_env_v115_multitask_obstacles_v55',
    experiments=[EXPERIMENT_1AGENT],
)
Exemple #30
0
from sample_factory.runner.run_description import RunDescription, Experiment, ParamGrid

_grid = ParamGrid([
    ('ppo_epochs', [1]),
])

_cli = 'python -m sample_factory.algorithms.appo.train_appo --env=voxel_env_v23_v --train_for_seconds=360000000 --algo=APPO --gamma=0.997 --use_rnn=True --num_workers=28 --num_envs_per_worker=2 --ppo_epochs=1 --rollout=32 --recurrence=32 --batch_size=2048 --actor_worker_gpus 0 1 2 3 4 5 6 7 --num_policies=8 --with_pbt=True --max_grad_norm=0.0 --pbt_replace_reward_gap_absolute=0.3 --exploration_loss=symmetric_kl --exploration_loss_coeff=0.001 --pbt_mix_policies_in_one_env=False --experiment=voxel_env_v23_v --voxel_num_envs_per_instance=48 --voxel_num_agents_per_env=4 --voxel_num_simulation_threads=4 --voxel_vertical_look_limit=0.2 --voxel_use_vulkan=True --policy_workers_per_policy=2 --learner_main_loop_num_cores=4'

_exp = Experiment('voxel_env_pbt', _cli, _grid.generate_params(randomize=False))

RUN_DESCRIPTION = RunDescription('voxel_env_pbt_v112_env_v23_8p', experiments=[_exp])