예제 #1
0
}

# Map names to register. The loop at the bottom issues one
# extend_param_dicts call per entry, so enabling a group below adds runs.
maps = []

# Symmetric (6)
maps.extend(["3m"])  #, "8m", "25m", "2s3z", "3s5z", "MMM"]

# Asymmetric (6)
# maps.extend(["5m_6m", "8m_9m", "10m_11m", "27m_30m"])
# maps.extend(["MMM2", "3s5z_3s6z"])

# Micro (10)
# maps.extend(["3s_vs_3z", "3s_vs_4z", "3s_vs_5z"])
# maps.extend(["micro_2M_Z"])
# maps.extend(["micro_baneling"])
# maps.extend(["micro_colossus"])
# maps.extend(["micro_corridor"])
# maps.extend(["micro_focus"])
# maps.extend(["micro_retarget"])
# maps.extend(["micro_bane"])

for map_name in maps:
    # Every map is registered under the same experiment name, `label`.
    name = label
    # Per-map overrides passed alongside shared_params.
    # NOTE(review): presumably these are merged over shared_params by
    # extend_param_dicts -- confirm against that helper.
    per_map_overrides = {
        "name": name,
        "env_args.map_name": map_name,
    }
    extend_param_dicts(
        param_dicts,
        shared_params,
        per_map_overrides,
        repeats=parallel_repeat,
    )
    "env_args.steps": 10,
    "env_args.good_branches": 2,

    "batch_size_run": 1,

    "test_interval": 1000,
    "test_nepisode": 64,
    "test_greedy": True,
    "log_interval": 1000,
    "runner_log_interval": 2000,
    "learner_log_interval": 2000,
    "buffer_cpu_only": True, # 5k buffer is too big for VRAM!
    "buffer_size": 1000,
    "epsilon_finish": 0.01,
    "epsilon_anneal_time": 500,
    "discrim_size": 32,
}

# Register the "noisemix" experiment on top of shared_params.
name = "noisemix"

# Experiment-specific settings. Most values are single-element lists while
# "name" and "bandit_iters" are scalars.
# NOTE(review): presumably extend_param_dicts expands list values into
# separate runs and passes scalars through unchanged -- confirm.
noisemix_bandit_overrides = {
    "name": name,
    "noise_dim": [16],
    "bandit_iters": 100,
    "noise_bandit": [True],
    "rnn_discrim": [True],
    "mi_loss": [1],
    # "entropy_scaling": [0.001, 0.01, 0.1]
}

extend_param_dicts(
    param_dicts,
    shared_params,
    noisemix_bandit_overrides,
    repeats=parallel_repeat,
)
예제 #3
0
    # "batch_size_run": 1,
    "test_interval": 30000,
    "test_nepisode": 8,
    "test_greedy": True,
    "log_interval": 30000,
    "runner_log_interval": 30000,
    "learner_log_interval": 30000,
    "buffer_cpu_only": True,  # 5k buffer is too big for VRAM!
    "buffer_size": 3000,
    "epsilon_finish": 0.05,
    "epsilon_anneal_time": 250000,
    #"discrim_size": 32,
}

# Register the "noisemix" experiment on the "2_corridors" map, layered on
# top of shared_params.
name = "noisemix"

# Experiment-specific settings; key order is kept as originally written.
# NOTE(review): presumably extend_param_dicts expands list values into
# separate runs -- confirm against that helper.
noisemix_corridor_overrides = {
    "env_args.map_name": ["2_corridors"],
    "name": name,
    "noise_dim": [16],
    #"bandit_iters": 100,
    "noise_bandit": [True],
    "rnn_discrim": [True],
    "mi_loss": [0.001],
    "entropy_scaling": [0.001],
}

extend_param_dicts(
    param_dicts,
    shared_params,
    noisemix_corridor_overrides,
    repeats=parallel_repeat,
)
예제 #4
0
    "test_interval": 30000,
    "test_nepisode": 8,
    "test_greedy": True,
    "save_model": False,
    #"save_model_interval": 250 * 1000,
    "log_interval": 30000,
    "runner_log_interval": 30000,
    "learner_log_interval": 30000,
    "buffer_cpu_only": True,  # 5k buffer is too big for VRAM!
    # "training_iters": 1,
    "buffer_size": 3000
}
# Experiment name: label and config concatenated directly, then "_" and
# env_config (all three come from outside this block).
name = "_".join([label + config, env_config])

# Experiment-specific settings; key order is kept as originally written.
# "env_args.map_name" and "mi_loss" carry several values each.
# NOTE(review): presumably extend_param_dicts builds one run per
# combination of list values -- confirm against that helper.
sweep_overrides = {
    "lr": [0.0005],
    "epsilon_anneal_time": [250000],
    "env_args.reward_only_positive": [False],
    "env_args.reward_negative_scale": [1.0],
    "env_args.map_name": ["8m", "MMM2", "10m_vs_11m"],
    "name": name,
    "rnn_hidden_dim": [128],
    "obs_agent_id": [False],
    "grad_norm_clip": [10],
    "target_update_mode": ["soft"],
    "target_update_tau": [0.05],
    "mi_loss": [0.001, 0.01],
    "agent": ["noise_rnn_deep"],
}

extend_param_dicts(
    param_dicts,
    shared_params,
    sweep_overrides,
    repeats=parallel_repeat,
)