# Launch one PPO training run (ppo_main_nlp.py) per (scenario, seed) pair.
# All runs are started up front and execute concurrently; every
# hyper-parameter is forwarded to the child script as a command-line string.
launched_runs = []  # (scenario, seed, process) triples, kept for failure reporting
for scenario in scenarios:
    for seed in seeds:
        command = [
            "python", "ppo_main_nlp.py",
            "--scenario", scenario,
            "--rep-type", "nlp",
            "--seed", str(seed),
            "--env-name", "doom",
            "--button_number", str(button_number),
            "--algo", "ppo",
            "--lr", str(lr),
            "--value-loss-coef", str(value_loss_coef),
            "--num-processes", str(num_processes),
            "--num-env-steps", str(num_env_steps),
            "--num-steps", str(num_steps),
            "--num-mini-batch", str(num_mini_batch),
            "--log-interval", str(1),
            "--entropy-coef", str(0.01),
            "--n-channels", str(1),
            "--n-patches", str(5),
        ]
        # Argument list + shell=False: values are passed verbatim, with no
        # shell parsing or quoting involved.
        p = subprocess.Popen(command, shell=False)
        processes.append(p)
        launched_runs.append((scenario, seed, p))

# Block until every child process has exited.
for p in processes:
    p.wait()

# Surface failed runs instead of silently ignoring their exit status.
# This only warns: plotting below still proceeds for whatever finished.
for run_scenario, run_seed, proc in launched_runs:
    if proc.returncode != 0:
        print(
            "WARNING: ppo_main_nlp.py failed for scenario={} seed={} "
            "(exit code {})".format(run_scenario, run_seed, proc.returncode)
        )

# Produce one plot per scenario once all runs are done.
# NOTE(review): final_plot and num_updates are defined outside this
# section; the call is forwarded exactly as before.
for scenario in scenarios:
    final_plot(scenario, num_updates, len(seeds))
"defend_the_center_extreme", "health_gathering_extreme" ] seeds = [35, 45, 59, 12, 5] processes = [] steps_per_epoch = 500 epochs = 100 frame_repeat = 4 for scenario in scenarios: for seed in seeds: command = [ "python", "dqn_nlp.py", "--SCENARIO", scenario, "--REP_TYPE", "nlp", "--SEED", str(seed), "--BATCH_SIZE", "100", "--ARCH", "TextCNN", "--SENTANCE_LEN", "200", "--LEARNING_STEPS_PER_EPOCH", str(steps_per_epoch), "--HIDDEN_UNITS", "16", "--FILTER_COUNT", "12", "--LEARNING_RATE", "0.00025", "--EPOCHS", str(epochs), "--FRAME_REPEAT", str(frame_repeat) ] p = subprocess.Popen(command, shell=False) processes.append(p) for p in processes: p.wait() for scenario in scenarios: final_plot(scenario, steps_per_epoch, len(seeds))