def test_agent(agent_step):
    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]

        if agent_step == 1:
            print(mut_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER)

        features = pd.DataFrame()

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])
        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-4, verbose=1)

        (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=10)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        for resample_step in range(1, NUM_RESAMPLES):
            # Resample the environment at the requested randomization level
            if RANDOMIZATION_LEVEL == "Normal":
                mirl_env.env_method("randomize", 0)
            elif RANDOMIZATION_LEVEL == "Extreme":
                mirl_env.env_method("randomize", 1)
            elif RANDOMIZATION_LEVEL == "Test":
                mirl_env.env_method("randomize", -1)
            else:
                print("Error resampling unknown value: ", RANDOMIZATION_LEVEL)
                continue

            if agent_step == 1:
                print(mut_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER, " resample step ", resample_step)

            (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=10)
            learning_results.to_pickle(FOLDER + "/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

        mirl_model.save(FOLDER + "/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")

        del mirl_model
        del mirl_env
def train_clac(training_tag):
    env = gym.make(ENVIRONMENT_NAME)
    env = DummyVecEnv([lambda: env])

    print("hidden values before: ", env.unwrapped.envs[0].hiddenValues)

    data = pd.DataFrame()

    if isinstance(training_tag, float):
        # CLAC takes mut_inf_coef (the mutual-information coefficient), not SAC's ent_coef
        model = CLAC(clac_MlpPolicy, env, mut_inf_coef=training_tag, verbose=1, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            env.reset()
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            file_tag = str(training_tag).replace(".", "p")

            if SAVE_AGENTS:
                model.save("nchain/models/CLAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save("nchain/models/CLAC_" + ENVIRONMENT_NAME + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        del env
        del model
        step = 0

    return data
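# Illustrative only: a minimal sketch of the module-level configuration these training
# scripts rely on. The names (ENVIRONMENT_NAME, TRAINING_STEPS, POLICY_KWARGS, ...) are
# taken from the functions above and below; the concrete values here are placeholders,
# not the settings used in any reported experiment.
ENVIRONMENT_NAME = "NChain-v1"          # assumed Gym ID of the continuous N-chain environment
SAVE_FOLDER = "nchain"                  # root folder for models/ and results/
TRAINING_STEPS = 10                     # number of learn() calls per agent
TRAINING_TIMESTEPS = 10000              # timesteps per learn() call
CURRENT_ITERATION = 0                   # index of the current repetition
SAVE_AGENTS = True                      # save a checkpoint after every training step
SAVE_FINAL_AGENT = True                 # save one final checkpoint per run
VERBOSITY = 1
POLICY_KWARGS = dict(layers=[64, 64])   # assumed stable-baselines MlpPolicy kwargs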
def train(training_tag):
    env = gym.make(ENVIRONMENT_NAME)
    env = DummyVecEnv([lambda: env])
    data = pd.DataFrame()
    #env._max_episode_steps = 200

    if isinstance(training_tag, float):
        model = CLAC(clac_MlpPolicy, env, mut_inf_coef=training_tag, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            #print("length normal: ", env.unwrapped.envs[0].length)
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))

            file_tag = str(training_tag).replace(".", "p")

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model
        step = 0

        model = SAC(sac_MlpPolicy, env, ent_coef=training_tag, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "SAC" + str(training_tag), training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC" + str(training_tag), training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC" + str(training_tag), training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))

            file_tag = str(training_tag).replace(".", "p")

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    if training_tag == "CLAC":
        model = CLAC(clac_MlpPolicy, env, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "CLAC", "auto", False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC", "auto", 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC", "auto", 2, (step + 1) * TRAINING_TIMESTEPS))

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_t" + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    if training_tag == "SAC":
        model = SAC(sac_MlpPolicy, env, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "SAC", "auto", False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC", "auto", 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC", "auto", 2, (step + 1) * TRAINING_TIMESTEPS))

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_t" + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    if training_tag == "DDPG":
        # the noise objects for DDPG
        n_actions = env.action_space.shape[-1]
        param_noise = None
        action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

        model = DDPG(DDPG_MlpPolicy, env, verbose=VERBOSITY, param_noise=param_noise, action_noise=action_noise, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "DDPG", None, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "DDPG", None, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "DDPG", None, 2, (step + 1) * TRAINING_TIMESTEPS))

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/DDPG_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/DDPG_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    if training_tag == "PPO1":
        model = PPO1(MlpPolicy, env, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            data = data.append(test(model, "PPO1", training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "PPO1", training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "PPO1", training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/PPO1_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/PPO1_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    if training_tag == "A2C":
        model = A2C(MlpPolicy, env, verbose=VERBOSITY, policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            data = data.append(test(model, "A2C", training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "A2C", training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "A2C", training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))

            if SAVE_AGENTS:
                model.save(SAVE_FOLDER + "/models/A2C_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if SAVE_FINAL_AGENT:
            model.save(SAVE_FOLDER + "/models/A2C_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    return data
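# Illustrative only: one way the train() routine above might be driven. The coefficient
# list, the baselines, and the output path are placeholders; the original experiment
# settings are not shown in this file.
if __name__ == "__main__":
    all_results = pd.DataFrame()
    # Fixed-coefficient CLAC/SAC runs first, then automatic-coefficient and baseline runs.
    for tag in [0.01, 0.1, 1.0, "CLAC", "SAC", "DDPG", "PPO1", "A2C"]:
        all_results = all_results.append(train(tag), ignore_index=True)
    all_results.to_pickle(SAVE_FOLDER + "/results/training_results_" + str(CURRENT_ITERATION) + ".pkl")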
def test_agent(agent_step):
    now = time.time()

    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]
        training_timestep = 0

        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])
        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])
        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-3, verbose=1)

        for resample_step in range(0, NUM_RESAMPLES):
            features = pd.DataFrame()

            if agent_step == 1:
                print(mut_coef, " ", ent_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER, " ", resample_step)

            (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
            (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
            (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)

            # Save models (each model under its own prefix)
            clac_model.save(FOLDER + "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))

            training_timestep += NUM_TRAINING_STEPS

            # Test Normal
            eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            # Test generalization
            eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" +
str(agent_step) + "_" + str(resample_step) + ".pkl") # Test generalization Extreme eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2) eval_results['AgentID'] = agent_step eval_results.to_pickle(FOLDER + "/Extreme/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2) eval_results['AgentID'] = agent_step eval_results.to_pickle(FOLDER + "/Extreme/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2) eval_results['AgentID'] = agent_step eval_results.to_pickle(FOLDER + "/Extreme/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") clac_env.env_method("reset_features") sac_env.env_method("reset_features") mirl_env.env_method("reset_features") del sac_model del sac_env del clac_model del clac_env del mirl_model del mirl_env later = time.time() difference = int(later - now) print("Tested Agent Time: ", difference)
features = pd.DataFrame()

mirl_env = gym.make(ENVIRONMENT_NAME)
mirl_env = DummyVecEnv([lambda: mirl_env])

print("here 3")

mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=0.99, verbose=1)

(mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=10)
learning_results['AgentID'] = agent_step
learning_results.to_pickle(FOLDER + "/results/MIRL_TEST_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

print("here 3")
print("here 5")

mirl_model.save(FOLDER + "/models/MIRL_TEST_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")

del mirl_model
del mirl_env
env.unwrapped.envs[0].setHiddenValues([0.60, 0.40, 0.48823553, 0.50263806, 0.4893053, 0.49855249, 0.51247362, 0.51375429, 0.49918321, 0.48824743])

TOTAL_TIMESTEPS = 100000
SAMPLE_SIZE = 100000

fig, axes = plt.subplots(nrows=2, ncols=5, sharey=True, sharex=True)
clac_axes = axes[0, :]
sac_axes = axes[1, :]

for _ in range(10):
    for model_index, ent_coef in enumerate([0.0, 1.0, 2.0, 3.0]):
        model = CLAC(clac_MlpPolicy, env, mut_inf_coef=ent_coef, verbose=0, policy_kwargs=POLICY_KWARGS)
        (model, learning_results) = model.learn(total_timesteps=TOTAL_TIMESTEPS, log_interval=100)

        for state in [0, 1]:
            clac_sample = []
            for _ in range(SAMPLE_SIZE):
                clac_action = model.predict([state])[0][0][0]
                # Normalize the action from [-1, 1] to [0, 1]; Gym requires action spaces to be symmetric.
                clac_action = (clac_action + 1) / 2
                clac_sample.append(clac_action)

            clac_sample_array = np.asarray(clac_sample)
            mu_clac = clac_sample_array.mean()
            std_clac = clac_sample_array.std()
            print("CLAC: ent_coef: ", ent_coef, " state: ", state, " mean: ", mu_clac, "std: ", std_clac)
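            # Illustrative only: the subplot grid created above (clac_axes / sac_axes) is
            # unused in the surviving fragment. A hypothetical way to visualize the sampled
            # action distribution is a per-coefficient histogram, e.g.:
            clac_axes[model_index].hist(clac_sample_array, bins=50, alpha=0.5, label="state " + str(state))
            clac_axes[model_index].set_title("CLAC coef " + str(ent_coef))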
def test_agent(agent_step):
    now = time.time()

    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]
        training_timestep = 0

        if agent_step == 1:
            print(mut_coef, " ", ent_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER)

        features = pd.DataFrame()

        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])
        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])
        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-3, verbose=1)

        (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/Training/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        training_timestep += NUM_TRAINING_STEPS

        # Copy the training environment's features to the SAC and MIRL environments.
        # Note: the original fragment used env_features without defining it; the line below
        # is assumed, mirroring the get_features pattern used in the other test_agent variant.
        env_features = clac_env.env_method("get_features")[0]
        sac_env.env_method("set_features", env_features)
        mirl_env.env_method("set_features", env_features)

        #if(agent_step == 0):
        #    print(env_features)

        Power = env_features[0]
        Density = env_features[1]
        Friction = env_features[2]
        Gravity = env_features[3]

        # Resample Step is 0 here: this record describes the initial (pre-resample) training phase.
        d = {"Mut Coefficient": mut_coef, "Ent Coefficient": ent_coef, "Resample Step": 0, "Power": Power, "Density": Density, "Friction": Friction, "Gravity": Gravity}
        #d = {"Mut Coefficient": mut_coef, "Resample Step": resample_step, "Power": Power, "Density": Density, "Friction": Friction, "Gravity": Gravity}
        features = features.append(d, ignore_index=True)

        # Test generalization
        eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        clac_model.save(FOLDER + "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")

        #features.to_pickle(FOLDER + "/features/features_" + str(agent_step) + "_" + str(mut_coef) + "_" + str(ent_coef) + ".pkl")
str(mut_coef) + "_" + str(ent_coef) + ".pkl") for resample_step in range(1, NUM_RESAMPLES): if(agent_step == 1): print(mut_coef, " ", ent_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER, " resample step ", resample_step) (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000) learning_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000) learning_results.to_pickle(FOLDER + "/Training/results/SAC_"+ str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000) learning_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl") training_timestep += NUM_TRAINING_STEPS clac_model.save(FOLDER + "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step)) sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step)) mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step)) #print(features) del sac_model del sac_env del clac_model del clac_env del mirl_model del mirl_env later = time.time() difference = int(later - now) print("Tested Agent Time: ", difference)
features = pd.DataFrame()

clac_env = gym.make(ENVIRONMENT_NAME)
clac_env = DummyVecEnv([lambda: clac_env])
clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

sac_env = gym.make(ENVIRONMENT_NAME)
sac_env = DummyVecEnv([lambda: sac_env])
sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

(clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
learning_results['AgentID'] = agent_step
learning_results.to_pickle(FOLDER + "/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

(sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
learning_results['AgentID'] = agent_step
learning_results.to_pickle(FOLDER + "/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

for resample_step in range(1, NUM_RESAMPLES):
    # Set both environments to the same resampled values
def test_agent(agent_step):
    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]

        if agent_step == 1:
            print(mut_coef, " ", ent_coef, " ", NUM_TRAINING_STEPS, " ", ENVIRONMENT_NAME, " ", FOLDER)

        features = pd.DataFrame()

        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])
        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])
        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-4, verbose=1)

        (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        for resample_step in range(1, NUM_RESAMPLES):
            # Set all three environments to the same resampled values
            if RANDOMIZATION_LEVEL == "Normal":
                clac_env.env_method("randomize", 0)
            elif RANDOMIZATION_LEVEL == "Random":
                clac_env.env_method("randomize", 1)
            elif RANDOMIZATION_LEVEL == "Extreme":
                clac_env.env_method("randomize", 2)
            elif RANDOMIZATION_LEVEL == "Test":
                clac_env.env_method("randomize", -1)
            else:
                print("Error resampling unknown value: ", RANDOMIZATION_LEVEL)
                continue

            env_features = clac_env.env_method("get_features")[0]
            sac_env.env_method("set_features", env_features)
            mirl_env.env_method("set_features", env_features)

            if agent_step == 1:
                print(env_features)

            Power = env_features[0]
            Density = env_features[1]
            Friction = env_features[2]
            Gravity = env_features[3]

            d = {"Mut Coefficient": mut_coef, "Ent Coefficient": ent_coef, "Resample Step": resample_step,
                 "Power": Power, "Density": Density, "Friction": Friction, "Gravity": Gravity}
            features = features.append(d, ignore_index=True)

            (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False, log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

        clac_model.save(FOLDER + "/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        sac_model.save(FOLDER + "/models/SAC_"
                       + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        mirl_model.save(FOLDER + "/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")

        features.to_pickle(FOLDER + "/features/features_" + str(agent_step) + "_" + str(mut_coef) + "_" + str(ent_coef) + ".pkl")

        #print(features)

        del sac_model
        del sac_env
        del clac_model
        del clac_env
        del mirl_model
        del mirl_env
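# Illustrative only: a sketch of how the per-agent result pickles written above could be
# collected for analysis. The glob pattern is an assumption about the FOLDER layout, not
# part of the original scripts.
if __name__ == "__main__":
    import glob

    result_frames = [pd.read_pickle(path) for path in glob.glob(FOLDER + "/results/*.pkl")]
    if result_frames:
        all_results = pd.concat(result_frames, ignore_index=True)
        print(all_results.shape)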