Example #1
def train_clac(training_tag):
    env = gym.make(ENVIRONMENT_NAME)
    env = DummyVecEnv([lambda: env])
    print("hidden values before: ", env.unwrapped.envs[0].hiddenValues)

    data = pd.DataFrame()
    if (isinstance(training_tag, float)):
        model = CLAC(clac_MlpPolicy,
                     env,
                     ent_coef=training_tag,
                     verbose=1,
                     policy_kwargs=POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            env.reset()
            (model, learning_results) = model.learn(
                total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            file_tag = str(training_tag).replace(".", "p")
            if (SAVE_AGENTS):
                model.save("nchain/models/CLAC_" + ENVIRONMENT_NAME + "_s" +
                           str(step) + "_t" + str(file_tag) + "_i" +
                           str(CURRENT_ITERATION) + "_ts" +
                           str(TRAINING_TIMESTEPS))

        if (SAVE_FINAL_AGENT):
            model.save("nchain/models/CLAC_" + ENVIRONMENT_NAME + "_t" +
                       str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" +
                       str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        del env
        del model
        step = 0
    return data
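These snippets assume a number of module-level imports and constants that are defined elsewhere in the project. A minimal sketch of that setup, with placeholder values (the constant names are taken from the code above; the values, the environment id, and the import paths for the custom CLAC package are assumptions, not the project's actual configuration):

import gym
import numpy as np
import pandas as pd
from stable_baselines.common.vec_env import DummyVecEnv
# CLAC and its policy come from the author's custom package; these import paths are assumed.
from clac import CLAC
from clac.policies import MlpPolicy as clac_MlpPolicy   # other snippets alias this as CLAC_MlpPolicy

ENVIRONMENT_NAME = "NChainContinuous-v0"   # placeholder environment id
POLICY_KWARGS = dict(layers=[256, 256])    # placeholder network architecture
TRAINING_STEPS = 10                        # outer training/evaluation rounds (placeholder)
TRAINING_TIMESTEPS = 10000                 # timesteps per learn() call (placeholder)
CURRENT_ITERATION = 1
SAVE_AGENTS = True
SAVE_FINAL_AGENT = True

Note that in stock stable-baselines, model.learn() returns only the model; the tuple unpacking (model, learning_results) = model.learn(...) used throughout these snippets suggests a modified fork whose learn() also returns a results DataFrame.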
Example #2
def test_agent(agent_step):
    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]

        if (agent_step == 1):
            print(mut_coef, "  ", NUM_TRAINING_STEPS, "  ", ENVIRONMENT_NAME,
                  "  ", FOLDER)

        features = pd.DataFrame()

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])

        mirl_model = CLAC(CLAC_MlpPolicy,
                          mirl_env,
                          mut_inf_coef=mut_coef,
                          coef_schedule=3.3e-4,
                          verbose=1)

        (mirl_model, learning_results) = mirl_model.learn(
            total_timesteps=NUM_TRAINING_STEPS, log_interval=10)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/MIRL_" +
                                   str(mut_coef).replace(".", "p") + "_" +
                                   str(agent_step) + "_0.pkl")

        for resample_step in range(1, NUM_RESAMPLES):
            # Resample the environment to new values
            if (RANDOMIZATION_LEVEL == "Normal"):
                mirl_env.env_method("randomize", 0)
            elif (RANDOMIZATION_LEVEL == "Extreme"):
                mirl_env.env_method("randomize", 1)
            elif (RANDOMIZATION_LEVEL == "Test"):
                mirl_env.env_method("randomize", -1)
            else:
                print("Error resampling unknown value: ", RANDOMIZATION_LEVEL)
                continue

            if (agent_step == 1):
                print(mut_coef, "  ", NUM_TRAINING_STEPS, "  ",
                      ENVIRONMENT_NAME, "  ", FOLDER, " resample step ",
                      resample_step)

            (mirl_model, learning_results) = mirl_model.learn(
                total_timesteps=NUM_TRAINING_STEPS,
                reset_num_timesteps=False,
                log_interval=10)
            learning_results.to_pickle(FOLDER + "/results/MIRL_" +
                                       str(mut_coef).replace(".", "p") + "_" +
                                       str(agent_step) + "_" +
                                       str(resample_step) + ".pkl")

        mirl_model.save(FOLDER + "/models/MIRL_" +
                        str(mut_coef).replace(".", "p") + "_" +
                        str(agent_step) + "_0")

        del mirl_model
        del mirl_env
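The env_method("randomize", level) calls go through DummyVecEnv.env_method, which invokes the named method on each wrapped environment. The randomize method itself belongs to the project's custom environment and is not shown; below is a rough illustrative sketch of such a method (the parameter ranges and the meaning of each level are assumptions):

import gym
import numpy as np

class RandomizableEnv(gym.Env):
    # observation_space, action_space, step() and reset() omitted for brevity.
    def randomize(self, level):
        # Assumed convention from the snippet: 0 = "Normal", 1 = "Extreme", -1 = fixed "Test" setting.
        if level == -1:
            self.gravity = 9.8
        elif level == 0:
            self.gravity = np.random.uniform(8.0, 12.0)
        else:
            self.gravity = np.random.uniform(4.0, 20.0)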
Example #3
def train(training_tag):
    env = gym.make(ENVIRONMENT_NAME)
    env = DummyVecEnv([lambda: env]) 
    data = pd.DataFrame()
    #env._max_episode_steps = 200

    if(isinstance(training_tag, float)):
        model = CLAC(clac_MlpPolicy, env, mut_inf_coef=training_tag, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)
        
        for step in range(TRAINING_STEPS):
            #print("length normal: ", env.unwrapped.envs[0].length)

            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC" + str(training_tag), training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))
            
            file_tag = str(training_tag).replace(".", "p")
            if(SAVE_AGENTS):   
                model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model
        step = 0
        
        
        model = SAC(sac_MlpPolicy, env, ent_coef=training_tag, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)
        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "SAC" + str(training_tag), training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC" + str(training_tag), training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC" + str(training_tag), training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))
            
            file_tag = str(training_tag).replace(".", "p")
            if(SAVE_AGENTS):   
                model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))
        
        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_t" + str(file_tag) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model
        

    if(training_tag == "CLAC"):
        model = CLAC(clac_MlpPolicy, env, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)
            
            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "CLAC", "auto", False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC", "auto", 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "CLAC", "auto", 2, (step + 1) * TRAINING_TIMESTEPS))

            if(SAVE_AGENTS):
                model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/CLAC_" + ENVIRONMENT_NAME + "_t" + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS *  TRAINING_TIMESTEPS))

        env.reset()
        del model
    
    if(training_tag == "SAC"):
        model = SAC(sac_MlpPolicy, env, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "SAC", "auto", False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC", "auto", 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "SAC", "auto", 2, (step + 1) * TRAINING_TIMESTEPS))

            if(SAVE_AGENTS):
                model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_s" + str(step) + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/SAC_" + ENVIRONMENT_NAME + "_t" + "_auto" + "_i" + str(CURRENT_ITERATION) + "_ts" + str( TRAINING_STEPS *  TRAINING_TIMESTEPS))

        env.reset()
        del model
    
    if(training_tag == "DDPG"):
        # the noise objects for DDPG
        n_actions = env.action_space.shape[-1]
        param_noise = None
        action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=float(0.5) * np.ones(n_actions))

        model = DDPG(DDPG_MlpPolicy, env, verbose=VERBOSITY, param_noise=param_noise, action_noise=action_noise, policy_kwargs = POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            (model, learning_results) = model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            #data = data.append(learning_results, ignore_index=True)

            data = data.append(test(model, "DDPG", None, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "DDPG", None, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "DDPG", None, 2, (step + 1) * TRAINING_TIMESTEPS))
            
            if(SAVE_AGENTS):
                model.save(SAVE_FOLDER + "/models/DDPG_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/DDPG_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS *  TRAINING_TIMESTEPS))

        env.reset()
        del model

    if(training_tag == "PPO1"):
        model = PPO1(MlpPolicy, env, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            data = data.append(test(model, "PPO1", training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "PPO1", training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "PPO1", training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))
            
            if(SAVE_AGENTS):
                model.save(SAVE_FOLDER + "/models/PPO1_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/PPO1_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model
    
    if(training_tag == "A2C"):
        model = A2C(MlpPolicy, env, verbose=VERBOSITY, policy_kwargs = POLICY_KWARGS)

        for step in range(TRAINING_STEPS):
            model.learn(total_timesteps=TRAINING_TIMESTEPS, log_interval=100)

            data = data.append(test(model, "A2C", training_tag, False, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "A2C", training_tag, 1, (step + 1) * TRAINING_TIMESTEPS))
            data = data.append(test(model, "A2C", training_tag, 2, (step + 1) * TRAINING_TIMESTEPS))
            
            if(SAVE_AGENTS):
                model.save(SAVE_FOLDER + "/models/A2C_" + ENVIRONMENT_NAME + "_s" + str(step) + "_i" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_TIMESTEPS))

        if(SAVE_FINAL_AGENT):
            model.save(SAVE_FOLDER + "/models/A2C_" + ENVIRONMENT_NAME + "_t" + str(CURRENT_ITERATION) + "_ts" + str(TRAINING_STEPS * TRAINING_TIMESTEPS))

        env.reset()
        del model

    return data
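In addition to the custom CLAC classes, this example pulls several standard algorithms from stable-baselines (v2). The imports it appears to rely on look roughly like this (the noise class moved between modules across stable-baselines releases, so the exact path depends on the installed version; the CLAC imports are again assumptions about the custom package):

import gym
import numpy as np
import pandas as pd
from stable_baselines import SAC, DDPG, PPO1, A2C
from stable_baselines.common.policies import MlpPolicy            # used by PPO1 and A2C
from stable_baselines.sac.policies import MlpPolicy as sac_MlpPolicy
from stable_baselines.ddpg.policies import MlpPolicy as DDPG_MlpPolicy
from stable_baselines.common.noise import OrnsteinUhlenbeckActionNoise  # older releases: stable_baselines.ddpg.noise
from stable_baselines.common.vec_env import DummyVecEnv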
Example #4
def test_agent(agent_step):
    now = time.time()
    for coef_index in range(len(CLAC_COEFS)):

        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]
        training_timestep = 0

        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])

        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])

        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-3, verbose=1)
        
        for resample_step in range(0, NUM_RESAMPLES):
            features = pd.DataFrame()

            if(agent_step == 1):
                print(mut_coef,  "  ",  ent_coef, "  ", NUM_TRAINING_STEPS, "  ",  ENVIRONMENT_NAME, "  ", FOLDER, " ", resample_step)

            (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
            (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
            (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)

            # Save models 
            clac_model.save(FOLDER + "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))

            training_timestep += NUM_TRAINING_STEPS

            # Test Normal 
            eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 0)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            # Test generalization 
            eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 1)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Generalization/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            # Test generalization Extreme
            eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Extreme/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Extreme/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, agent_step, resample_step, 2)
            eval_results['AgentID'] = agent_step
            eval_results.to_pickle(FOLDER + "/Extreme/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            clac_env.env_method("reset_features")
            sac_env.env_method("reset_features")
            mirl_env.env_method("reset_features")
        
        del sac_model
        del sac_env

        del clac_model
        del clac_env
        
        del mirl_model
        del mirl_env

    later = time.time()
    difference = int(later - now)
    print("Tested Agent Time: ", difference)
Example #5
    mut_coef = CLAC_COEFS[coef_index]

    if (agent_step == 1):
        print(mut_coef, "  ", NUM_TRAINING_STEPS, "  ", ENVIRONMENT_NAME, "  ",
              FOLDER)

    features = pd.DataFrame()

    mirl_env = gym.make(ENVIRONMENT_NAME)
    mirl_env = DummyVecEnv([lambda: mirl_env])

    print("here 3")

    mirl_model = CLAC(CLAC_MlpPolicy,
                      mirl_env,
                      mut_inf_coef=mut_coef,
                      coef_schedule=0.99,
                      verbose=1)

    (mirl_model,
     learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS,
                                          log_interval=10)
    learning_results['AgentID'] = agent_step
    learning_results.to_pickle(FOLDER + "/results/MIRL_TEST_" +
                               str(mut_coef).replace(".", "p") + "_" +
                               str(agent_step) + "_0.pkl")

    print("here 3")

    print("here 5")
    mirl_model.save(FOLDER + "/models/MIRL_TEST_" +
Example #6
env = DummyVecEnv([lambda: env])

env.unwrapped.envs[0].setHiddenValues([0.60, 0.40, 0.48823553, 0.50263806, 0.4893053,  0.49855249, 0.51247362, 0.51375429, 0.49918321, 0.48824743])


TOTAL_TIMESTEPS = 100000  
SAMPLE_SIZE = 100000

fig, axes = plt.subplots(nrows=2, ncols=5, sharey=True, sharex=True)

clac_axes = axes[0,:]
sac_axes  = axes[1,:]

for _ in range(10):
    for model_index, ent_coef in enumerate([0.0, 1.0, 2.0, 3.0]):
        model = CLAC(clac_MlpPolicy, env, mut_inf_coef=ent_coef, verbose=0, policy_kwargs = POLICY_KWARGS)
        (model, learning_results) = model.learn(total_timesteps=TOTAL_TIMESTEPS, log_interval=100)

        for state in [0,1]: 
            clac_sample = []
            
            for _ in range(SAMPLE_SIZE):
                clac_action = model.predict([state])[0][0][0] 
                clac_action = (clac_action + 1) / 2  # Normalize the [-1, 1] action to [0, 1]; gym requires action spaces to be symmetric.
                clac_sample.append(clac_action)
            
            clac_sample_array = np.asarray(clac_sample)

            mu_clac = clac_sample_array.mean()
            std_clac = clac_sample_array.std()
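The figure and axes created at the top of this example are not touched in the visible part of the snippet. A plausible, purely illustrative continuation would plot the fitted action distribution for each coefficient on the CLAC row of axes:

            from scipy.stats import norm

            xs = np.linspace(0.0, 1.0, 200)
            clac_axes[model_index].plot(xs, norm.pdf(xs, mu_clac, std_clac),
                                        label="state " + str(state))
            clac_axes[model_index].set_title("coef = " + str(ent_coef))
            clac_axes[model_index].legend()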
Example #7
#model_tags = ["0p01", "0p1", "1p0", "10p0", "100p0"]

TIMESTEPS = 100000
data_1 = pd.DataFrame()
data_2 = pd.DataFrame()
data_3 = pd.DataFrame()

for (model_index, model_tag) in enumerate(model_tags):
    for index in range(3):
        #print("break")
        env = gym.make("Pendulum-v0")
        env = DummyVecEnv([lambda: env])

        clac_model_path = RESULTS_FOLDER + "CLAC_Pendulum-v0_t" + model_tag + "_i" + str(
            index + 1) + "_ts" + str(NUM_TRAINING_STEPS) + ".pkl"
        clac_model = CLAC.load(clac_model_path, env=env)

        obs = env.reset()
        episode_reward = 0

        for _ in range(TIMESTEPS):
            action, _ = clac_model.predict(obs)
            obs, rewards, dones, info = env.step(action)
            episode_reward += rewards[0]

            #print("state", obs)
            #print("action", (action + 1)/(2))
            #print("mean", np.mean(env.unwrapped.envs[0].hiddenValues))

            if (dones[0]):
                env_name = env.unwrapped.envs[0].spec.id
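The fragment is cut off right after the episode ends; presumably the episode reward is recorded there and the counter reset. A hypothetical continuation of the if (dones[0]): block, with column names that are illustrative rather than taken from the project:

                row = {"Environment": env_name, "Model": "CLAC_" + model_tag,
                       "Iteration": index + 1, "Episode Reward": episode_reward}
                data_1 = data_1.append(row, ignore_index=True)   # pandas < 2.0 append, as in the other snippets
                episode_reward = 0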
Example #8
def test_agent(agent_step):
    now = time.time()
    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]
        training_timestep = 0

        if(agent_step == 1):
            print(mut_coef,  "  ",  ent_coef, "  ", NUM_TRAINING_STEPS, "  ",  ENVIRONMENT_NAME, "  ", FOLDER)
        
        features = pd.DataFrame()
        
        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy, clac_env, mut_inf_coef=mut_coef, verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])

        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])

        mirl_model = CLAC(CLAC_MlpPolicy, mirl_env, mut_inf_coef=mut_coef, coef_schedule=3.3e-3, verbose=1)

        (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER +  "/Training/results/SAC_"+ str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        training_timestep += NUM_TRAINING_STEPS

        sac_env.env_method("set_features", env_features) 
        mirl_env.env_method("set_features", env_features) 

        #if(agent_step == 0):
        #    print(env_features)

        Power = env_features[0]
        Density = env_features[1]
        Friction = env_features[2]
        Gravity = env_features[3]

        d = {"Mut Coefficient":  mut_coef, "Ent Coefficient":  ent_coef, "Resample Step":resample_step, "Power": Power, "Density": Density, "Friction": Friction, "Gravity": Gravity}
        #d = {"Mut Coefficient":  mut_coef, "Resample Step":resample_step, "Power": Power, "Density": Density, "Friction": Friction, "Gravity": Gravity}
        features = features.append(d, ignore_index = True)

        # Test generalization
        eval_results = eval_model(clac_model, clac_env, "CLAC", mut_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        eval_results = eval_model(sac_model, sac_env, "SAC", ent_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        eval_results = eval_model(mirl_model, mirl_env, "MIRL", mut_coef, NUM_TESTING_STEPS, training_timestep, 0)
        eval_results['AgentID'] = agent_step
        eval_results.to_pickle(FOLDER + "/Generalization/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0.pkl")

        clac_model.save(FOLDER +  "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_0")
        #features.to_pickle(FOLDER +  "/features/features_" + str(agent_step) + "_" + str(mut_coef)  + "_" + str(ent_coef) + ".pkl")
        
        for resample_step in range(1, NUM_RESAMPLES):
            if(agent_step == 1):
                print(mut_coef,  "  ",  ent_coef, "  ", NUM_TRAINING_STEPS, "  ",  ENVIRONMENT_NAME, "  ", FOLDER, " resample step ", resample_step)
            
            (clac_model, learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False,  log_interval=1000)
            learning_results.to_pickle(FOLDER + "/Training/results/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            (sac_model, learning_results) = sac_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False,  log_interval=1000)
            learning_results.to_pickle(FOLDER +  "/Training/results/SAC_"+ str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            (mirl_model, learning_results) = mirl_model.learn(total_timesteps=NUM_TRAINING_STEPS, reset_num_timesteps=False,  log_interval=1000)
            learning_results.to_pickle(FOLDER + "/Training/results/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step) + ".pkl")

            training_timestep += NUM_TRAINING_STEPS

            clac_model.save(FOLDER +  "/Training/models/CLAC_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            sac_model.save(FOLDER + "/Training/models/SAC_" + str(ent_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))
            mirl_model.save(FOLDER + "/Training/models/MIRL_" + str(mut_coef).replace(".", "p") + "_" + str(agent_step) + "_" + str(resample_step))

        #print(features)

        del sac_model
        del sac_env

        del clac_model
        del clac_env
        
        del mirl_model
        del mirl_env

    later = time.time()
    difference = int(later - now)
    print("Tested Agent Time: ", difference)
Example #9
agent_step = 1

for coef_index in range(len(CLAC_COEFS)):
    mut_coef = CLAC_COEFS[coef_index]
    ent_coef = SAC_COEFS[coef_index]

    if (agent_step == 1):
        print(mut_coef, "  ", ent_coef, "  ", NUM_TRAINING_STEPS, "  ",
              ENVIRONMENT_NAME, "  ", FOLDER)

    features = pd.DataFrame()

    clac_env = gym.make(ENVIRONMENT_NAME)
    clac_env = DummyVecEnv([lambda: clac_env])
    clac_model = CLAC(CLAC_MlpPolicy,
                      clac_env,
                      mut_inf_coef=mut_coef,
                      verbose=1)

    sac_env = gym.make(ENVIRONMENT_NAME)
    sac_env = DummyVecEnv([lambda: sac_env])

    sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

    (clac_model,
     learning_results) = clac_model.learn(total_timesteps=NUM_TRAINING_STEPS,
                                          log_interval=1000)
    learning_results['AgentID'] = agent_step
    learning_results.to_pickle(FOLDER + "/results/CLAC_" +
                               str(mut_coef).replace(".", "p") + "_" +
                               str(agent_step) + "_0.pkl")
Example #10
def test_agent(agent_step):
    for coef_index in range(len(CLAC_COEFS)):
        mut_coef = CLAC_COEFS[coef_index]
        ent_coef = SAC_COEFS[coef_index]

        if (agent_step == 1):
            print(mut_coef, "  ", ent_coef, "  ", NUM_TRAINING_STEPS, "  ",
                  ENVIRONMENT_NAME, "  ", FOLDER)

        features = pd.DataFrame()

        clac_env = gym.make(ENVIRONMENT_NAME)
        clac_env = DummyVecEnv([lambda: clac_env])
        clac_model = CLAC(CLAC_MlpPolicy,
                          clac_env,
                          mut_inf_coef=mut_coef,
                          verbose=1)

        sac_env = gym.make(ENVIRONMENT_NAME)
        sac_env = DummyVecEnv([lambda: sac_env])

        sac_model = SAC(MlpPolicy, sac_env, ent_coef=ent_coef, verbose=1)

        mirl_env = gym.make(ENVIRONMENT_NAME)
        mirl_env = DummyVecEnv([lambda: mirl_env])

        mirl_model = CLAC(CLAC_MlpPolicy,
                          mirl_env,
                          mut_inf_coef=mut_coef,
                          coef_schedule=3.3e-4,
                          verbose=1)

        (clac_model, learning_results) = clac_model.learn(
            total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/CLAC_" +
                                   str(mut_coef).replace(".", "p") + "_" +
                                   str(agent_step) + "_0.pkl")

        (sac_model, learning_results) = sac_model.learn(
            total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/SAC_" +
                                   str(ent_coef).replace(".", "p") + "_" +
                                   str(agent_step) + "_0.pkl")

        (mirl_model, learning_results) = mirl_model.learn(
            total_timesteps=NUM_TRAINING_STEPS, log_interval=1000)
        learning_results['AgentID'] = agent_step
        learning_results.to_pickle(FOLDER + "/results/MIRL_" +
                                   str(mut_coef).replace(".", "p") + "_" +
                                   str(agent_step) + "_0.pkl")

        for resample_step in range(1, NUM_RESAMPLES):
            # Set all three environments to the same resampled values
            if (RANDOMIZATION_LEVEL == "Normal"):
                clac_env.env_method("randomize", 0)
            elif (RANDOMIZATION_LEVEL == "Random"):
                clac_env.env_method("randomize", 1)
            elif (RANDOMIZATION_LEVEL == "Extreme"):
                clac_env.env_method("randomize", 2)
            elif (RANDOMIZATION_LEVEL == "Test"):
                clac_env.env_method("randomize", -1)
            else:
                print("Error resampling unknown value: ", RANDOMIZATION_LEVEL)
                continue

            env_features = clac_env.env_method("get_features")[0]
            sac_env.env_method("set_features", env_features)
            mirl_env.env_method("set_features", env_features)

            if (agent_step == 1):
                print(env_features)

            Power = env_features[0]
            Density = env_features[1]
            Friction = env_features[2]
            Gravity = env_features[3]

            d = {
                "Mut Coefficient": mut_coef,
                "Ent Coefficient": ent_coef,
                "Resample Step": resample_step,
                "Power": Power,
                "Density": Density,
                "Friction": Friction,
                "Gravity": Gravity
            }
            features = features.append(d, ignore_index=True)

            (clac_model, learning_results) = clac_model.learn(
                total_timesteps=NUM_TRAINING_STEPS,
                reset_num_timesteps=False,
                log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/CLAC_" +
                                       str(mut_coef).replace(".", "p") + "_" +
                                       str(agent_step) + "_" +
                                       str(resample_step) + ".pkl")

            (sac_model, learning_results) = sac_model.learn(
                total_timesteps=NUM_TRAINING_STEPS,
                reset_num_timesteps=False,
                log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/SAC_" +
                                       str(ent_coef).replace(".", "p") + "_" +
                                       str(agent_step) + "_" +
                                       str(resample_step) + ".pkl")

            (mirl_model, learning_results) = mirl_model.learn(
                total_timesteps=NUM_TRAINING_STEPS,
                reset_num_timesteps=False,
                log_interval=1000)
            learning_results.to_pickle(FOLDER + "/results/MIRL_" +
                                       str(mut_coef).replace(".", "p") + "_" +
                                       str(agent_step) + "_" +
                                       str(resample_step) + ".pkl")

        clac_model.save(FOLDER + "/models/CLAC_" +
                        str(mut_coef).replace(".", "p") + "_" +
                        str(agent_step) + "_0")
        sac_model.save(FOLDER + "/models/SAC_" +
                       str(ent_coef).replace(".", "p") + "_" +
                       str(agent_step) + "_0")
        mirl_model.save(FOLDER + "/models/MIRL_" +
                        str(mut_coef).replace(".", "p") + "_" +
                        str(agent_step) + "_0")
        features.to_pickle(FOLDER + "/features/features_" + str(agent_step) +
                           "_" + str(mut_coef) + "_" + str(ent_coef) + ".pkl")

        #print(features)

        del sac_model
        del sac_env

        del clac_model
        del clac_env

        del mirl_model
        del mirl_env
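get_features and set_features are again methods of the project's custom environment, reached through env_method. From the indexing above, the feature vector is [Power, Density, Friction, Gravity]. A rough illustrative sketch of such methods (the attribute names are assumptions):

class FeatureEnvMixin:
    def get_features(self):
        # Order assumed from the snippet: power, density, friction, gravity.
        return [self.power, self.density, self.friction, self.gravity]

    def set_features(self, features):
        self.power, self.density, self.friction, self.gravity = features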