Ejemplo n.º 1
0
def good_inverted_car(directory="./Results/Car/Noisy_500/"):
    """Configure TRPO over an HRLEnv wrapping a noise-free RCCarBarriers.

    Two sub-policies are loaded from ``models/noisy/good`` and
    ``models/noisy/other``. After the environment, policy, baseline and
    algorithm have been constructed, the function aborts on purpose; the
    guard message asks for the logging directory to be changed before the
    experiment is rerun. The launch call below the guard is kept as a record
    of how the run was configured.

    Args:
        directory: Log directory handed to ``run_experiment_lite``.

    Raises:
        AssertionError: Always, before the experiment is launched.
    """
    rccar = RCCarBarriers(noise=0.)
    policies = [
        PolicyLoader("models/noisy/" + path) for path in ['good', 'other']
    ]
    domain = RLPyEnv(rccar)
    env = HRLEnv(domain, policies)
    policy = CategoricalMLPPolicy(env_spec=env.spec, )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=500,
        discount=0.9,
        step_size=0.001,
    )
    # Raise explicitly rather than `assert False`: assert statements are
    # removed when Python runs with -O, which would silently disable this
    # guard and let the experiment run.
    raise AssertionError(
        "Make sure to change logging directory before rerunning this experiment"
    )

    # Unreachable while the guard above is in place.
    run_experiment_lite(
        algo.train(),
        # Number of parallel workers for sampling
        n_parallel=4,
        # Only keep the snapshot parameters for the last iteration
        snapshot_mode="last",
        script="scripts/run_experiment_lite_rl.py",
        log_dir=directory,
        # Fixed seed for the experiment; a random seed is used if omitted
        seed=1,
    )
Ejemplo n.º 2
0
def test(num=1, path="./Results/Tmp", save=False):
    """Train a TRPO policy over three Acrobot sub-policies, in-process.

    The sub-policies are loaded from ``models/Acrobot/`` (``Mass2_Light``,
    ``Mass1_Light``, ``Mass1_Heavy``). Training is started directly through
    ``algo.train()`` rather than via ``run_experiment_lite``.

    Args:
        num: Not read by this function.
        path: Not read by this function.
        save: Not read by this function.
    """
    model_names = ('Mass2_Light', 'Mass1_Light', 'Mass1_Heavy')
    policies = [PolicyLoader("models/Acrobot/" + name) for name in model_names]

    # Configure the Acrobot domain before it is wrapped.
    acrobot = ModifiedAcrobot()
    acrobot.dt = 0.1
    acrobot.episodeCap = 1000

    env = HRLEnv(RLPyEnv(acrobot), policies)
    meta_policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(8, 8))
    trpo = TRPO(
        env=env,
        policy=meta_policy,
        baseline=LinearFeatureBaseline(env_spec=env.spec),
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=50,
        discount=0.995,
        step_size=0.001,
    )
    trpo.train()
Ejemplo n.º 3
0
def noisy_bandits():
    """Run Bandits over one 'good' and three 'untrained' noisy car policies.

    The domain is ``RCCarBarriers`` with ``noise=0.1``. After the run,
    ``band.choices`` is dumped with joblib to ``Results/Bandits/Noisy``.

    Returns:
        The ``Bandits`` instance after ``run()`` has been called.
    """
    rccar = RCCarBarriers(noise=0.1)
    model_names = ['good'] + ['untrained'] * 3
    policies = [PolicyLoader("models/noisy/" + name) for name in model_names]
    domain = RLPyEnv(rccar)
    band = Bandits(
        policies,
        domain,
        N=100,
        rmax=rccar.GOAL_REWARD,
        rmin=-rccar.episodeCap - 20,
    )
    # The return value of run() is not needed; results are read from
    # band.choices below.
    band.run()

    import joblib
    joblib.dump(band.choices, "Results/Bandits/Noisy")
    return band
Ejemplo n.º 4
0
def two_policy_bandits():
    """Run Bandits over the two slide/turn agents (``agent0``, ``agent1``).

    After the run, ``band.choices`` is dumped with joblib to
    ``Results/Bandits/TwoPolicy``.

    Returns:
        The ``Bandits`` instance after ``run()`` has been called.
    """
    # NOTE(review): an earlier comment here read "remove process noise", but
    # the domain is built with noise=0.1 -- confirm which one is intended.
    rccar = RCCarSlideTurn(noise=0.1)
    domain = RLPyEnv(rccar)
    agent_names = ('agent0', 'agent1')
    policies = [
        PolicyLoader("models/slideturn_experiment/" + name)
        for name in agent_names
    ]
    band = Bandits(
        policies,
        domain,
        N=100,
        rmax=rccar.GOAL_REWARD,
        rmin=-rccar.episodeCap,
    )
    # The return value of run() is not needed; results are read from
    # band.choices below.
    band.run()

    import joblib
    joblib.dump(band.choices, "Results/Bandits/TwoPolicy")
    return band
Ejemplo n.º 5
0
def good_double_bad_car(num=1,
                        directory="./Results/Car/CarNoisy/",
                        exp_name="Noisy_2_untrained",
                        save=False):
    """Launch TRPO runs over one 'good' and two 'untrained' car policies.

    The domain is ``RCCarBarriers`` with ``noise=0.1``. The policy and
    baseline objects are created once and shared by every repetition; a new
    ``TRPO`` instance is built per repetition.

    Args:
        num: Number of repetitions to launch.
        directory: Parent directory for saved logs.
        exp_name: Experiment name prefix; a timestamp is appended per run.
        save: When true, log to ``directory/<exp_name><timestamp>``;
            otherwise log to ``./Results/Tmp``.
    """
    rccar = RCCarBarriers(noise=0.1)
    model_names = ('good', 'untrained', 'untrained')
    policies = [PolicyLoader("models/noisy/" + name) for name in model_names]
    env = HRLEnv(RLPyEnv(rccar), policies)

    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    for _ in range(num):
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        run_name = exp_name + timestamp
        if save:
            log_dir = os.path.join(directory, run_name)
        else:
            log_dir = "./Results/Tmp"

        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=4000,
            max_path_length=env.horizon,
            n_itr=600,
            discount=0.9,
            step_size=0.0001,
        )

        # No seed is passed for these runs.
        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            script="scripts/run_experiment_lite_rl.py",
            exp_name=run_name,
            log_dir=log_dir,
        )
Ejemplo n.º 6
0
def good_x_cars(num_agents=5,
                directory="./Final_Results/Car/CarNoisyAgents/",
                exp_name="NoisyTest",
                save=False):
    """Launch three TRPO runs over one 'good' policy plus untrained ones.

    The domain is ``RCCarBarriers`` with ``noise=0.1``. The policy and
    baseline objects are created once and shared by all three runs; a new
    ``TRPO`` instance is built per run.

    Args:
        num_agents: How many 'untrained' sub-policies to place next to the
            'good' one. Only five untrained entries are available, so larger
            values are capped by the slice.
        directory: Parent directory; ``exp_name + str(num_agents)`` is
            appended to it and that directory is created if missing.
        exp_name: Experiment name prefix; ``num_agents`` is appended.
        save: When true, each run logs to ``<directory>/<timestamp>``;
            otherwise all runs log to ``./Results/Tmp``.
    """
    import warnings

    rccar = RCCarBarriers(noise=0.1)
    candidate_paths = [
        'good', 'untrained', 'untrained', 'untrained', 'untrained',
        'untrained'
    ]
    # Slice the names before loading so that policies which would be
    # discarded are never constructed.
    policies = [
        PolicyLoader("models/noisy/" + path)
        for path in candidate_paths[:1 + num_agents]
    ]
    domain = RLPyEnv(rccar)
    env = HRLEnv(domain, policies)
    policy = CategoricalMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    exp_name = exp_name + str(num_agents)
    directory = os.path.join(directory, exp_name)

    # Create the experiment directory once, up front. makedirs also creates
    # missing parents, which a plain mkdir would fail on. An already existing
    # directory is fine; any other failure is reported but not fatal, which
    # keeps the original best-effort behaviour.
    try:
        os.makedirs(directory)
    except OSError as err:
        if not os.path.isdir(directory):
            warnings.warn(
                "Could not create log directory %r: %s" % (directory, err))

    for _ in range(3):
        now = datetime.datetime.now()
        timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=2000,
            max_path_length=env.horizon,
            n_itr=500,
            discount=.995,
            step_size=0.001,
        )
        # No seed is passed for these runs.
        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name,
            log_dir=os.path.join(directory, timestamp)
            if save else "./Results/Tmp",
        )
Ejemplo n.º 7
0
def slideturn_noisy(val=0.1,
                    directory="./Results/Car/NoisyObs500/",
                    exp_name="Cap_",
                    save=False):
    """Launch one TRPO run on the slide/turn task with noisy observations.

    The ``RCCarSlideTurn`` domain is built with ``noise=0.``, and the
    resulting ``HRLEnv`` is wrapped in ``NoisyObservationEnv`` with
    ``obs_noise=val``.

    Args:
        val: Value passed as ``obs_noise`` to ``NoisyObservationEnv``.
        directory: Parent directory for saved logs.
        exp_name: Sub-directory name under ``directory`` and prefix of the
            experiment name (a timestamp is appended to the latter).
        save: When true, log to ``directory/exp_name/<timestamp>``;
            otherwise log to ``./Results/Tmp2``.
    """
    agent_names = ('agent0', 'agent1')
    policies = [
        PolicyLoader("models/slideturn_experiment/" + name)
        for name in agent_names
    ]
    # noise=0. turns off process noise in the underlying domain.
    rccar = RCCarSlideTurn(noise=0.)

    original_env = HRLEnv(RLPyEnv(rccar), policies)
    env = NoisyObservationEnv(original_env, obs_noise=val)
    policy = CategoricalMLPPolicy(env_spec=env.spec, )
    baseline = LinearFeatureBaseline(env_spec=env.spec)
    dir_name = os.path.join(directory, exp_name)

    # Single run (the original one-pass loop is flattened).
    timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    if save:
        log_dir = os.path.join(dir_name, timestamp)
    else:
        log_dir = './Results/Tmp2'

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=500,
        discount=0.9,
        step_size=0.01,
    )
    # No seed is passed for this run.
    run_experiment_lite(
        algo.train(),
        # Number of parallel workers for sampling
        n_parallel=4,
        # Only keep the snapshot parameters for the last iteration
        snapshot_mode="last",
        script="scripts/run_experiment_lite_rl.py",
        exp_name=exp_name + timestamp,
        log_dir=log_dir,
    )
Ejemplo n.º 8
0
def slideturn_turn_only(num=1, directory="./Final_Results/Car/SlideTurn/", exp_name="Turn", save=True):
    """Launch TRPO runs on the slide/turn task using only ``agent1``.

    The single sub-policy is loaded once. The domain, environment, policy,
    baseline and algorithm are all rebuilt for every repetition.

    Args:
        num: Number of repetitions to launch.
        directory: Parent directory; ``exp_name`` is appended to it.
        exp_name: Experiment name, also used as the sub-directory name.
        save: When true, log to ``directory/exp_name/<timestamp>``;
            otherwise log to ``./Results/Tmp``.
    """
    policies = [PolicyLoader("models/slideturn_experiment/" + 'agent1')]
    directory = os.path.join(directory, exp_name)

    for _ in range(num):
        rccar = RCCarSlideTurn(noise=0.1)
        timestamp = datetime.datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
        if save:
            log_dir = os.path.join(directory, timestamp)
        else:
            log_dir = './Results/Tmp'

        env = HRLEnv(RLPyEnv(rccar), policies)
        policy = CategoricalMLPPolicy(env_spec=env.spec)
        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = TRPO(
            env=env,
            policy=policy,
            baseline=baseline,
            batch_size=1000,
            max_path_length=env.horizon,
            n_itr=500 + 3,
            discount=0.995,
            step_size=0.001,
        )
        # No seed is passed for these runs.
        run_experiment_lite(
            algo.train(),
            # Number of parallel workers for sampling
            n_parallel=4,
            # Only keep the snapshot parameters for the last iteration
            snapshot_mode="last",
            script="scripts/run_experiment_lite_rl.py",
            exp_name=exp_name,
            log_dir=log_dir,
        )