Python DDPG.train Exemples, rllab.algos.ddpg.DDPG.train Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : ddpg_walker.py Projet : afcarl/polyRL

            def run_task(*_):
                """Train DDPG on the normalized Walker2D environment.

                Positional arguments are accepted and ignored.  The hidden-layer
                widths, reward scale and learning rates are looked up in names
                from the enclosing scope (``H_layer_first``/``H_layer_second``
                indexed by ``h``, ``reward_scaling`` by ``r``, and both
                learning-rate tables by ``c``).
                """
                walker = normalize(Walker2DEnv())

                # Two hidden layers; their widths are read from the enclosing scope.
                actor = DeterministicMLPPolicy(
                    env_spec=walker.spec,
                    hidden_sizes=(H_layer_first[h], H_layer_second[h]),
                )
                noise = OUStrategy(env_spec=walker.spec)
                critic = ContinuousMLPQFunction(env_spec=walker.spec)

                # NOTE(review): the critic and actor learning rates are both
                # indexed by ``c`` -- confirm that is intended.
                hyperparams = dict(
                    batch_size=size_of_batch,
                    max_path_length=100,
                    epoch_length=1000,
                    min_pool_size=10000,
                    n_epochs=number_of_episodes,
                    discount=discount_factor,
                    scale_reward=reward_scaling[r],
                    qf_learning_rate=critic_learning_rate[c],
                    policy_learning_rate=actor_learning_rate[c],
                    # plot=True,  # uncomment (along with the second plot parameter) to enable plotting
                )
                trainer = DDPG(env=walker, policy=actor, es=noise, qf=critic, **hyperparams)
                trainer.train()

Exemple #2

0

Afficher le fichier

Fichier : algo_launchers.py Projet : fredshentu/public_model_based_controller

def rllab_ddpg_launcher(variant):
	"""Build and train rllab's DDPG from a ``variant`` configuration dict.

	``variant['env_params']`` is expanded into ``get_env_settings`` and the
	``'env'`` entry of its result is used as the environment;
	``variant['algo_params']`` is expanded into the DDPG constructor.
	"""
	# Dependencies are imported lazily, inside the launcher.
	from rllab.algos.ddpg import DDPG as RllabDDPG
	from rllab.exploration_strategies.ou_strategy import OUStrategy
	from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
	from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
	from railrl.launchers.launcher_util import get_env_settings

	environment = get_env_settings(**variant['env_params'])['env']

	# Deterministic actor with two hidden layers of 32 units each.
	actor = DeterministicMLPPolicy(env_spec=environment.spec, hidden_sizes=(32, 32))
	exploration = OUStrategy(env_spec=environment.spec)
	critic = ContinuousMLPQFunction(env_spec=environment.spec)

	RllabDDPG(
		env=environment,
		policy=actor,
		es=exploration,
		qf=critic,
		**variant['algo_params']
	).train()

Exemple #3

0

Afficher le fichier

Fichier : ddpg_gym_lunar_cont.py Projet : tzs930/deeprl_practice_colab

def run_task(*_):
    """Train DDPG on a normalized ``MountainCarContinuous-v0`` Gym environment.

    Any positional arguments are accepted and ignored.
    """
    gym_env = GymEnv(env_name="MountainCarContinuous-v0", force_reset=True)
    env = normalize(gym_env)
    horizon = 300

    # Actor and critic both use two hidden layers of 64 units.
    actor = DeterministicMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64))
    noise = OUStrategy(env_spec=env.spec)
    critic = ContinuousMLPQFunction(env_spec=env.spec, hidden_sizes=(64, 64))

    settings = dict(
        batch_size=100,
        n_updates_per_sample=1,
        max_path_length=horizon,
        epoch_length=900,
        min_pool_size=800,
        replay_pool_size=5000,
        n_epochs=1000,
        discount=0.99,
        scale_reward=0.1,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
    )
    trainer = DDPG(env=env, policy=actor, es=noise, qf=critic, **settings)
    trainer.train()

Exemple #4

0

Afficher le fichier

Fichier : ddpg_cartpole.py Projet : andrewliao11/rllab

def run_task(*_):
    """Train DDPG on the normalized Cartpole environment.

    Any positional arguments are accepted and ignored.
    """
    cartpole = normalize(CartpoleEnv())

    # Deterministic actor: two hidden layers, 32 units each.
    actor = DeterministicMLPPolicy(env_spec=cartpole.spec, hidden_sizes=(32, 32))
    exploration = OUStrategy(env_spec=cartpole.spec)
    critic = ContinuousMLPQFunction(env_spec=cartpole.spec)

    DDPG(
        env=cartpole,
        policy=actor,
        es=exploration,
        qf=critic,
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=1000,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # plot=True,  # uncomment (along with the second plot parameter) to enable plotting
    ).train()

Exemple #5

0

Afficher le fichier

def main(exp_name, ent_wt=1.0):
    """Train DDPG on ``LunarLanderContinuous-v3`` and snapshot into a data directory.

    Args:
        exp_name: Last path component of the snapshot directory
            ``data/<env>_data_rllab_<AlgoClass>/<exp_name>/``.
        ent_wt: Not used by this function; kept so existing callers that
            pass it keep working.

    The logger's snapshot directory is pointed at the data directory for the
    duration of training and is reset to ``None`` afterwards, including when
    training raises.
    """
    register_custom_envs()
    env_name = 'LunarLanderContinuous-v3'
    env = GymEnv(env_name)
    policy = DeterministicMLPPolicy(env_spec=env.spec, hidden_sizes=(64, 64))
    es = OUStrategy(env_spec=env.spec)
    qf = ContinuousMLPQFunction(env_spec=env.spec)

    algo = DDPG(
        env=env,
        policy=policy,
        es=es,
        qf=qf,
        batch_size=32,
        max_path_length=350,
        epoch_length=350,
        min_pool_size=350,
        n_epochs=600,
        discount=0.99,
        scale_reward=1.0/140.0,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # plot=True,  # uncomment (along with the second plot parameter) to enable plotting
    )
    data_path = 'data/%s_data_rllab_%s/%s/'%(env_name.replace('-', '_'), 
                                             str(algo.__class__.__name__), 
                                             exp_name)
    os.makedirs(data_path, exist_ok=True)
    logger.set_snapshot_dir(data_path)
    try:
        algo.train()
    finally:
        # Always undo the logger redirection so a failed run does not leave
        # the shared logger pointing at this experiment's directory.
        logger.set_snapshot_dir(None)

Exemple #6

0

Afficher le fichier

Fichier : ddpg_swimmer.py Projet : ferric123/robotarm

def run_task(*_):
    """Train DDPG on the normalized Swimmer environment with ``plot=True``.

    Any positional arguments are accepted and ignored.
    """
    swimmer = normalize(SwimmerEnv())

    # Deterministic actor: two hidden layers, 32 units each.
    actor = DeterministicMLPPolicy(env_spec=swimmer.spec, hidden_sizes=(32, 32))
    noise = OUStrategy(env_spec=swimmer.spec)
    critic = ContinuousMLPQFunction(env_spec=swimmer.spec)

    training_options = dict(
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=200,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # Plotting is requested here; the original note says a second plot
        # parameter must be enabled as well.
        plot=True,
    )
    trainer = DDPG(env=swimmer, policy=actor, es=noise, qf=critic, **training_options)
    trainer.train()

Exemple #7

0

Afficher le fichier

Fichier : ddpgModel.py Projet : Lukeeeeee/DataCenterJobSchedulingSolution

class DDPGModel(Model):
    """``Model`` implementation that delegates prediction and training to DDPG."""

    def __init__(self):
        # NOTE(review): DDPG is built with no arguments; confirm the DDPG class
        # in scope allows that (rllab's DDPG expects env, policy, qf and es).
        self.ddpg = DDPG()

    def predict(self, obs):
        """Return whatever the DDPG policy's ``get_action`` yields for ``obs``.

        NOTE(review): with rllab policies this is presumably an
        ``(action, agent_info)`` pair -- confirm against the policy in use.
        """
        # The result used to be computed and then dropped, so callers always
        # received None.
        return self.ddpg.policy.get_action(observation=obs)

    def train(self, batch_data):
        """Forward ``batch_data`` to the underlying DDPG ``train`` call."""
        # The keyword argument previously had no value, which is a syntax error.
        self.ddpg.train(batch_data=batch_data)

Exemple #8

0

Afficher le fichier

def run_task(*_):
    """Train DDPG on the normalized Hopper environment.

    Any positional arguments are accepted and ignored.
    """
    hopper = normalize(HopperEnv())

    # Deterministic actor with two hidden layers of 400 and 300 units.
    actor = DeterministicMLPPolicy(env_spec=hopper.spec, hidden_sizes=(400, 300))
    noise = OUStrategy(env_spec=hopper.spec)
    critic = ContinuousMLPQFunction(env_spec=hopper.spec)

    # An earlier, now disabled, configuration used batch_size=32,
    # max_path_length=500, epoch_length=500, n_epochs=20000,
    # qf_learning_rate=1e-3 and policy_learning_rate=1e-4.
    # NOTE(review): 10e-3 and 10e-4 below equal 1e-2 and 1e-3 -- confirm the
    # tenfold increase over the earlier values is intended.
    ddpg_options = dict(
        batch_size=64,
        max_path_length=1000,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=10000,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=10e-3,
        policy_learning_rate=10e-4,
        # Plotting is requested here; the original note says a second plot
        # parameter must be enabled as well.
        plot=True,
    )
    trainer = DDPG(env=hopper, policy=actor, es=noise, qf=critic, **ddpg_options)

    trainer.train()

Exemple #9

0

Afficher le fichier

Fichier : ddpg_swimmer.py Projet : afcarl/polyRL

def run_task(*_):
    """Train DDPG on the normalized Swimmer environment.

    Any positional arguments are accepted and ignored.
    """
    swimmer = normalize(SwimmerEnv())

    # Policy: a deterministic MLP with two hidden layers of 32 units each.
    actor = DeterministicMLPPolicy(env_spec=swimmer.spec, hidden_sizes=(32, 32))

    # Exploration: OUStrategy implements the Ornstein-Uhlenbeck process, which
    # adds time-correlated noise to the actions of the deterministic policy.
    # The process satisfies the stochastic differential equation
    #     dx_t = theta * (mu - x_t) * dt + sigma * dW_t
    # where W_t denotes the Wiener process.
    ou_noise = OUStrategy(env_spec=swimmer.spec)

    # Q network (critic).
    critic = ContinuousMLPQFunction(env_spec=swimmer.spec)

    # DDPG configuration.
    ddpg_options = dict(
        batch_size=32,
        max_path_length=500,
        epoch_length=500,
        min_pool_size=10000,
        n_epochs=20000,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        # Plotting is requested here; the original note says a second plot
        # parameter must be enabled as well.
        plot=True,
    )
    trainer = DDPG(env=swimmer, policy=actor, es=ou_noise, qf=critic, **ddpg_options)

    # Train the networks with DDPG.
    trainer.train()

Exemple #10

0

Afficher le fichier

Fichier : test_algos.py Projet : cknd/rllab

def test_ddpg():
    """Smoke-test DDPG: run one short epoch of training on Cartpole."""
    cartpole = CartpoleEnv()
    actor = DeterministicMLPPolicy(cartpole.spec)
    critic = ContinuousMLPQFunction(cartpole.spec)
    noise = OUStrategy(cartpole.spec)

    # Small pool and epoch sizes keep the run short.
    quick_settings = dict(
        n_epochs=1,
        epoch_length=100,
        batch_size=32,
        min_pool_size=50,
        replay_pool_size=1000,
        eval_samples=100,
    )
    DDPG(env=cartpole, policy=actor, qf=critic, es=noise, **quick_settings).train()

Exemple #11

0

Afficher le fichier

def test_ddpg():
    """Check that DDPG can be built and trained for a single epoch on Cartpole."""
    environment = CartpoleEnv()
    det_policy = DeterministicMLPPolicy(environment.spec)
    q_function = ContinuousMLPQFunction(environment.spec)
    ou_strategy = OUStrategy(environment.spec)

    trainer = DDPG(
        env=environment,
        policy=det_policy,
        qf=q_function,
        es=ou_strategy,
        # Deliberately tiny budget: one epoch of 100 steps.
        n_epochs=1,
        epoch_length=100,
        batch_size=32,
        min_pool_size=50,
        replay_pool_size=1000,
        eval_samples=100,
    )
    trainer.train()

Exemple #12

0

Afficher le fichier

Fichier : test_rllab.py Projet : UVA-DSA/ContextSafetyMonitorAPS

def test_rllab(patient_id=1, Initial_Bg=0):
    """Train rllab's DDPG for five epochs on a simglucose Gym environment.

    Args:
        patient_id: ``patient_id + 1`` fills the ``CHO{}`` slot of the
            environment id.
        Initial_Bg: Fills the ``adult{}`` slot of the environment id.
            NOTE(review): the argument names suggest the two slots may be
            swapped -- confirm against the registered environment ids.

    Returns:
        ``(es, policy)`` -- the exploration strategy and the policy -- after
        training, or ``None`` when rllab cannot be imported.
    """
    try:
        from rllab.algos.ddpg import DDPG
        from rllab.envs.normalized_env import normalize
        from rllab.exploration_strategies.ou_strategy import OUStrategy
        from rllab.policies.deterministic_mlp_policy import DeterministicMLPPolicy
        from rllab.q_functions.continuous_mlp_q_function import ContinuousMLPQFunction
        from rllab.envs.gym_env import GymEnv
    except ImportError:
        print('rllab is not installed!')
        return None

    env_id = 'simglucose-adult{}-CHO{}-v0'.format(Initial_Bg, patient_id + 1)
    env = normalize(GymEnv(env_id))

    # Deterministic actor: two hidden layers, 32 units each.
    policy = DeterministicMLPPolicy(env_spec=env.spec, hidden_sizes=(32, 32))
    es = OUStrategy(env_spec=env.spec)
    critic = ContinuousMLPQFunction(env_spec=env.spec)

    ddpg_options = dict(
        batch_size=32,
        max_path_length=100,
        epoch_length=1000,
        min_pool_size=10000,
        n_epochs=5,
        discount=0.99,
        scale_reward=0.01,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
    )
    DDPG(env=env, policy=policy, es=es, qf=critic, **ddpg_options).train()

    # The environment is intentionally left open (env.close() is not called).
    return es, policy

Exemple #13

0

Afficher le fichier

def run_task(*_):
    """Train DDPG on a normalized ``LunarLanderContinuous-v2`` Gym environment.

    Any positional arguments are accepted and ignored.
    """
    # Alternatives tried previously and left disabled: HalfCheetahEnv(), and
    # GymEnv "BipedalWalker-v2" with force_reset=True, record_video=True.
    lander = normalize(GymEnv(env_name = "LunarLanderContinuous-v2",force_reset=True))
    horizon = 400

    # Actor and critic both use two hidden layers of 64 units.
    actor = DeterministicMLPPolicy(env_spec=lander.spec, hidden_sizes=(64, 64))
    noise = OUStrategy(env_spec=lander.spec)
    critic = ContinuousMLPQFunction(env_spec=lander.spec, hidden_sizes=(64, 64))

    ddpg_options = dict(
        batch_size=32,
        max_path_length=horizon,
        train_epoch_interval=300,
        min_pool_size=500,
        replay_pool_size=10000,
        n_updates_per_sample=1,
        n_steps=75000,
        discount=0.99,
        scale_reward=0.1,
        qf_learning_rate=1e-2,
        policy_learning_rate=1e-3,
        # plot=True,  # uncomment (along with the second plot parameter) to enable plotting
    )
    trainer = DDPG(env=lander, policy=actor, es=noise, qf=critic, **ddpg_options)
    trainer.train()

Exemple #14

0

Afficher le fichier

Fichier : run_ddpg_no_stub.py Projet : jonasnm/rllab

def run_task(*_):
    """Train DDPG on the Gym environment selected by the module-level ``args``.

    Any positional arguments are accepted and ignored.  The function reads
    ``args.env``, ``args.reward``, ``args.hidden_sizes``, ``args.batch_size``,
    ``args.n_itr``, ``args.gamma`` and ``args.scale_reward``.

    Raises:
        ValueError: If ``args.hidden_sizes`` is not one of the preset
            indices 0-3.
    """
    env = normalize(GymEnv(args.env, force_reset=True, record_video=False))
    # The reward flag is set on the object three wrappers below the
    # normalized environment.
    env.wrapped_env.env.env.reward_flag = args.reward

    # Preset index -> hidden-layer widths of the policy network.
    hidden_size_presets = {
        0: (8,),
        1: (32, 32),
        2: (100, 50, 25),
        3: (400, 300),
    }
    if args.hidden_sizes not in hidden_size_presets:
        # Without this check an unknown index left ``hidden_sizes`` unbound
        # and failed later with an UnboundLocalError.
        raise ValueError(
            "Unsupported hidden_sizes preset %r; expected one of %s"
            % (args.hidden_sizes, sorted(hidden_size_presets))
        )
    hidden_sizes = hidden_size_presets[args.hidden_sizes]

    policy = DeterministicMLPPolicy(
        env_spec=env.spec,
        # Layer widths come from the selected preset.
        hidden_sizes=hidden_sizes
    )

    es = OUStrategy(env_spec=env.spec)

    qf = ContinuousMLPQFunction(env_spec=env.spec)

    algo = DDPG(
        env=env,
        policy=policy,
        es=es,
        qf=qf,
        batch_size=64,
        max_path_length=95,
        # The epoch length is taken from args.batch_size; the DDPG minibatch
        # size itself is fixed at 64 above.
        epoch_length=args.batch_size,
        min_pool_size=10000,
        n_epochs=args.n_itr,
        discount=args.gamma,
        scale_reward=args.scale_reward,
        qf_learning_rate=1e-3,
        policy_learning_rate=1e-4,
        eval_samples=95,
        # plot=True,  # uncomment (along with the second plot parameter) to enable plotting
    )
    algo.train()

Exemple #15

0

Afficher le fichier

Fichier : ddpg_fwmav.py Projet : ffnc1020/flappy-1

    qf_learning_rate=1e-3,  # Learning rate for training Q function
    policy_learning_rate=1e-4,  # Learning rate for training the policy
    #qf_weight_decay=0.01,
    soft_target_tau=
    0.005,  # Interpolation parameter for doing the soft target update.
    # Uncomment both lines (this and the plot parameter below) to enable plotting
    # plot=True,
)

# Send all logger output to a "data" directory under the current working
# directory: snapshots, a text log (debug.log) and tabular results
# (progress.csv).
log_dir = os.path.join(os.getcwd(), 'data')
logger.set_snapshot_dir(log_dir)
logger.add_text_output(os.path.join(log_dir, 'debug.log'))
logger.add_tabular_output(os.path.join(log_dir, 'progress.csv'))
# Presumably keeps only the most recent snapshot -- confirm against the logger.
logger.set_snapshot_mode('last')

# Run training; `algo` is built earlier in the file.
algo.train()

# Save the trained policy object with the highest available pickle protocol.
with open(os.path.join(log_dir, 'final_policy.pkl'), 'wb') as output:
    trained_policy = algo.policy
    pickle.dump(trained_policy, output, pickle.HIGHEST_PROTOCOL)
print('Final policy saved')


def save_large_pickled_object(obj, filepath):
    """
    Defensive way to write a pickle, intended to allow very large files on
    all platforms.

    NOTE(review): only the setup is visible in this excerpt -- the object is
    serialized in memory and a size is measured; the code that writes to
    ``filepath`` is not shown here.
    """
    # Presumably the largest chunk written in a single call -- TODO confirm
    # against the remainder of the function.
    max_bytes = 2**31 - 1
    bytes_out = pickle.dumps(obj)
    # NOTE(review): sys.getsizeof reports the size of the bytes object itself
    # (including object overhead), not len(bytes_out) -- confirm which is meant.
    n_bytes = sys.getsizeof(bytes_out)

Exemple #16

0

Afficher le fichier

Fichier : ddpg_cartpole_stub.py Projet : BinbinBian/rllab

# Configure DDPG; `env`, `policy`, `es` and `qf` are defined earlier in the file.
algo = DDPG(
    env=env,
    policy=policy,
    es=es,
    qf=qf,
    batch_size=32,
    max_path_length=100,
    epoch_length=1000,
    min_pool_size=10000,
    n_epochs=1000,
    discount=0.99,
    scale_reward=0.01,
    qf_learning_rate=1e-3,
    policy_learning_rate=1e-4,
    # Uncomment both lines (this and the plot parameter below) to enable plotting
    # plot=True,
)

# NOTE(review): `algo.train()` is evaluated here and its result is what gets
# passed to run_experiment_lite. The file name suggests rllab's stub mode, in
# which case this is presumably a deferred call rather than an immediate
# training run -- confirm that stubbing is enabled earlier in the file.
run_experiment_lite(
    algo.train(),
    # Number of parallel workers for sampling
    n_parallel=1,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="last",
    # Specifies the seed for the experiment. If this is not provided, a random seed
    # will be used
    seed=1,
    # plot=True,
)

Exemple #17

0

Afficher le fichier

        # The neural network policy should have two hidden layers, each with 32 hidden units.
        hidden_sizes=(128, 128))

    baseline = LinearFeatureBaseline(env_spec=env.spec)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=4000,
        max_path_length=env.horizon,
        n_itr=500,
        discount=0.99,
        step_size=0.01,
        # Uncomment both lines (this and the plot parameter below) to enable plotting
        # plot=True,
    )

# NOTE(review): `algo.train()` is evaluated here and its result is passed to
# run_experiment_lite; presumably the file runs in rllab's stub mode so that
# this is a deferred call -- confirm. Also confirm that `algo`, which this
# excerpt assigns inside an indented scope, is visible at this level.
run_experiment_lite(
    algo.train(),
    # Number of parallel workers for sampling
    n_parallel=1,
    # Only keep the snapshot parameters for the last iteration
    snapshot_mode="last",
    # Specifies the seed for the experiment. If this is not provided, a random seed
    # will be used
    seed=1,
    use_gpu=True,
    # plot=True,
)