def test_old_configs(self):
    """Tests creating various Trainers (Algorithms) using 1.10 config dicts."""
    from ray.rllib.tests.backward_compat.old_ppo import DEFAULT_CONFIG
    from ray.rllib.agents.ppo import PPOTrainer

    config = DEFAULT_CONFIG.copy()
    trainer = PPOTrainer(config=config, env="CartPole-v0")
    trainer.train()
    trainer.stop()
def Hunter_trainer(config, reporter):
    # Trainer's first positional argument is the config, not the env, so pass
    # both explicitly as keyword arguments.
    multi_hunter_trainer = PPOTrainer(config=config, env=MultiHunterEnv)
    for _ in range(100):
        environment.simulate()  # assumes a globally defined `environment`
        result = multi_hunter_trainer.train()
        result["phase"] = 1
        reporter(**result)
        phase1_time = result["timesteps_total"]
    state = multi_hunter_trainer.save()
    multi_hunter_trainer.stop()
def main():
    ray.init()
    # PPO hyperparameters are not well tuned; most of them follow
    # https://github.com/xtma/pytorch_car_caring/blob/master/train.py
    trainer = PPOTrainer(
        env=MyEnv,
        config={
            "use_pytorch": True,
            "model": {
                "custom_model": "mymodel",
                "custom_options": {
                    "encoder_path": args.encoder_path,
                    "train_encoder": args.train_encoder,
                },
                "custom_action_dist": "mydist",
            },
            "env_config": {"game": "CarRacing"},
            "num_workers": args.num_workers,
            "num_envs_per_worker": args.num_envs_per_worker,
            "num_gpus": args.num_gpus,
            "use_gae": args.use_gae,
            "batch_mode": args.batch_mode,
            "vf_loss_coeff": args.vf_loss_coeff,
            "vf_clip_param": args.vf_clip_param,
            "lr": args.lr,
            "kl_coeff": args.kl_coeff,
            "num_sgd_iter": args.num_sgd_iter,
            "grad_clip": args.grad_clip,
            "clip_param": args.clip_param,
            "rollout_fragment_length": args.rollout_fragment_length,
            "train_batch_size": args.train_batch_size,
            "sgd_minibatch_size": args.sgd_minibatch_size,
        },
    )
    for i in range(args.train_epochs):
        trainer.train()
        print("%d Train Done" % i, "Save Freq: %d" % args.model_save_freq)
        if (i + 1) % args.model_save_freq == 0:
            print("%d Episodes Done" % i)
            weights = trainer.get_policy().get_weights()
            torch.save(weights, args.model_save_path + "%d-mode.pt" % (i + 1))
            trainer.save(args.trainer_save_path)
    print("Done All!")
    trainer.stop()
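# The "mymodel" and "mydist" strings used in the config above must be
# registered with RLlib's ModelCatalog before the trainer is built. A minimal
# sketch, assuming `MyModel` and `MyDist` are the custom model / action
# distribution classes defined elsewhere in this project:
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("mymodel", MyModel)
ModelCatalog.register_custom_action_dist("mydist", MyDist)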
def my_train_fn(config, reporter):
    # Train for n iterations with high LR.
    agent1 = PPOTrainer(env="CartPole-v0", config=config)
    for _ in range(10):
        result = agent1.train()
        result["phase"] = 1
        reporter(**result)
        phase1_time = result["timesteps_total"]
    state = agent1.save()
    agent1.stop()

    # Train for n iterations with low LR.
    config["lr"] = 0.0001
    agent2 = PPOTrainer(env="CartPole-v0", config=config)
    agent2.restore(state)
    for _ in range(10):
        result = agent2.train()
        result["phase"] = 2
        result["timesteps_total"] += phase1_time  # keep time moving forward
        reporter(**result)
    agent2.stop()
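# A possible driver for the two-phase function above, running it as a Tune
# trainable. This is a sketch: the starting "lr" and "num_workers" values are
# assumptions, and resources_per_trial may need to be set to match the PPO
# worker count in a real run.
import ray
from ray import tune

if __name__ == "__main__":
    ray.init()
    config = {
        "lr": 0.01,
        "num_workers": 0,
    }
    tune.run(my_train_fn, config=config)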
def test_ppo_sample_waste(self):
    # Check we at least collect the initial wave of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "rollout_fragment_length": 200,
            "train_batch_size": 128,
            "num_workers": 3,
        })
    result = ppo.train()
    self.assertEqual(result["info"]["num_steps_sampled"], 600)
    ppo.stop()

    # Check we collect at least the specified amount of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "rollout_fragment_length": 200,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    result = ppo.train()
    self.assertEqual(result["info"]["num_steps_sampled"], 1200)
    ppo.stop()

    # Check in vectorized mode.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "rollout_fragment_length": 200,
            "num_envs_per_worker": 2,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    result = ppo.train()
    self.assertEqual(result["info"]["num_steps_sampled"], 1200)
    ppo.stop()
def testPPOSampleWaste(self):
    ray.init(num_cpus=4)

    # Check we at least collect the initial wave of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "train_batch_size": 128,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 600)
    ppo.stop()

    # Check we collect at least the specified amount of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 1000)
    ppo.stop()

    # Check in vectorized mode.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "num_envs_per_worker": 2,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 1200)
    ppo.stop()

    # Check legacy mode.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "train_batch_size": 128,
            "num_workers": 3,
            "straggler_mitigation": True,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 200)
    ppo.stop()
def test_ppo_sample_waste(self):
    # Check we at least collect the initial wave of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "train_batch_size": 128,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 600)
    ppo.stop()

    # Check we collect at least the specified amount of samples.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 1000)
    ppo.stop()

    # Check in vectorized mode.
    ppo = PPOTrainer(
        env="CartPole-v0",
        config={
            "sample_batch_size": 200,
            "num_envs_per_worker": 2,
            "train_batch_size": 900,
            "num_workers": 3,
        })
    ppo.train()
    self.assertEqual(ppo.optimizer.num_steps_sampled, 1200)
    ppo.stop()
agent_cfg["shuffle_sequences"] = True  # Whether to shuffle sequences in the batch when training
agent_cfg["grad_clip"] = None  # Clamp the norm of the gradient during optimization (None to disable)

# ====================== Run the optimization ======================
agent_cfg["lr"] = 1.0e-4
agent_cfg["lr_schedule"] = None
train_agent = Trainer(agent_cfg, "env", logger_creator)
checkpoint_path = train(train_agent, max_timesteps=100000)

# ===================== Enjoy the trained agent ======================
test_agent = Trainer(agent_cfg, "env", logger_creator)
test_agent.restore(checkpoint_path)
test(test_agent, explore=False)

# =================== Terminate the Ray backend ====================
train_agent.stop()
test_agent.stop()
ray.shutdown()
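# The `train` and `test` helpers called above are not shown in this snippet.
# A minimal sketch of what they might look like (names and signatures are
# assumptions, not RLlib APIs): train() loops until a timestep budget is
# reached and returns the last checkpoint path; test() rolls out one episode
# on the local worker's env, optionally without exploration.
def train(agent, max_timesteps):
    checkpoint_path = None
    timesteps = 0
    while timesteps < max_timesteps:
        result = agent.train()
        timesteps = result["timesteps_total"]
        checkpoint_path = agent.save()
    return checkpoint_path


def test(agent, explore=True):
    env = agent.workers.local_worker().env
    obs = env.reset()
    done, total_reward = False, 0.0
    while not done:
        action = agent.compute_action(obs, explore=explore)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
    print("Test episode reward:", total_reward)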
def test_local(self):
    cf = DEFAULT_CONFIG.copy()
    for _ in framework_iterator(cf):
        agent = PPOTrainer(cf, "CartPole-v0")
        print(agent.train())
        agent.stop()
# Create a new dummy Trainer to "fix" our checkpoint.
new_trainer = PPOTrainer(config=config)
# Get untrained weights for all policies.
untrained_weights = new_trainer.get_weights()
# Restore all policies from checkpoint.
new_trainer.restore(best_checkpoint)
# Set back all weights (except for 1st agent) to original
# untrained weights.
new_trainer.set_weights(
    {pid: w for pid, w in untrained_weights.items() if pid != "policy_0"})
# Create the checkpoint from which tune can pick up the experiment.
new_checkpoint = new_trainer.save()
new_trainer.stop()
print(".. checkpoint to restore from (all policies reset, "
      f"except policy_0): {new_checkpoint}")

print("Starting new tune.run")

# Start our actual experiment.
stop = {
    "episode_reward_mean": args.stop_reward,
    "timesteps_total": args.stop_timesteps,
    "training_iteration": args.stop_iters,
}

# Make sure the non-1st policies are not updated anymore.
config["multiagent"]["policies_to_train"] = [
    pid for pid in policy_ids if pid != "policy_0"
]
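# One way the `best_checkpoint` used above could have been obtained; this is
# a sketch that assumes `results` is the ExperimentAnalysis object returned
# by an earlier tune.run() over the same experiment.
best_trial = results.get_best_trial("episode_reward_mean", mode="max")
best_checkpoint = results.get_best_checkpoint(
    best_trial, metric="episode_reward_mean", mode="max")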
trainer = PPOTrainer(
    env="rcrs_env",
    config={
        "env": "rcrs_env",
        "num_workers": 1,
        "multiagent": {
            "policies": {
                "fb_1": (None, obs_space, act_space, {}),
                "fb_2": (None, obs_space, act_space, {}),
            },
            "policy_mapping_fn": lambda agent_id: "fb_1"
            if agent_id.startswith("fb_1_")
            else random.choice(["fb_1", "fb_2"]),
        },
    },
)

for i in range(2):
    result = trainer.train()
    print(pretty_print(result))
    if i % 1 == 0:
        checkpoint = trainer.save()
        print("checkpoint saved at", checkpoint)

state = trainer.save()
trainer.stop()
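# A hedged follow-up to the loop above: restore the last saved checkpoint into
# a fresh trainer for evaluation. `rcrs_config` stands for the same config
# dict used above and `obs` for a valid observation of an "fb_1_*" agent;
# both are assumptions for illustration only.
eval_trainer = PPOTrainer(env="rcrs_env", config=rcrs_config)
eval_trainer.restore(checkpoint)
action = eval_trainer.compute_action(obs, policy_id="fb_1")
print("fb_1 action:", action)
eval_trainer.stop()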