Beispiel #1
0
 def test_abort_on_all_empty_inputs(self):
     """JsonReader.next() must raise ValueError when no input has a record.

     Two cases are exercised: a single zero-byte file, and two files that
     contain nothing but newline characters.
     """
     # Case 1: one zero-byte input file.
     empty_path = self.test_dir + "/empty"
     with open(empty_path, "w"):
         pass
     reader = JsonReader([empty_path])
     with self.assertRaises(ValueError):
         reader.next()

     # Case 2: two inputs made up solely of 100 blank lines each.
     blank_paths = [self.test_dir + "/empty1", self.test_dir + "/empty2"]
     for path in blank_paths:
         with open(path, "w") as f:
             f.write("\n" * 100)
     reader = JsonReader(blank_paths)
     with self.assertRaises(ValueError):
         reader.next()
Beispiel #2
0
 def testAbortOnAllEmptyInputs(self):
     """Check that reading fails with ValueError if every input is empty."""
     # A file with no bytes at all.
     zero_byte_file = self.test_dir + "/empty"
     with open(zero_byte_file, "w"):
         pass
     reader = JsonReader([zero_byte_file])
     with self.assertRaises(ValueError):
         reader.next()

     # Files that only contain newline characters (100 per file).
     first_blank = self.test_dir + "/empty1"
     second_blank = self.test_dir + "/empty2"
     for blank_file in (first_blank, second_blank):
         with open(blank_file, "w") as f:
             f.writelines("\n" for _ in range(100))
     reader = JsonReader([first_blank, second_blank])
     with self.assertRaises(ValueError):
         reader.next()
Beispiel #3
0
 def test_multiple_output_workers(self):
     """Check that two rollout workers each write one output file.

     For every tested framework a PG agent with two workers is trained for
     one iteration. The output directory must then hold exactly two entries
     and a batch must be readable through the "*.json" glob.
     """
     ray.shutdown()
     ray.init(num_cpus=4, ignore_reinit_error=True)
     for fw in framework_iterator(frameworks=["tf", "torch"]):
         output_dir = self.test_dir + fw
         agent = PG(
             env="CartPole-v0",
             config={
                 "num_workers": 2,
                 "output": output_dir,
                 "rollout_fragment_length": 250,
                 "framework": fw,
             },
         )
         try:
             agent.train()
             self.assertEqual(len(os.listdir(output_dir)), 2)
             reader = JsonReader(output_dir + "/*.json")
             reader.next()
         finally:
             # Stop the agent even when a check fails, so its rollout
             # workers do not linger into the next framework iteration.
             agent.stop()
Beispiel #4
0
 def testAbortOnAllEmptyInputs(self):
     """Reader built with an IOContext must raise ValueError on empty inputs.

     Covers a single zero-byte file and a pair of files that contain only
     newline characters.
     """
     ioctx = IOContext(self.test_dir, {}, 0, None)

     # Single zero-byte input.
     empty_path = self.test_dir + "/empty"
     with open(empty_path, "w"):
         pass
     reader = JsonReader(ioctx, [empty_path])
     with self.assertRaises(ValueError):
         reader.next()

     # Two inputs holding 100 blank lines each.
     blank_paths = [self.test_dir + "/empty1", self.test_dir + "/empty2"]
     for path in blank_paths:
         with open(path, "w") as f:
             f.write("\n" * 100)
     reader = JsonReader(ioctx, blank_paths)
     with self.assertRaises(ValueError):
         reader.next()
Beispiel #5
0
    def test_agent_output_infos(self):
        """Verify that the infos dictionary is written to the output files.

        Note, with torch this is always the case.
        """
        output_config = {"store_infos": True}
        for fw in framework_iterator(frameworks=("torch", "tf")):
            self.write_outputs(self.test_dir, fw, output_config=output_config)
            self.assertEqual(len(os.listdir(self.test_dir + fw)), 1)
            reader = JsonReader(self.test_dir + fw + "/*.json")
            data = reader.next()
            # Use the TestCase assertion instead of a bare `assert`: it is not
            # stripped when Python runs with -O and it reports the container
            # on failure, matching the assertEqual check above.
            self.assertIn("infos", data)
Beispiel #6
0
 def testReadWrite(self):
     """Round-trip sample batches through JsonWriter and JsonReader.

     Writes 100 batches, draws 1000 batches from the reader, and checks that
     more than 90 and fewer than 101 distinct first actions and first
     observations were seen.
     """
     ioctx = IOContext(self.test_dir, {}, 0, None)
     writer = JsonWriter(
         self.test_dir,
         ioctx,
         max_file_size=5000,
         compress_columns=["obs"],
     )
     for idx in range(100):
         writer.write(make_sample_batch(idx))

     reader = JsonReader(self.test_dir + "/*.json")
     first_actions = set()
     first_obs = set()
     for _ in range(1000):
         sample = reader.next()
         first_actions.add(sample["actions"][0])
         first_obs.add(sample["obs"][0])

     # Same bounds apply to both collections: 90 < distinct values < 101.
     for seen in (first_actions, first_obs):
         self.assertGreater(len(seen), 90)
         self.assertLess(len(seen), 101)
Beispiel #7
0
 def testReadWrite(self):
     """Write 100 sample batches to JSON and read them back by sampling.

     After 1000 reads, the number of distinct first actions and of distinct
     first observations must each lie strictly between 90 and 101.
     """
     ioctx = IOContext(self.test_dir, {}, 0, None)
     writer = JsonWriter(
         self.test_dir, ioctx, max_file_size=5000, compress_columns=["obs"])
     for batch_id in range(100):
         sample_batch = make_sample_batch(batch_id)
         writer.write(sample_batch)

     reader = JsonReader(self.test_dir + "/*.json")
     seen_a, seen_o = set(), set()
     for _ in range(1000):
         batch = reader.next()
         seen_a.add(batch["actions"][0])
         seen_o.add(batch["obs"][0])

     num_actions = len(seen_a)
     self.assertGreater(num_actions, 90)
     self.assertLess(num_actions, 101)
     num_obs = len(seen_o)
     self.assertGreater(num_obs, 90)
     self.assertLess(num_obs, 101)
Beispiel #8
0
 def testSkipsOverCorruptedLines(self):
     """Reader should keep returning valid records past a corrupted line.

     The input holds four valid records followed by one unterminated,
     malformed record. Ten reads must surface exactly four distinct first
     actions.
     """
     input_path = self.test_dir + "/f1"
     with open(input_path, "w") as f:
         for idx in range(4):
             f.write(_to_json(make_sample_batch(idx), []))
             f.write("\n")
         f.write("{..corrupted_json_record")

     reader = JsonReader([input_path])
     seen_a = {reader.next()["actions"][0] for _ in range(10)}
     self.assertEqual(len(seen_a), 4)
Beispiel #9
0
 def test_skips_over_empty_lines_and_files(self):
     """Reader should ignore blank lines and empty files between records.

     Inputs: a zero-byte file, a file with two blank lines followed by one
     record, and a file (addressed with a "file://" prefix) holding one
     record. Across 100 reads exactly two distinct first actions must appear.
     """
     empty_path = self.test_dir + "/empty"
     f1_path = self.test_dir + "/f1"
     f2_path = self.test_dir + "/f2"

     with open(empty_path, "w"):
         pass
     with open(f1_path, "w") as f:
         f.write("\n")
         f.write("\n")
         f.write(_to_json(make_sample_batch(0), []))
     with open(f2_path, "w") as f:
         f.write(_to_json(make_sample_batch(1), []))
         f.write("\n")

     reader = JsonReader([empty_path, f1_path, "file://" + f2_path])
     seen_a = set()
     for _ in range(100):
         seen_a.add(reader.next()["actions"][0])
     self.assertEqual(len(seen_a), 2)
Beispiel #10
0
 def test_skips_over_corrupted_lines(self):
     """Check that a malformed trailing record does not break reading.

     Four valid records are written, each followed by a newline, then a
     truncated JSON fragment. Ten reads must yield exactly four distinct
     first actions.
     """
     records = [0, 1, 2, 3]
     with open(self.test_dir + "/f1", "w") as f:
         for record_id in records:
             line = _to_json(make_sample_batch(record_id), [])
             f.write(line)
             f.write("\n")
         f.write("{..corrupted_json_record")

     reader = JsonReader([
         self.test_dir + "/f1",
     ])
     distinct_actions = set()
     for _ in range(10):
         sample = reader.next()
         distinct_actions.add(sample["actions"][0])
     self.assertEqual(len(distinct_actions), 4)
Beispiel #11
0
 def testSkipsOverEmptyLinesAndFiles(self):
     """Blank lines and empty files must not hide the valid records.

     Three inputs are given: a zero-byte file, a file starting with two
     blank lines before its single record, and a file (passed with a "file:"
     prefix) with one record. After 100 reads exactly two distinct first
     actions must have been seen.
     """
     with open(self.test_dir + "/empty", "w"):
         pass
     with open(self.test_dir + "/f1", "w") as f:
         f.write("\n")
         f.write("\n")
         f.write(_to_json(make_sample_batch(0), []))
     with open(self.test_dir + "/f2", "w") as f:
         f.write(_to_json(make_sample_batch(1), []))
         f.write("\n")

     inputs = [
         self.test_dir + "/empty",
         self.test_dir + "/f1",
         "file:" + self.test_dir + "/f2",
     ]
     reader = JsonReader(inputs)
     seen_a = {reader.next()["actions"][0] for _ in range(100)}
     self.assertEqual(len(seen_a), 2)
Beispiel #12
0
 def testSkipsOverCorruptedLines(self):
     """Reader built with an IOContext should skip a corrupted record.

     The input file contains four valid records and one truncated JSON
     fragment at the end. Ten reads must produce exactly four distinct first
     actions.
     """
     ioctx = IOContext(self.test_dir, {}, 0, None)
     input_path = self.test_dir + "/f1"
     with open(input_path, "w") as f:
         for idx in range(4):
             f.write(_to_json(make_sample_batch(idx), []))
             f.write("\n")
         f.write("{..corrupted_json_record")

     reader = JsonReader(ioctx, [input_path])
     seen_a = set()
     for _ in range(10):
         seen_a.add(reader.next()["actions"][0])
     self.assertEqual(len(seen_a), 4)
Beispiel #13
0
 def testSkipsOverEmptyLinesAndFiles(self):
     """Reader built with an IOContext should skip blank lines and files.

     Inputs are a zero-byte file, a file with two blank lines before one
     record, and a file (passed with a "file:" prefix) with one record.
     Exactly two distinct first actions must show up across 100 reads.
     """
     ioctx = IOContext(self.test_dir, {}, 0, None)
     empty_path = self.test_dir + "/empty"
     f1_path = self.test_dir + "/f1"
     f2_path = self.test_dir + "/f2"

     with open(empty_path, "w"):
         pass
     with open(f1_path, "w") as f:
         f.write("\n")
         f.write("\n")
         f.write(_to_json(make_sample_batch(0), []))
     with open(f2_path, "w") as f:
         f.write(_to_json(make_sample_batch(1), []))
         f.write("\n")

     reader = JsonReader(ioctx, [empty_path, f1_path, "file:" + f2_path])
     seen_a = set()
     for _ in range(100):
         sample = reader.next()
         seen_a.add(sample["actions"][0])
     self.assertEqual(len(seen_a), 2)
Beispiel #14
0
 def testAgentOutputOk(self):
     """Agent output must be one directory entry readable by JsonReader."""
     self.writeOutputs(self.test_dir)
     written = os.listdir(self.test_dir)
     self.assertEqual(len(written), 1)

     # Reading one batch through the "*.json" glob must not raise.
     ioctx = IOContext(self.test_dir, {}, 0, None)
     json_glob = self.test_dir + "/*.json"
     JsonReader(ioctx, json_glob).next()
Beispiel #15
0
 def test_agent_output_ok(self):
     """For each framework, expect one output entry that can be read back."""
     for fw in framework_iterator(frameworks=("torch", "tf")):
         self.write_outputs(self.test_dir, fw)
         output_dir = self.test_dir + fw
         num_entries = len(os.listdir(output_dir))
         self.assertEqual(num_entries, 1)
         # Pulling a single batch through the "*.json" glob must not raise.
         JsonReader(output_dir + "/*.json").next()
Beispiel #16
0
 def testAgentOutputOk(self):
     """Written agent output is one directory entry and can be read back."""
     self.writeOutputs(self.test_dir)
     entries = os.listdir(self.test_dir)
     self.assertEqual(len(entries), 1)
     # Fetch one batch through the "*.json" glob; it must not raise.
     JsonReader(self.test_dir + "/*.json").next()
Beispiel #17
0
 def testAgentOutputOk(self):
     """Check the output directory has one entry and yields a batch."""
     self.writeOutputs(self.test_dir)
     num_outputs = len(os.listdir(self.test_dir))
     self.assertEqual(num_outputs, 1)

     json_glob = self.test_dir + "/*.json"
     reader = JsonReader(json_glob)
     # A successful read is the whole check; the batch itself is not used.
     reader.next()
Beispiel #18
0
class TorchCustomLossModel(TorchModelV2, nn.Module):
    """PyTorch model that mixes an imitation loss into the policy loss.

    The forward pass is delegated to a fully connected network. In
    `custom_loss`, an imitation-learning term computed on batches read from
    offline JSON input files is added to every policy loss term.
    """

    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name, input_files):
        """Builds the wrapped FC network and the offline input reader.

        Args:
            obs_space: Observation space, forwarded to the base class and to
                the wrapped TorchFC network.
            action_space: Action space, forwarded likewise.
            num_outputs: Number of model outputs, forwarded likewise.
            model_config: Model config, forwarded likewise and later passed
                to the action distribution.
            name: Name handed to the base class.
            input_files: Input specification handed to JsonReader unchanged.
        """
        super().__init__(obs_space, action_space, num_outputs, model_config,
                         name)
        nn.Module.__init__(self)

        self.input_files = input_files
        # Create a new input reader per worker.
        self.reader = JsonReader(self.input_files)
        self.fcnet = TorchFC(self.obs_space,
                             self.action_space,
                             num_outputs,
                             model_config,
                             name="fcnet")

    @override(ModelV2)
    def forward(self, input_dict, state, seq_lens):
        """Runs the wrapped FC network and returns its result unchanged."""
        # Delegate to our FCNet.
        return self.fcnet(input_dict, state, seq_lens)

    @override(ModelV2)
    def custom_loss(self, policy_loss, loss_inputs):
        """Calculates a custom loss on top of the given policy_loss(es).

        Args:
            policy_loss (List[TensorType]): The list of already calculated
                policy losses (as many as there are optimizers).
            loss_inputs (TensorStruct): Struct of np.ndarrays holding the
                entire train batch.

        Returns:
            List[TensorType]: The altered list of policy losses. In case the
                custom loss should have its own optimizer, make sure the
                returned list is one larger than the incoming policy_loss list.
                In case you simply want to mix in the custom loss into the
                already calculated policy losses, return a list of altered
                policy losses (as done in this example below).
        """
        # Get the next batch from our input files.
        batch = self.reader.next()

        # The offline batch consists of host-side numpy arrays. Move the
        # tensors built from it to the device of the incoming policy losses
        # (assumed to match the model's device), so neither the forward pass
        # nor the loss sum below fails with a device mismatch on GPU.
        if policy_loss:
            device = policy_loss[0].device
        else:
            device = torch.device("cpu")

        # Define a secondary loss by running the offline observations through
        # this same model (weights are shared with the policy).
        obs = restore_original_dimensions(
            torch.from_numpy(batch["obs"]).float().to(device),
            self.obs_space,
            tensorlib="torch")
        logits, _ = self.forward({"obs": obs}, [], None)

        # You can also add self-supervised losses easily by referencing tensors
        # created during the forward pass. For example, an autoencoder-style
        # loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # Compute the IL loss.
        action_dist = TorchCategorical(logits, self.model_config)
        actions = torch.from_numpy(batch["actions"]).to(device)
        imitation_loss = torch.mean(-action_dist.logp(actions))
        # Keep plain Python numbers around for `metrics()`.
        self.imitation_loss_metric = imitation_loss.item()
        self.policy_loss_metric = np.mean(
            [loss.item() for loss in policy_loss])

        # Add the imitation loss to each already calculated policy loss term.
        # Alternatively (if custom loss has its own optimizer):
        # return policy_loss + [10 * imitation_loss]
        return [loss_ + 10 * imitation_loss for loss_ in policy_loss]

    def metrics(self):
        """Returns the loss values recorded by the latest `custom_loss` call.

        Both attributes are only assigned inside `custom_loss`, so calling
        this before the first `custom_loss` call raises AttributeError.
        """
        return {
            "policy_loss": self.policy_loss_metric,
            "imitation_loss": self.imitation_loss_metric,
        }
Beispiel #19
0
    def test_marwil_loss_function(self):
        """Compares MARWIL's loss output against a hand-computed reference.

        To generate the historic data used in this test case, first run:
        $ ./train.py --run=PPO --env=CartPole-v0 \
          --stop='{"timesteps_total": 50000}' \
          --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
        """
        # Locate the offline data file relative to the directory four levels
        # above this test file.
        rllib_dir = Path(__file__).parent.parent.parent.parent
        print("rllib dir={}".format(rllib_dir))
        data_file = os.path.join(rllib_dir, "tests/data/cartpole/small.json")
        print("data_file={} exists={}".format(data_file,
                                              os.path.isfile(data_file)))
        config = marwil.DEFAULT_CONFIG.copy()
        config["num_workers"] = 0  # Run locally.
        # Learn from offline data.
        config["input"] = [data_file]

        for fw in framework_iterator(config, frameworks=["torch", "tf2"]):
            # Read one batch of offline data for the reference computation.
            reader = JsonReader(inputs=[data_file])
            batch = reader.next()

            trainer = marwil.MARWILTrainer(config=config, env="CartPole-v0")
            policy = trainer.get_policy()
            model = policy.model

            # Calculate our own expected values (to then compare against the
            # agent's loss output).
            # NOTE(review): the positional arguments are presumably
            # (last_r, gamma, lambda_, use_gae, use_critic) - confirm against
            # the compute_advantages signature.
            cummulative_rewards = compute_advantages(batch, 0.0,
                                                     config["gamma"], 1.0,
                                                     False,
                                                     False)["advantages"]
            if fw == "torch":
                cummulative_rewards = torch.tensor(cummulative_rewards)
            # NOTE(review): presumably makes the batch return framework
            # tensors on access - confirm against the policy implementation.
            batch = policy._lazy_tensor_dict(batch)
            model_out, _ = model.from_batch(batch)
            vf_estimates = model.value_function()
            adv = cummulative_rewards - vf_estimates
            if fw == "torch":
                # Convert to numpy so the remaining math is framework-free.
                adv = adv.detach().cpu().numpy()
            adv_squared = np.mean(np.square(adv))
            # NOTE(review): 100.0 and 1e-8 presumably mirror the start value
            # and update rate of MARWIL's moving average of the squared
            # advantage norm - confirm against the policy implementation.
            c_2 = 100.0 + 1e-8 * (adv_squared - 100.0)
            c = np.sqrt(c_2)
            exp_advs = np.exp(config["beta"] * (adv / c))
            logp = policy.dist_class(model_out, model).logp(batch["actions"])
            if fw == "torch":
                logp = logp.detach().cpu().numpy()
            # Calculate all expected loss components.
            expected_vf_loss = 0.5 * adv_squared
            expected_pol_loss = -1.0 * np.mean(exp_advs * logp)
            expected_loss = \
                expected_pol_loss + config["vf_coeff"] * expected_vf_loss

            # Calculate the algorithm's loss (to check against our own
            # calculation above).
            # Clear the batch's get-interceptor before post-processing
            # (presumably so raw values are returned again - confirm).
            batch.set_get_interceptor(None)
            postprocessed_batch = policy.postprocess_trajectory(batch)
            loss_func = marwil.marwil_tf_policy.marwil_loss if fw != "torch" \
                else marwil.marwil_torch_policy.marwil_loss
            loss_out = loss_func(policy, model, policy.dist_class,
                                 policy._lazy_tensor_dict(postprocessed_batch))

            # Check all components.
            # The torch policy exposes v_loss/p_loss directly; for the other
            # framework they are read from policy.loss.
            if fw == "torch":
                check(policy.v_loss, expected_vf_loss, decimals=4)
                check(policy.p_loss, expected_pol_loss, decimals=4)
            else:
                check(policy.loss.v_loss, expected_vf_loss, decimals=4)
                check(policy.loss.p_loss, expected_pol_loss, decimals=4)
            check(loss_out, expected_loss, decimals=3)
Beispiel #20
0
    def test_marwil_loss_function(self):
        """Compares MARWIL's loss output against a hand-computed reference.

        To generate the historic data used in this test case, first run:
        $ ./train.py --run=PPO --env=CartPole-v0 \
          --stop='{"timesteps_total": 50000}' \
          --config='{"output": "/tmp/out", "batch_mode": "complete_episodes"}'
        """
        # Locate the offline data file relative to the directory four levels
        # above this test file.
        rllib_dir = Path(__file__).parent.parent.parent.parent
        print("rllib dir={}".format(rllib_dir))
        data_file = os.path.join(rllib_dir, "tests/data/cartpole/small.json")
        print("data_file={} exists={}".format(data_file,
                                              os.path.isfile(data_file)))

        # No remote rollout workers; inputs come from the offline data file.
        config = (marwil.MARWILConfig().rollouts(
            num_rollout_workers=0).offline_data(input_=[data_file])
                  )  # Learn from offline data.

        for fw, sess in framework_iterator(config, session=True):
            # Read one batch of offline data for the reference computation.
            reader = JsonReader(inputs=[data_file])
            batch = reader.next()

            trainer = config.build(env="CartPole-v0")
            policy = trainer.get_policy()
            model = policy.model

            # Calculate our own expected values (to then compare against the
            # agent's loss output).
            # NOTE(review): the positional arguments are presumably
            # (last_r, gamma, lambda_, use_gae, use_critic) - confirm against
            # the compute_advantages signature.
            cummulative_rewards = compute_advantages(batch, 0.0, config.gamma,
                                                     1.0, False,
                                                     False)["advantages"]
            if fw == "torch":
                cummulative_rewards = torch.tensor(cummulative_rewards)
            if fw != "tf":
                # NOTE(review): presumably makes the batch return framework
                # tensors on access - confirm against the policy code.
                batch = policy._lazy_tensor_dict(batch)
            model_out, _ = model(batch)
            vf_estimates = model.value_function()
            if fw == "tf":
                # For "tf" the outputs are evaluated through the policy's
                # session before they are used in the numpy math below.
                model_out, vf_estimates = policy.get_session().run(
                    [model_out, vf_estimates])
            adv = cummulative_rewards - vf_estimates
            if fw == "torch":
                # Convert to numpy so the remaining math is framework-free.
                adv = adv.detach().cpu().numpy()
            adv_squared = np.mean(np.square(adv))
            # NOTE(review): 100.0 and 1e-8 presumably mirror the start value
            # and update rate of MARWIL's moving average of the squared
            # advantage norm - confirm against the policy implementation.
            c_2 = 100.0 + 1e-8 * (adv_squared - 100.0)
            c = np.sqrt(c_2)
            exp_advs = np.exp(config.beta * (adv / c))
            dist = policy.dist_class(model_out, model)
            logp = dist.logp(batch["actions"])
            if fw == "torch":
                logp = logp.detach().cpu().numpy()
            elif fw == "tf":
                logp = sess.run(logp)
            # Calculate all expected loss components.
            expected_vf_loss = 0.5 * adv_squared
            expected_pol_loss = -1.0 * np.mean(exp_advs * logp)
            expected_loss = expected_pol_loss + config.vf_coeff * expected_vf_loss

            # Calculate the algorithm's loss (to check against our own
            # calculation above).
            # Clear the batch's get-interceptor before post-processing
            # (presumably so raw values are returned again - confirm).
            batch.set_get_interceptor(None)
            postprocessed_batch = policy.postprocess_trajectory(batch)
            loss_func = (MARWILTF2Policy.loss
                         if fw != "torch" else MARWILTorchPolicy.loss)
            if fw != "tf":
                # The return value is discarded here; presumably the call
                # modifies postprocessed_batch in place - confirm.
                policy._lazy_tensor_dict(postprocessed_batch)
                loss_out = loss_func(policy, model, policy.dist_class,
                                     postprocessed_batch)
            else:
                loss_out, v_loss, p_loss = policy.get_session().run(
                    # policy._loss is created by TFPolicy, and is basically
                    # the loss tensor of the static graph.
                    [
                        policy._loss,
                        policy._marwil_loss.v_loss,
                        policy._marwil_loss.p_loss,
                    ],
                    feed_dict=policy._get_loss_inputs_dict(postprocessed_batch,
                                                           shuffle=False),
                )

            # Check all components.
            # Where the loss terms live depends on the framework: on the
            # policy itself (torch), in the session results fetched above
            # (tf), or on policy._marwil_loss (any other framework).
            if fw == "torch":
                check(policy.v_loss, expected_vf_loss, decimals=4)
                check(policy.p_loss, expected_pol_loss, decimals=4)
            elif fw == "tf":
                check(v_loss, expected_vf_loss, decimals=4)
                check(p_loss, expected_pol_loss, decimals=4)
            else:
                check(policy._marwil_loss.v_loss, expected_vf_loss, decimals=4)
                check(policy._marwil_loss.p_loss,
                      expected_pol_loss,
                      decimals=4)
            check(loss_out, expected_loss, decimals=3)