Example #1
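# Worker body for a distributed-training test: each of two ranks runs
# train_supervised in its own process; only rank 0 is expected to write
# snapshots, logs, and model artifacts.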
def _run(init_dir, dataset, model, loss_fn, optimizer, rank):
    with tempfile.TemporaryDirectory() as tmpdir:
        distributed.initialize(init_dir, rank, 2)

        output = os.path.join(tmpdir, "out")
        train_supervised(output,
                         dataset,
                         model,
                         optimizer,
                         loss_fn,
                         MockEvaluate("key"),
                         "key",
                         collate.collate,
                         1,
                         Epoch(2),
                         n_dataloader_worker=0)
        if rank == 0:
            assert os.path.exists(os.path.join(output, "snapshot_iter_2"))

            assert os.path.exists(os.path.join(output, "log.json"))
            with open(os.path.join(output, "log.json")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 1 == len(os.listdir(os.path.join(output, "model")))
            assert os.path.exists(os.path.join(output, "model.pt"))
            assert os.path.exists(os.path.join(output, "optimizer.pt"))
        else:
            assert not os.path.exists(os.path.join(output, "snapshot_iter_2"))
            assert not os.path.exists(os.path.join(output, "log.json"))
            assert not os.path.exists(os.path.join(output, "model"))
        return model.state_dict(), optimizer.state_dict()
Example #2
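    # Only the most recent snapshot (snapshot_iter_6) is asserted to exist;
    # per the test name, older snapshots are expected to be deleted as
    # training progresses.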
    def test_remove_old_snapshots(self, dataset, model, loss_fn, optimizer):
        with tempfile.TemporaryDirectory() as tmpdir:
            output = os.path.join(tmpdir, "out")
            train_supervised(output, dataset, model, optimizer, loss_fn,
                             MockEvaluate("key"), "key", collate.collate, 1,
                             Epoch(2))
            assert os.path.exists(os.path.join(output, "snapshot_iter_6"))
Example #3
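    # Trains on an iterable (non-indexable) dataset, measuring training
    # length in iterations instead of epochs and evaluating every iteration;
    # note the separate workspace (ws) and output directories.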
    def test_iterable_dataset(self, iterable_dataset, model, loss_fn,
                              optimizer):
        with tempfile.TemporaryDirectory() as tmpdir:
            ws = os.path.join(tmpdir, "ws")
            output = os.path.join(tmpdir, "out")
            train_supervised(ws,
                             output,
                             iterable_dataset,
                             model,
                             optimizer,
                             loss_fn,
                             MockEvaluate("key"),
                             "key",
                             collate.collate,
                             1,
                             Iteration(2),
                             evaluation_interval=Iteration(1))
            assert os.path.exists(os.path.join(ws, "snapshot_iter_2"))
            assert os.path.exists(os.path.join(ws, "log"))
            with open(os.path.join(ws, "log")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 1 == len(os.listdir(os.path.join(ws, "model")))

            assert os.path.exists(os.path.join(output, "log.json"))
            with open(os.path.join(output, "log.json")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 1 == len(os.listdir(os.path.join(output, "model")))
Example #4
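    # End-to-end training helper: builds the loss pipeline, collate function,
    # encoders, model, and optimizer, then trains for up to 100 epochs with a
    # 1.0 threshold on accuracy@5 (train_dataset and test_dataset are assumed
    # to be defined elsewhere on the class).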
    def train(self, output_dir):
        with tempfile.TemporaryDirectory() as tmpdir:
            loss_fn = nn.Sequential(
                OrderedDict([
                    ("loss",
                     Apply(
                         module=Loss(),
                         in_keys=[
                             "rule_probs",
                             "token_probs",
                             "reference_probs",
                             "ground_truth_actions",
                         ],
                         out_key="action_sequence_loss",
                     )),
                    ("pick",
                     mlprogram.nn.Function(Pick("action_sequence_loss")))
                ]))
            collate = Collate(word_nl_query=CollateOptions(True, 0, -1),
                              nl_query_features=CollateOptions(True, 0, -1),
                              reference_features=CollateOptions(True, 0, -1),
                              actions=CollateOptions(True, 0, -1),
                              previous_actions=CollateOptions(True, 0, -1),
                              previous_action_rules=CollateOptions(
                                  True, 0, -1),
                              history=CollateOptions(False, 1, 0),
                              hidden_state=CollateOptions(False, 0, 0),
                              state=CollateOptions(False, 0, 0),
                              ground_truth_actions=CollateOptions(True, 0,
                                                                  -1)).collate

            qencoder, aencoder = \
                self.prepare_encoder(train_dataset, Parser())
            transform = Map(self.transform_cls(qencoder, aencoder, Parser()))
            model = self.prepare_model(qencoder, aencoder)
            optimizer = self.prepare_optimizer(model)
            train_supervised(tmpdir,
                             output_dir,
                             train_dataset,
                             model,
                             optimizer,
                             loss_fn,
                             EvaluateSynthesizer(test_dataset,
                                                 self.prepare_synthesizer(
                                                     model, qencoder,
                                                     aencoder),
                                                 {"accuracy": Accuracy()},
                                                 top_n=[5]),
                             "accuracy@5",
                             lambda x: collate(transform(x)),
                             1,
                             Epoch(100),
                             evaluation_interval=Epoch(100),
                             snapshot_interval=Epoch(100),
                             threshold=1.0)
        return qencoder, aencoder
Example #5
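    # Passing None as the evaluator skips evaluation: no best-model copies
    # appear under model/, but the final model.pt and optimizer.pt are still
    # written.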
    def test_skip_evaluation(self, dataset, model, loss_fn, optimizer):
        with tempfile.TemporaryDirectory() as tmpdir:
            output = os.path.join(tmpdir, "out")
            train_supervised(output, dataset, model, optimizer, loss_fn, None,
                             "key", collate.collate, 1, Epoch(2))
            assert os.path.exists(os.path.join(output, "snapshot_iter_6"))
            assert os.path.exists(os.path.join(output, "log.json"))
            with open(os.path.join(output, "log.json")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 0 == len(os.listdir(os.path.join(output, "model")))
            assert os.path.exists(os.path.join(output, "model.pt"))
            assert os.path.exists(os.path.join(output, "optimizer.pt"))
Example #6
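    # The second call resumes from the checkpoint written by the first: the
    # original log entry is preserved verbatim and exactly one new entry is
    # appended for the additional epoch.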
    def test_resume_from_checkpoint(self, dataset, model, loss_fn, optimizer):
        with tempfile.TemporaryDirectory() as tmpdir:
            output = os.path.join(tmpdir, "out")
            train_supervised(output, dataset, model, optimizer, loss_fn,
                             MockEvaluate("key"), "key", collate.collate, 1,
                             Epoch(1))
            with open(os.path.join(output, "log.json")) as file:
                log = json.load(file)

            train_supervised(output, dataset, model, optimizer, loss_fn,
                             MockEvaluate("key"), "key", collate.collate, 1,
                             Epoch(2))
            assert os.path.exists(os.path.join(output, "snapshot_iter_6"))
            with open(os.path.join(output, "log.json")) as file:
                log2 = json.load(file)
            assert log[0] == log2[0]
            assert 2 == len(log2)
Example #7
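    # MockEvaluate reports a score of 0.0, which already meets threshold=0.0,
    # so a best-model entry is saved under model/ alongside the final
    # model.pt and optimizer.pt.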
    def test_threshold(self, dataset, model, loss_fn, optimizer):
        with tempfile.TemporaryDirectory() as tmpdir:
            output = os.path.join(tmpdir, "out")
            train_supervised(output,
                             dataset,
                             model,
                             optimizer,
                             loss_fn,
                             MockEvaluate("key"),
                             "key",
                             collate.collate,
                             1,
                             Epoch(2),
                             threshold=0.0)
            assert 1 == len(os.listdir(os.path.join(output, "model")))
            assert os.path.exists(os.path.join(output, "model.pt"))
            assert os.path.exists(os.path.join(output, "optimizer.pt"))
Example #8
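    # The evaluator here flips the model into eval mode; DummyModel.forward
    # asserts self.training, so the test verifies that training resumes in
    # train mode after each evaluation.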
    def test_resume_from_eval_mode(self, dataset, loss_fn):
        class DummyModel(nn.Module):
            def __init__(self):
                super().__init__()
                self.m = nn.Linear(1, 1)

            def forward(self, kwargs):
                assert self.training
                kwargs["value"] = self.m(kwargs["value"].float())
                return kwargs

        class MockEvaluate(object):
            def __init__(self, key, model):
                self.key = key
                self.model = model

            def __call__(self):
                self.model.eval()
                report({self.key: 0.0})

        with tempfile.TemporaryDirectory() as tmpdir:
            ws = os.path.join(tmpdir, "ws")
            output = os.path.join(tmpdir, "out")
            model = DummyModel()
            train_supervised(ws, output, dataset, model,
                             torch.optim.SGD(model.parameters(),
                                             lr=0.1), loss_fn,
                             MockEvaluate("key", model), "key",
                             collate.collate, 1, Epoch(2))
            assert os.path.exists(os.path.join(ws, "snapshot_iter_6"))
            assert os.path.exists(os.path.join(ws, "log"))
            with open(os.path.join(ws, "log")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 1 == len(os.listdir(os.path.join(ws, "model")))

            assert os.path.exists(os.path.join(output, "log.json"))
            with open(os.path.join(output, "log.json")) as file:
                log = json.load(file)
            assert isinstance(log, list)
            assert 1 == len(log)
            assert 1 == len(os.listdir(os.path.join(output, "model")))
            assert os.path.exists(os.path.join(output, "model.pt"))
            assert os.path.exists(os.path.join(output, "optimizer.pt"))
Example #9
    def pretrain(self, output_dir):
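        # Pretrains on a small hand-built geometry dataset (circles,
        # rectangles, rotations, translations, unions, differences), wiring
        # up transforms, collate, model, and a summed loss normalized by
        # batch size before calling train_supervised with no evaluator.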
        dataset = Dataset(4, 1, 2, 1, 45, seed=0)
        """
        """
        train_dataset = ListDataset([
            Environment(
                {"ground_truth": Circle(1)},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Rectangle(1, 2)},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Rectangle(1, 1)},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Rotation(45, Rectangle(1, 1))},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Translation(1, 1, Rectangle(1, 1))},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Difference(Circle(1), Circle(1))},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Union(Rectangle(1, 2), Circle(1))},
                set(["ground_truth"]),
            ),
            Environment(
                {"ground_truth": Difference(Rectangle(1, 1), Circle(1))},
                set(["ground_truth"]),
            ),
        ])

        with tempfile.TemporaryDirectory() as tmpdir:
            interpreter = self.interpreter()
            train_dataset = data_transform(
                train_dataset,
                Apply(
                    module=AddTestCases(interpreter),
                    in_keys=["ground_truth"],
                    out_key="test_cases",
                    is_out_supervision=False,
                ))
            encoder = self.prepare_encoder(dataset, Parser())

            collate = Collate(
                test_case_tensor=CollateOptions(False, 0, 0),
                variables_tensor=CollateOptions(True, 0, 0),
                previous_actions=CollateOptions(True, 0, -1),
                hidden_state=CollateOptions(False, 0, 0),
                state=CollateOptions(False, 0, 0),
                ground_truth_actions=CollateOptions(True, 0, -1)
            )
            collate_fn = Sequence(OrderedDict([
                ("to_episode", Map(self.to_episode(encoder,
                                                   interpreter))),
                ("flatten", Flatten()),
                ("transform", Map(self.transform(
                    encoder, interpreter, Parser()))),
                ("collate", collate.collate)
            ]))

            model = self.prepare_model(encoder)
            optimizer = self.prepare_optimizer(model)
            train_supervised(
                tmpdir, output_dir,
                train_dataset, model, optimizer,
                torch.nn.Sequential(OrderedDict([
                    ("loss",
                     Apply(
                         module=Loss(
                             reduction="sum",
                         ),
                         in_keys=[
                             "rule_probs",
                             "token_probs",
                             "reference_probs",
                             "ground_truth_actions",
                         ],
                         out_key="action_sequence_loss",
                     )),
                    ("normalize",  # divided by batch_size
                     Apply(
                         [("action_sequence_loss", "lhs")],
                         "loss",
                         mlprogram.nn.Function(Div()),
                         constants={"rhs": 1})),
                    ("pick",
                     mlprogram.nn.Function(
                         Pick("loss")))
                ])),
                None, "score",
                collate_fn,
                1, Epoch(100), evaluation_interval=Epoch(10),
                snapshot_interval=Epoch(100)
            )
        return encoder, train_dataset
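
The examples above share pytest fixtures (dataset, model, loss_fn, optimizer) and a MockEvaluate helper that this listing does not show. Below is a minimal sketch of what such fixtures could look like; the reporter import path and all fixture bodies are assumptions for illustration (consistent with the unqualified report() call in Example #8), not the project's actual definitions.

import pytest
import torch
import torch.nn as nn
from pytorch_pfn_extras.reporting import report  # assumed reporter import


class MockEvaluate(object):
    # Fake evaluator: reports a constant score under `key` on every call, so
    # tests can exercise best-model saving and early-stop thresholds.
    def __init__(self, key):
        self.key = key

    def __call__(self, *args, **kwargs):
        report({self.key: 0.0})


@pytest.fixture
def dataset():
    # Three dummy samples; with batch size 1 and Epoch(2), training runs for
    # six iterations, matching names like "snapshot_iter_6" above.
    return [{"value": torch.tensor([i])} for i in range(3)]


@pytest.fixture
def model():
    # Tiny stand-in; the real fixture builds the project's model.
    return nn.Linear(1, 1)


@pytest.fixture
def optimizer(model):
    return torch.optim.SGD(model.parameters(), lr=0.1)


@pytest.fixture
def loss_fn():
    # Any callable that maps a batch to a scalar loss suffices here.
    def f(batch):
        return batch["value"].float().mean()
    return f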