Esempio n. 1
0
 def test_multiple_models(self, dataset, model, synthesizer):
     with tempfile.TemporaryDirectory() as tmpdir:
         input = os.path.join(tmpdir, "input")
         output = os.path.join(tmpdir, "output")
         os.makedirs(input)
         os.makedirs(os.path.join(input, "model"))
         torch.save({"score": 0.5, "model": {"score": 0.5, "name": "tmp"}},
                    os.path.join(input, "model", "0"))
         torch.save({"score": 1.0, "model": {"score": 1.0, "name": "tmp"}},
                    os.path.join(input, "model", "1"))
         evaluate(input, output, dataset,
                  model, synthesizer,
                  {
                      "accuracy": use_environment(
                          Accuracy(),
                          in_keys=["actual", ["ground_truth", "expected"]],
                          value_key="actual",
                      ),
                      "bleu": use_environment(
                          Bleu(),
                          in_keys=["actual", ["ground_truth", "expected"]],
                          value_key="actual",
                      ),
                  })
         assert os.path.exists(os.path.join(output, "result.pt"))
         assert os.path.exists(
             os.path.join(output, "result_metrics.json"))
Esempio n. 2
0
    def test_multiprocess(self):
        accuracy = use_environment(
            Accuracy(), in_keys=["actual", ["ground_truth", "expected"]],
            value_key="actual"
        )
        dataset = ListDataset([
            Environment(
                {"query": "query0", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
            Environment(
                {"query": "query1", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
            Environment(
                {"query": "query2", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
        ])

        with tempfile.TemporaryDirectory() as init_dir:
            with context.Pool(2) as pool:
                procs = []
                for i in range(2):
                    p = pool.apply_async(
                        self._run,
                        args=(init_dir, dataset, {"accuracy": accuracy}, i),
                    )
                    procs.append(p)
                out = [p.get() for p in procs]
        r0 = out[0]
        r1 = out[1]

        assert r0 == r1

        results = r0
        assert results.metrics == {1: {"accuracy": 1.0 / 3},
                                   3: {"accuracy": 2.0 / 3}}
        assert 3 == len(results.results)
        results.results[0].time = 0.0
        results.results[1].time = 0.0
        results.results[2].time = 0.0
        results.results.sort(key=lambda x: x.sample["query"])
        assert Result({"query": "query0",
                       "ground_truth": "c0"},
                      ["c0", "c1", "c2"],
                      {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}},
                      True, 0.0) == results.results[0]
        assert Result({"query": "query1",
                       "ground_truth": "c0"},
                      ["c2", "c3", "c0"],
                      {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}},
                      True, 0.0) == results.results[1]
        assert Result({"query": "query2",
                       "ground_truth": "c0"},
                      ["c2", "c3", "c5"],
                      {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}},
                      True, 0.0) == results.results[2]
Esempio n. 3
0
 def _run(self, init_dir, input, output, model, synthesizer, dataset, rank):
     distributed.initialize(init_dir, rank, 2)
     evaluate(
         input, output, dataset,
         model, synthesizer,
         {
             "accuracy": use_environment(
                 Accuracy(),
                 in_keys=["actual", ["ground_truth", "expected"]],
                 value_key="actual",
             ),
             "bleu": use_environment(
                 Bleu(),
                 in_keys=["actual", ["ground_truth", "expected"]],
                 value_key="actual",
             ),
         }
     )
Esempio n. 4
0
 def evaluate(self, qencoder, aencoder, dir):
     model = self.prepare_model(qencoder, aencoder)
     eval(
         dir,
         dir,
         test_dataset,
         model,
         self.prepare_synthesizer(model, qencoder, aencoder),
         {
             "accuracy":
             use_environment(
                 metric=Accuracy(),
                 in_keys=["actual", ["ground_truth", "expected"]],
                 value_key="actual")
         },
         top_n=[5],
     )
     return torch.load(os.path.join(dir, "result.pt"))
Esempio n. 5
0
    def test_simple_case(self):
        accuracy = use_environment(
            Accuracy(), in_keys=["actual", ["ground_truth", "expected"]],
            value_key="actual"
        )
        dataset = ListDataset([
            Environment(
                {"query": "query0", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
            Environment(
                {"query": "query1", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
            Environment(
                {"query": "query2", "ground_truth": "c0"},
                set(["ground_truth"])
            ),
        ])
        results = EvaluateSynthesizer(dataset, synthesize,
                                      metrics={"accuracy": accuracy})()

        assert results.metrics == \
            {1: {"accuracy": 1.0 / 3.0}, 3: {"accuracy": 2.0 / 3.0}}
        assert 3 == len(results.results)
        results.results[0].time = 0.0
        results.results[1].time = 0.0
        results.results[2].time = 0.0
        assert Result({"query": "query0",
                       "ground_truth": "c0"},
                      ["c0", "c1", "c2"],
                      {1: {"accuracy": 1.0}, 3: {"accuracy": 1.0}},
                      True, 0.0) == results.results[0]
        assert Result({"query": "query1",
                       "ground_truth": "c0"},
                      ["c2", "c3", "c0"],
                      {1: {"accuracy": 0.0}, 3: {"accuracy": 1.0}},
                      True, 0.0) == results.results[1]
        assert Result({"query": "query2",
                       "ground_truth": "c0"},
                      ["c2", "c3", "c5"],
                      {1: {"accuracy": 0.0}, 3: {"accuracy": 0.0}},
                      True, 0.0) == results.results[2]
    def reinforce(self, train_dataset, encoder, output_dir):
        with tempfile.TemporaryDirectory() as tmpdir:
            interpreter = self.interpreter()

            collate = Collate(
                test_case_tensor=CollateOptions(False, 0, 0),
                variables_tensor=CollateOptions(True, 0, 0),
                previous_actions=CollateOptions(True, 0, -1),
                hidden_state=CollateOptions(False, 0, 0),
                state=CollateOptions(False, 0, 0),
                ground_truth_actions=CollateOptions(True, 0, -1),
                reward=CollateOptions(False, 0, 0)
            )
            collate_fn = Sequence(OrderedDict([
                ("to_episode", Map(self.to_episode(encoder,
                                                   interpreter))),
                ("flatten", Flatten()),
                ("transform", Map(self.transform(
                    encoder, interpreter, Parser()))),
                ("collate", collate.collate)
            ]))

            model = self.prepare_model(encoder)
            optimizer = self.prepare_optimizer(model)
            train_REINFORCE(
                output_dir, tmpdir, output_dir,
                train_dataset,
                self.prepare_synthesizer(model, encoder, interpreter),
                model, optimizer,
                torch.nn.Sequential(OrderedDict([
                    ("policy",
                     torch.nn.Sequential(OrderedDict([
                         ("loss",
                          Apply(
                              module=mlprogram.nn.action_sequence.Loss(
                                  reduction="none"
                              ),
                              in_keys=[
                                  "rule_probs",
                                  "token_probs",
                                  "reference_probs",
                                  "ground_truth_actions",
                              ],
                              out_key="action_sequence_loss",
                          )),
                         ("weight_by_reward",
                             Apply(
                                 [("reward", "lhs"),
                                  ("action_sequence_loss", "rhs")],
                                 "action_sequence_loss",
                                 mlprogram.nn.Function(Mul())))
                     ]))),
                    ("value",
                     torch.nn.Sequential(OrderedDict([
                         ("reshape_reward",
                             Apply(
                                 [("reward", "x")],
                                 "value_loss_target",
                                 Reshape([-1, 1]))),
                         ("BCE",
                             Apply(
                                 [("value", "input"),
                                  ("value_loss_target", "target")],
                                 "value_loss",
                                 torch.nn.BCELoss(reduction='sum'))),
                         ("reweight",
                             Apply(
                                 [("value_loss", "lhs")],
                                 "value_loss",
                                 mlprogram.nn.Function(Mul()),
                                 constants={"rhs": 1e-2})),
                     ]))),
                    ("aggregate",
                     Apply(
                         ["action_sequence_loss", "value_loss"],
                         "loss",
                         AggregatedLoss())),
                    ("normalize",
                     Apply(
                         [("loss", "lhs")],
                         "loss",
                         mlprogram.nn.Function(Div()),
                         constants={"rhs": 1})),
                    ("pick",
                     mlprogram.nn.Function(
                         Pick("loss")))
                ])),
                EvaluateSynthesizer(
                    train_dataset,
                    self.prepare_synthesizer(model, encoder, interpreter,
                                             rollout=False),
                    {}, top_n=[]),
                "generation_rate",
                metrics.use_environment(
                    metric=metrics.TestCaseResult(
                        interpreter=interpreter,
                        metric=metrics.use_environment(
                            metric=metrics.Iou(),
                            in_keys=["actual", "expected"],
                            value_key="actual",
                        )
                    ),
                    in_keys=["test_cases", "actual"],
                    value_key="actual",
                    transform=Threshold(threshold=0.9, dtype="float"),
                ),
                collate_fn,
                1, 1,
                Epoch(10), evaluation_interval=Epoch(10),
                snapshot_interval=Epoch(10),
                use_pretrained_model=True,
                use_pretrained_optimizer=True,
                threshold=1.0)
    def prepare_synthesizer(self, model, encoder, interpreter, rollout=True):
        collate = Collate(
            test_case_tensor=CollateOptions(False, 0, 0),
            input_feature=CollateOptions(False, 0, 0),
            test_case_feature=CollateOptions(False, 0, 0),
            reference_features=CollateOptions(True, 0, 0),
            variables_tensor=CollateOptions(True, 0, 0),
            previous_actions=CollateOptions(True, 0, -1),
            hidden_state=CollateOptions(False, 0, 0),
            state=CollateOptions(False, 0, 0),
            ground_truth_actions=CollateOptions(True, 0, -1)
        )
        subsampler = ActionSequenceSampler(
            encoder, IsSubtype(),
            Sequence(OrderedDict([
                ("tinput",
                 Apply(
                     module=TransformInputs(),
                     in_keys=["test_cases"],
                     out_key="test_case_tensor",
                 )),
                ("tvariable",
                 Apply(
                     module=TransformVariables(),
                     in_keys=["variables", "test_case_tensor"],
                     out_key="variables_tensor"
                 )),
            ])),
            Compose(OrderedDict([
                ("add_previous_actions",
                 Apply(
                    module=AddPreviousActions(encoder, n_dependent=1),
                    in_keys=["action_sequence", "reference"],
                    out_key="previous_actions",
                    constants={"train": False},
                    )),
                ("add_state", AddState("state")),
                ("add_hidden_state", AddState("hidden_state"))
            ])),
            collate, model,
            rng=np.random.RandomState(0))
        subsampler = mlprogram.samplers.transform(
            subsampler,
            Parser().unparse
        )
        subsynthesizer = SMC(
            5, 1,
            subsampler,
            max_try_num=1,
            to_key=Pick("action_sequence"),
            rng=np.random.RandomState(0)
        )

        sampler = SequentialProgramSampler(
            subsynthesizer,
            Apply(
                module=TransformInputs(),
                in_keys=["test_cases"],
                out_key="test_case_tensor",
            ),
            collate,
            model.encode_input,
            interpreter=interpreter,
            expander=Expander(),
            rng=np.random.RandomState(0))
        if rollout:
            sampler = FilteredSampler(
                sampler,
                metrics.use_environment(
                    metric=metrics.TestCaseResult(
                        interpreter,
                        metric=metrics.use_environment(
                            metric=metrics.Iou(),
                            in_keys=["actual", "expected"],
                            value_key="actual",
                        )
                    ),
                    in_keys=["test_cases", "actual"],
                    value_key="actual"
                ),
                1.0
            )
            return SMC(3, 1, sampler, rng=np.random.RandomState(0),
                       to_key=Pick("interpreter_state"), max_try_num=1)
        else:
            sampler = SamplerWithValueNetwork(
                sampler,
                Sequence(OrderedDict([
                    ("tinput",
                     Apply(
                         module=TransformInputs(),
                         in_keys=["test_cases"],
                         out_key="test_case_tensor",
                     )),
                    ("tvariable",
                     Apply(
                         module=TransformVariables(),
                         in_keys=["variables", "test_case_tensor"],
                         out_key="variables_tensor"
                     )),
                ])),
                collate,
                torch.nn.Sequential(OrderedDict([
                    ("encoder", model.encoder),
                    ("value", model.value),
                    ("pick",
                     mlprogram.nn.Function(
                         Pick("value")))
                ])))

            synthesizer = SynthesizerWithTimeout(
                SMC(3, 1, sampler, rng=np.random.RandomState(0),
                    to_key=Pick("interpreter_state"),
                    max_try_num=1),
                1
            )
            return FilteredSynthesizer(
                synthesizer,
                metrics.use_environment(
                    metric=metrics.TestCaseResult(
                        interpreter,
                        metric=metrics.use_environment(
                            metric=metrics.Iou(),
                            in_keys=["actual", "expected"],
                            value_key="actual",
                        )
                    ),
                    in_keys=["test_cases", "actual"],
                    value_key="actual"
                ),
                1.0
            )