Beispiel #1
0
    def test_eval(self):
        """Check the tensor produced by ``AddPreviousActions`` with train=False.

        An encoder is built from the samples of a one-entry dataset whose
        ground truth is ``"y = x + 1"``; the same string is converted to an
        action sequence and passed through the transform together with a
        two-token reference. The resulting tensor must equal the expected
        rows listed below.
        """
        environment = Environment(
            {"ground_truth": "y = x + 1"},
            {"ground_truth"},
        )
        samples = get_samples(ListDataset([environment]), MockParser())
        encoder = ActionSequenceEncoder(samples, 0)

        to_action_sequence = GroundTruthToActionSequence(MockParser())
        actions = to_action_sequence("y = x + 1")

        add_previous_actions = AddPreviousActions(encoder)
        reference_tokens = [
            Token(None, "foo", "foo"),
            Token(None, "bar", "bar"),
        ]
        result = add_previous_actions(
            reference=reference_tokens,
            action_sequence=actions,
            train=False,
        )

        # One row per previous action, three columns each.
        expected_rows = [
            [2, -1, -1], [3, -1, -1], [4, -1, -1], [-1, 1, -1],
            [1, -1, -1], [5, -1, -1], [-1, 2, -1], [1, -1, -1],
            [4, -1, -1], [-1, 3, -1], [1, -1, -1], [6, -1, -1],
            [-1, 4, -1], [1, -1, -1],
        ]
        assert np.array_equal(expected_rows, result.numpy())
Beispiel #2
0
 def transform_cls(self, qencoder, cencoder, aencoder, parser):
     """Assemble the training-time transform as a ``Sequence`` of named steps.

     Steps are registered in this order: tokenize the text query into
     ``reference``, encode it as word and character queries, convert
     ``ground_truth`` into ``action_sequence``, derive the previous-action,
     previous-action-rule, tree and query features from the action sequence
     (all with ``train=True``), and finally encode the action sequence as
     ``ground_truth_actions``.

     Args:
         qencoder: Encoder handed to ``EncodeWordQuery``.
         cencoder: Encoder handed to ``EncodeCharacterQuery``.
         aencoder: Action-sequence encoder shared by the action-based steps.
         parser: Parser handed to ``GroundTruthToActionSequence``.

     Returns:
         The ``Sequence`` wrapping the ordered steps.
     """
     steps = OrderedDict()

     # Query-side features.
     steps["extract_reference"] = Apply(
         module=mlprogram.nn.Function(tokenize),
         in_keys=[["text_query", "str"]],
         out_key="reference",
     )
     steps["encode_word_query"] = Apply(
         module=EncodeWordQuery(qencoder),
         in_keys=["reference"],
         out_key="word_nl_query",
     )
     steps["encode_char"] = Apply(
         module=EncodeCharacterQuery(cencoder, 10),
         in_keys=["reference"],
         out_key="char_nl_query",
     )

     # Ground truth -> action sequence.
     steps["f2"] = Apply(
         module=GroundTruthToActionSequence(parser),
         in_keys=["ground_truth"],
         out_key="action_sequence",
     )

     # Features derived from the action sequence, in training mode.
     steps["add_previous_action"] = Apply(
         module=AddPreviousActions(aencoder),
         in_keys=["action_sequence", "reference"],
         constants={"train": True},
         out_key="previous_actions",
     )
     steps["add_previous_action_rule"] = Apply(
         module=AddPreviousActionRules(aencoder, 4),
         in_keys=["action_sequence", "reference"],
         constants={"train": True},
         out_key="previous_action_rules",
     )
     steps["add_tree"] = Apply(
         module=AddActionSequenceAsTree(aencoder),
         in_keys=["action_sequence", "reference"],
         constants={"train": True},
         out_key=["adjacency_matrix", "depthes"],
     )
     steps["add_query"] = Apply(
         module=AddQueryForTreeGenDecoder(aencoder, 4),
         in_keys=["action_sequence", "reference"],
         constants={"train": True},
         out_key="action_queries",
     )

     # Supervision target.
     steps["f4"] = Apply(
         module=EncodeActionSequence(aencoder),
         in_keys=["action_sequence", "reference"],
         out_key="ground_truth_actions",
     )
     return Sequence(steps)
Beispiel #3
0
 def test_impossible_case(self):
     """Expect ``RuntimeError`` from ``AddPreviousActions`` with train=True.

     The samples' token list is overwritten with only ``"y"`` and ``"1"``
     before the encoder is built; the transform is then applied to the
     action sequence of ``"y = x + 1"`` and must raise.
     """
     environment = Environment(
         {"ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     samples = get_samples(ListDataset([environment]), MockParser())
     # NOTE(review): the replacement list has no "x" entry, which is
     # presumably what makes the sequence impossible to encode - confirm.
     samples.tokens = [("", "y"), ("", "1")]

     encoder = ActionSequenceEncoder(samples, 0)
     add_previous_actions = AddPreviousActions(encoder)
     to_action_sequence = GroundTruthToActionSequence(MockParser())
     actions = to_action_sequence("y = x + 1")

     with pytest.raises(RuntimeError):
         reference_tokens = [
             Token(None, "foo", "foo"),
             Token(None, "bar", "bar"),
         ]
         add_previous_actions(
             reference=reference_tokens,
             action_sequence=actions,
             train=True,
         )
 def prepare_synthesizer(self, model, qencoder, aencoder):
     """Build a ``BeamSearch`` synthesizer around an ``ActionSequenceSampler``.

     Args:
         model: Model handed to the ``ActionSequenceSampler``.
         qencoder: Encoder handed to ``EncodeWordQuery``.
         aencoder: Action-sequence encoder used by the sampler and by the
             action-sequence transforms.

     Returns:
         ``BeamSearch(5, 20, sampler)`` where ``sampler`` combines the input
         transform, the action-sequence transform (``train=False``), the
         collate configuration and ``model``.
     """
     # Transform applied to the query input.
     input_steps = OrderedDict()
     input_steps["extract_reference"] = Apply(
         module=mlprogram.nn.Function(tokenize),
         in_keys=[["text_query", "str"]],
         out_key="reference",
     )
     input_steps["encode_query"] = Apply(
         module=EncodeWordQuery(qencoder),
         in_keys=["reference"],
         out_key="word_nl_query",
     )
     transform_input = Compose(input_steps)

     # Transform applied to the action sequence, in inference mode.
     action_steps = OrderedDict()
     action_steps["add_previous_action"] = Apply(
         module=AddPreviousActions(aencoder, n_dependent=1),
         in_keys=["action_sequence", "reference"],
         constants={"train": False},
         out_key="previous_actions",
     )
     action_steps["add_action"] = Apply(
         module=AddActions(aencoder, n_dependent=1),
         in_keys=["action_sequence", "reference"],
         constants={"train": False},
         out_key="actions",
     )
     action_steps["add_state"] = AddState("state")
     action_steps["add_hidden_state"] = AddState("hidden_state")
     action_steps["add_history"] = AddState("history")
     transform_action_sequence = Compose(action_steps)

     # Per-key collate settings, passed on as keyword arguments.
     collate_options = {
         "word_nl_query": CollateOptions(True, 0, -1),
         "nl_query_features": CollateOptions(True, 0, -1),
         "reference_features": CollateOptions(True, 0, -1),
         "actions": CollateOptions(True, 0, -1),
         "previous_actions": CollateOptions(True, 0, -1),
         "previous_action_rules": CollateOptions(True, 0, -1),
         "history": CollateOptions(False, 1, 0),
         "hidden_state": CollateOptions(False, 0, 0),
         "state": CollateOptions(False, 0, 0),
         "ground_truth_actions": CollateOptions(True, 0, -1),
     }
     collate = Collate(**collate_options)

     sampler = ActionSequenceSampler(
         aencoder,
         is_subtype,
         transform_input,
         transform_action_sequence,
         collate,
         model,
     )
     return BeamSearch(5, 20, sampler)
 def transform_cls(self, qencoder, aencoder, parser):
     """Assemble the training-time transform as a ``Sequence`` of named steps.

     The steps tokenize and encode the text query, convert ``ground_truth``
     into ``action_sequence``, add previous actions and actions with
     ``train=True``, add the ``state``, ``hidden_state`` and ``history``
     entries, and encode the action sequence as ``ground_truth_actions``.

     Args:
         qencoder: Encoder handed to ``EncodeWordQuery``.
         aencoder: Action-sequence encoder shared by the action-based steps.
         parser: Parser handed to ``GroundTruthToActionSequence``.

     Returns:
         The ``Sequence`` wrapping the ordered steps.
     """
     named_steps = [
         (
             "extract_reference",
             Apply(
                 module=mlprogram.nn.Function(tokenize),
                 in_keys=[["text_query", "str"]],
                 out_key="reference",
             ),
         ),
         (
             "encode_word_query",
             Apply(
                 module=EncodeWordQuery(qencoder),
                 in_keys=["reference"],
                 out_key="word_nl_query",
             ),
         ),
         (
             "f2",
             Apply(
                 module=GroundTruthToActionSequence(parser),
                 in_keys=["ground_truth"],
                 out_key="action_sequence",
             ),
         ),
         (
             "add_previous_action",
             Apply(
                 module=AddPreviousActions(aencoder, n_dependent=1),
                 in_keys=["action_sequence", "reference"],
                 constants={"train": True},
                 out_key="previous_actions",
             ),
         ),
         (
             "add_action",
             Apply(
                 module=AddActions(aencoder, n_dependent=1),
                 in_keys=["action_sequence", "reference"],
                 constants={"train": True},
                 out_key="actions",
             ),
         ),
         ("add_state", AddState("state")),
         ("add_hidden_state", AddState("hidden_state")),
         ("add_history", AddState("history")),
         (
             "f4",
             Apply(
                 module=EncodeActionSequence(aencoder),
                 in_keys=["action_sequence", "reference"],
                 out_key="ground_truth_actions",
             ),
         ),
     ]
     return Sequence(OrderedDict(named_steps))
 def transform(self, encoder, interpreter, parser):
     """Assemble the training-time transform as a ``Sequence`` of named steps.

     Args:
         encoder: Action-sequence encoder handed to ``AddPreviousActions``
             and ``EncodeActionSequence``.
         interpreter: Not referenced in this method body.
         parser: Parser handed to ``GroundTruthToActionSequence``.

     Returns:
         A ``Sequence`` that transforms the test cases and variables,
         converts ``ground_truth`` into ``action_sequence``, adds previous
         actions (``train=True``), adds the ``state`` and ``hidden_state``
         entries, and encodes ``ground_truth_actions``.
     """
     # The action-related steps are created up front, before the input steps.
     to_action_sequence = Apply(
         module=GroundTruthToActionSequence(parser),
         in_keys=["ground_truth"],
         out_key="action_sequence",
     )
     add_previous_actions = Apply(
         module=AddPreviousActions(encoder, n_dependent=1),
         in_keys=["action_sequence", "reference"],
         constants={"train": True},
         out_key="previous_actions",
     )
     encode_target = Apply(
         module=EncodeActionSequence(encoder),
         in_keys=["action_sequence", "reference"],
         out_key="ground_truth_actions",
     )

     steps = OrderedDict()
     steps["tinput"] = Apply(
         module=TransformInputs(),
         in_keys=["test_cases"],
         out_key="test_case_tensor",
     )
     steps["tvariable"] = Apply(
         module=TransformVariables(),
         in_keys=["variables", "test_case_tensor"],
         out_key="variables_tensor",
     )
     steps["tcode"] = to_action_sequence
     steps["aaction"] = add_previous_actions
     steps["add_state"] = AddState("state")
     steps["add_hidden_state"] = AddState("hidden_state")
     steps["tgt"] = encode_target
     return Sequence(steps)
Beispiel #7
0
    def prepare_synthesizer(self, model, qencoder, cencoder, aencoder):
        """Build a ``BeamSearch`` synthesizer around an ``ActionSequenceSampler``.

        Args:
            model: Model handed to the ``ActionSequenceSampler``.
            qencoder: Encoder handed to ``EncodeWordQuery``.
            cencoder: Encoder handed to ``EncodeCharacterQuery``.
            aencoder: Action-sequence encoder used by the sampler and by the
                action-sequence transforms.

        Returns:
            ``BeamSearch(5, 20, sampler)`` where ``sampler`` combines the
            input transform, the action-sequence transform (``train=False``),
            the collate configuration and ``model``.
        """
        # Transform applied to the query input: tokens, word ids, char ids.
        input_steps = [
            (
                "extract_reference",
                Apply(
                    module=mlprogram.nn.Function(tokenize),
                    in_keys=[["text_query", "str"]],
                    out_key="reference",
                ),
            ),
            (
                "encode_word_query",
                Apply(
                    module=EncodeWordQuery(qencoder),
                    in_keys=["reference"],
                    out_key="word_nl_query",
                ),
            ),
            (
                "encode_char",
                Apply(
                    module=EncodeCharacterQuery(cencoder, 10),
                    in_keys=["reference"],
                    out_key="char_nl_query",
                ),
            ),
        ]
        transform_input = Compose(OrderedDict(input_steps))

        # Transform applied to the action sequence, in inference mode.
        action_steps = [
            (
                "add_previous_action",
                Apply(
                    module=AddPreviousActions(aencoder),
                    in_keys=["action_sequence", "reference"],
                    constants={"train": False},
                    out_key="previous_actions",
                ),
            ),
            (
                "add_previous_action_rule",
                Apply(
                    module=AddPreviousActionRules(aencoder, 4),
                    in_keys=["action_sequence", "reference"],
                    constants={"train": False},
                    out_key="previous_action_rules",
                ),
            ),
            (
                "add_tree",
                Apply(
                    module=AddActionSequenceAsTree(aencoder),
                    in_keys=["action_sequence", "reference"],
                    constants={"train": False},
                    out_key=["adjacency_matrix", "depthes"],
                ),
            ),
            (
                "add_query",
                Apply(
                    module=AddQueryForTreeGenDecoder(aencoder, 4),
                    in_keys=["action_sequence", "reference"],
                    constants={"train": False},
                    out_key="action_queries",
                ),
            ),
        ]
        transform_action_sequence = Compose(OrderedDict(action_steps))

        # Per-key collate settings, passed on as keyword arguments.
        collate_options = {
            "word_nl_query": CollateOptions(True, 0, -1),
            "char_nl_query": CollateOptions(True, 0, -1),
            "nl_query_features": CollateOptions(True, 0, -1),
            "reference_features": CollateOptions(True, 0, -1),
            "previous_actions": CollateOptions(True, 0, -1),
            "previous_action_rules": CollateOptions(True, 0, -1),
            "depthes": CollateOptions(False, 1, 0),
            "adjacency_matrix": CollateOptions(False, 0, 0),
            "action_queries": CollateOptions(True, 0, -1),
            "ground_truth_actions": CollateOptions(True, 0, -1),
        }
        collate = Collate(**collate_options)

        sampler = ActionSequenceSampler(
            aencoder,
            is_subtype,
            transform_input,
            transform_action_sequence,
            collate,
            model,
        )
        return BeamSearch(5, 20, sampler)
    def prepare_synthesizer(self, model, encoder, interpreter, rollout=True):
        """Build the SMC-based synthesizer, with or without a value network.

        An inner ``SMC`` over an ``ActionSequenceSampler`` is wrapped in a
        ``SequentialProgramSampler``. Every random state created here is
        ``np.random.RandomState(0)``.

        Args:
            model: Model providing ``encode_input``, ``encoder`` and
                ``value``, also handed to the ``ActionSequenceSampler``.
            encoder: Action-sequence encoder for the sampler and for
                ``AddPreviousActions``.
            interpreter: Interpreter handed to the sequential sampler and to
                ``metrics.TestCaseResult``.
            rollout: When true, the sequential sampler is wrapped in a
                ``FilteredSampler`` and an ``SMC`` is returned directly.
                Otherwise it is wrapped in a ``SamplerWithValueNetwork`` and
                the resulting ``SMC`` is wrapped in ``SynthesizerWithTimeout``
                and ``FilteredSynthesizer``.

        Returns:
            The outer ``SMC`` (``rollout=True``) or a ``FilteredSynthesizer``
            (``rollout=False``).
        """

        def build_input_transform():
            # A new Sequence per call, so each consumer owns its own modules.
            steps = OrderedDict()
            steps["tinput"] = Apply(
                module=TransformInputs(),
                in_keys=["test_cases"],
                out_key="test_case_tensor",
            )
            steps["tvariable"] = Apply(
                module=TransformVariables(),
                in_keys=["variables", "test_case_tensor"],
                out_key="variables_tensor",
            )
            return Sequence(steps)

        def build_iou_metric():
            # Iou wrapped in TestCaseResult, both adapted via use_environment.
            iou = metrics.use_environment(
                metric=metrics.Iou(),
                in_keys=["actual", "expected"],
                value_key="actual",
            )
            return metrics.use_environment(
                metric=metrics.TestCaseResult(interpreter, metric=iou),
                in_keys=["test_cases", "actual"],
                value_key="actual",
            )

        collate_options = {
            "test_case_tensor": CollateOptions(False, 0, 0),
            "input_feature": CollateOptions(False, 0, 0),
            "test_case_feature": CollateOptions(False, 0, 0),
            "reference_features": CollateOptions(True, 0, 0),
            "variables_tensor": CollateOptions(True, 0, 0),
            "previous_actions": CollateOptions(True, 0, -1),
            "hidden_state": CollateOptions(False, 0, 0),
            "state": CollateOptions(False, 0, 0),
            "ground_truth_actions": CollateOptions(True, 0, -1),
        }
        collate = Collate(**collate_options)

        # Inner sampler over action sequences.
        subtype_check = IsSubtype()
        sub_input_transform = build_input_transform()
        action_steps = OrderedDict()
        action_steps["add_previous_actions"] = Apply(
            module=AddPreviousActions(encoder, n_dependent=1),
            in_keys=["action_sequence", "reference"],
            out_key="previous_actions",
            constants={"train": False},
        )
        action_steps["add_state"] = AddState("state")
        action_steps["add_hidden_state"] = AddState("hidden_state")
        subsampler = ActionSequenceSampler(
            encoder,
            subtype_check,
            sub_input_transform,
            Compose(action_steps),
            collate,
            model,
            rng=np.random.RandomState(0),
        )
        # Sampler outputs are passed through the parser's ``unparse``.
        subsampler = mlprogram.samplers.transform(
            subsampler,
            Parser().unparse,
        )
        subsynthesizer = SMC(
            5,
            1,
            subsampler,
            max_try_num=1,
            to_key=Pick("action_sequence"),
            rng=np.random.RandomState(0),
        )

        # Outer sampler driving the inner synthesizer.
        test_case_transform = Apply(
            module=TransformInputs(),
            in_keys=["test_cases"],
            out_key="test_case_tensor",
        )
        sampler = SequentialProgramSampler(
            subsynthesizer,
            test_case_transform,
            collate,
            model.encode_input,
            interpreter=interpreter,
            expander=Expander(),
            rng=np.random.RandomState(0),
        )

        if rollout:
            filtered_sampler = FilteredSampler(
                sampler,
                build_iou_metric(),
                1.0,
            )
            return SMC(
                3,
                1,
                filtered_sampler,
                rng=np.random.RandomState(0),
                to_key=Pick("interpreter_state"),
                max_try_num=1,
            )

        # No rollout: score samples with the model's value head instead.
        value_input_transform = build_input_transform()
        value_network = torch.nn.Sequential(OrderedDict([
            ("encoder", model.encoder),
            ("value", model.value),
            ("pick", mlprogram.nn.Function(Pick("value"))),
        ]))
        valued_sampler = SamplerWithValueNetwork(
            sampler,
            value_input_transform,
            collate,
            value_network,
        )
        smc = SMC(
            3,
            1,
            valued_sampler,
            rng=np.random.RandomState(0),
            to_key=Pick("interpreter_state"),
            max_try_num=1,
        )
        timed_synthesizer = SynthesizerWithTimeout(smc, 1)
        return FilteredSynthesizer(
            timed_synthesizer,
            build_iou_metric(),
            1.0,
        )