Exemple #1
0
    def test_encode_invalid_sequence(self):
        """Check that ``encode_action`` yields ``None`` for this sequence.

        The sequence generates the token "1", which appears neither in the
        token list handed to ``Samples`` (only "f") nor in the reference
        passed to ``encode_action`` (only "2") -- presumably the reason the
        sequence cannot be encoded.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, True)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, False)),
            ("arg0", NodeType("value", NodeConstraint.Token, True)),
            ("arg1", NodeType("value", NodeConstraint.Token, True)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, True),
            NodeType("expr", NodeConstraint.Node, True),
        ]
        samples = Samples([funcdef, expr], node_types, [("", "f")])
        encoder = ActionSequenceEncoder(samples, 0)

        action_sequence = ActionSequence()
        actions = (
            ApplyRule(funcdef),
            GenerateToken("", "f"),
            GenerateToken("", "1"),
            ApplyRule(CloseVariadicFieldRule()),
        )
        for action in actions:
            action_sequence.eval(action)

        encoded = encoder.encode_action(action_sequence,
                                        [Token("", "2", "2")])
        assert encoded is None
Exemple #2
0
    def test_encode_parent(self):
        """Check the tensor ``encode_parent`` returns for a 5-action sequence.

        The expected tensor has six rows (one more than the number of
        evaluated actions) and its first row is all ``-1``.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, True)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, True)),
            ("arg0", NodeType("value", NodeConstraint.Token, True)),
            ("arg1", NodeType("value", NodeConstraint.Token, True)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, True),
            NodeType("expr", NodeConstraint.Node, False),
        ]
        samples = Samples([funcdef, expr], node_types,
                          [("", "f"), ("", "2")])
        encoder = ActionSequenceEncoder(samples, 0)

        action_sequence = ActionSequence()
        actions = (
            ApplyRule(funcdef),
            GenerateToken("", "f"),
            GenerateToken("", "1"),
            GenerateToken("", "2"),
            ApplyRule(CloseVariadicFieldRule()),
        )
        for action in actions:
            action_sequence.eval(action)
        parent = encoder.encode_parent(action_sequence)

        expected = [
            [-1, -1, -1, -1],
            [1, 2, 0, 0],
            [1, 2, 0, 0],
            [1, 2, 0, 0],
            [1, 2, 0, 0],
            [1, 2, 0, 1],
        ]
        assert np.array_equal(expected, parent.numpy())
Exemple #3
0
    def test_encode_empty_sequence(self):
        """Check every encoding of an ``ActionSequence`` with no actions.

        The action and parent encodings are each a single all ``-1`` row,
        and ``encode_tree`` returns an empty depth vector and an empty
        0x0 matrix.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, False)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, False)),
            ("arg0", NodeType("value", NodeConstraint.Token, False)),
            ("arg1", NodeType("value", NodeConstraint.Token, False)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, False),
            NodeType("expr", NodeConstraint.Node, False),
        ]
        samples = Samples([funcdef, expr], node_types, [("", "f")])
        encoder = ActionSequenceEncoder(samples, 0)

        # Nothing is evaluated on the sequence before it is encoded.
        empty_sequence = ActionSequence()
        action = encoder.encode_action(empty_sequence, [Token("", "1", "1")])
        parent = encoder.encode_parent(empty_sequence)
        depth, matrix = encoder.encode_tree(empty_sequence)

        padding_row = [[-1, -1, -1, -1]]
        assert np.array_equal(padding_row, action.numpy())
        assert np.array_equal(padding_row, parent.numpy())
        assert np.array_equal(np.zeros((0, )), depth.numpy())
        assert np.array_equal(np.zeros((0, 0)), matrix.numpy())
Exemple #4
0
    def test_encode_tree(self):
        """Check the pair ``encode_tree`` returns for a three-action sequence.

        The first returned tensor is expected to be ``[0, 1, 1]`` and the
        second a 3x3 matrix whose only non-zero entries link row 0 to
        columns 1 and 2.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, True)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, True)),
            ("arg0", NodeType("value", NodeConstraint.Token, True)),
            ("arg1", NodeType("value", NodeConstraint.Token, True)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, True),
            NodeType("expr", NodeConstraint.Node, False),
        ]
        samples = Samples([funcdef, expr], node_types,
                          [("", "f"), ("", "2")])
        encoder = ActionSequenceEncoder(samples, 0)

        action_sequence = ActionSequence()
        actions = (
            ApplyRule(funcdef),
            GenerateToken("", "f"),
            GenerateToken("", "1"),
        )
        for action in actions:
            action_sequence.eval(action)
        depth, matrix = encoder.encode_tree(action_sequence)

        assert np.array_equal([0, 1, 1], depth.numpy())
        expected_matrix = [
            [0, 1, 1],
            [0, 0, 0],
            [0, 0, 0],
        ]
        assert np.array_equal(expected_matrix, matrix.numpy())
Exemple #5
0
    def test_decode(self):
        """Check that ``decode`` inverts ``encode_action`` for one sequence.

        The encoded tensor is sliced with ``[:-1, 1:]`` (last row and first
        column dropped) before being decoded, and the decoded actions must
        equal those of an identically built sequence.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, True)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, True)),
            ("arg0", NodeType("value", NodeConstraint.Token, True)),
            ("arg1", NodeType("value", NodeConstraint.Token, True)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, True),
            NodeType("expr", NodeConstraint.Node, False),
        ]
        samples = Samples([funcdef, expr], node_types, [("", "f")])
        encoder = ActionSequenceEncoder(samples, 0)

        def build_sequence():
            # Both the input and the expected sequence are built the same way.
            sequence = ActionSequence()
            sequence.eval(ApplyRule(funcdef))
            sequence.eval(GenerateToken("", "f"))
            sequence.eval(GenerateToken("", "1"))
            sequence.eval(ApplyRule(CloseVariadicFieldRule()))
            return sequence

        action_sequence = build_sequence()
        expected_action_sequence = build_sequence()

        encoded = encoder.encode_action(action_sequence,
                                        [Token(None, "1", "1")])
        result = encoder.decode(encoded[:-1, 1:], [Token(None, "1", "1")])
        assert \
            expected_action_sequence.action_sequence == result.action_sequence
Exemple #6
0
    def test_encode_path(self):
        """Check ``encode_path`` for the second arguments 2 and 1.

        The expected tensors have one row per evaluated action and as many
        columns as the second argument given to ``encode_path``; ``-1``
        fills the unused entries.
        """
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False),
            [("name", NodeType("value", NodeConstraint.Token, True)),
             ("body", NodeType("expr", NodeConstraint.Node, True))])
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False),
            [("constant", NodeType("value", NodeConstraint.Token, True))])

        encoder = ActionSequenceEncoder(
            Samples([funcdef, expr], [
                NodeType("def", NodeConstraint.Node, False),
                NodeType("value", NodeConstraint.Token, True),
                NodeType("expr", NodeConstraint.Node, True)
            ], [("", "f"), ("", "2")]), 0)
        action_sequence = ActionSequence()
        action_sequence.eval(ApplyRule(funcdef))
        action_sequence.eval(GenerateToken("", "f"))
        action_sequence.eval(GenerateToken("", "1"))
        action_sequence.eval(GenerateToken("", "2"))
        action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
        action_sequence.eval(ApplyRule(expr))
        action_sequence.eval(GenerateToken("", "f"))
        action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
        action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
        path = encoder.encode_path(action_sequence, 2)

        # ``np.long`` was deprecated in NumPy 1.20 and removed in 1.24, so an
        # explicit fixed-width integer dtype is used instead.
        # ``np.array_equal`` compares values, so the result is unaffected.
        assert np.array_equal(
            np.array(
                [
                    [-1, -1],  # funcdef
                    [2, -1],  # f
                    [2, -1],  # 1
                    [2, -1],  # 2
                    [2, -1],  # CloseVariadicField
                    [2, -1],  # expr
                    [3, 2],  # f
                    [3, 2],  # CloseVariadicField
                    [2, -1],  # CloseVariadicField
                ],
                dtype=np.int64),
            path.numpy())
        path = encoder.encode_path(action_sequence, 1)
        assert np.array_equal(
            np.array(
                [
                    [-1],  # funcdef
                    [2],  # f
                    [2],  # 1
                    [2],  # 2
                    [2],  # CloseVariadicField
                    [2],  # expr
                    [3],  # f
                    [3],  # CloseVariadicField
                    [2],  # CloseVariadicField
                ],
                dtype=np.int64),
            path.numpy())
Exemple #7
0
    def test_encode_completed_sequence(self):
        """Check encodings after applying a single rule with no children.

        Both ``encode_action`` and ``encode_parent`` are expected to return
        two rows, the last of which is all ``-1``.
        """
        value_type = NodeType("value", NodeConstraint.Node, False)
        none = ExpandTreeRule(value_type, [])
        samples = Samples([none],
                          [NodeType("value", NodeConstraint.Node, False)],
                          [("", "f")])
        encoder = ActionSequenceEncoder(samples, 0)

        action_sequence = ActionSequence()
        action_sequence.eval(ApplyRule(none))
        action = encoder.encode_action(action_sequence, [Token("", "1", "1")])
        parent = encoder.encode_parent(action_sequence)

        expected_action = [[-1, 2, -1, -1], [-1, -1, -1, -1]]
        expected_parent = [[-1, -1, -1, -1], [-1, -1, -1, -1]]
        assert np.array_equal(expected_action, action.numpy())
        assert np.array_equal(expected_parent, parent.numpy())
Exemple #8
0
    def test_eval(self):
        """Check the tensor ``AddActions`` produces for ``"y = x + 1"``.

        The encoder is built from a one-entry dataset parsed with
        ``MockParser`` and the transform is called with ``train=False``.
        """
        environment = Environment(
            {"text_query": "foo bar", "ground_truth": "y = x + 1"},
            {"ground_truth"},
        )
        dataset = ListDataset([environment])
        samples = get_samples(dataset, MockParser())
        aencoder = ActionSequenceEncoder(samples, 0)
        to_action_sequence = GroundTruthToActionSequence(MockParser())
        action_sequence = to_action_sequence("y = x + 1")
        transform = AddActions(aencoder)
        reference = [Token(None, "foo", "foo"), Token(None, "bar", "bar")]
        action_tensor = transform(
            reference=reference,
            action_sequence=action_sequence,
            train=False,
        )

        expected = [
            [2, 2, 0], [4, 3, 1], [6, 4, 2], [6, 4, 2], [5, 3, 1],
            [6, 5, 5], [6, 5, 5], [5, 5, 5], [6, 4, 8], [6, 4, 8],
            [5, 5, 5], [9, 6, 11], [9, 6, 11], [-1, -1, -1],
        ]
        assert np.array_equal(expected, action_tensor.numpy())
Exemple #9
0
 def test_simple_case(self):
     """Check the tensor ``EncodeActionSequence`` returns for ``"y = x + 1"``.

     The encoder is built from a one-entry dataset parsed with
     ``MockParser``; the expected result has 13 rows of three values.
     """
     environment = Environment(
         {"ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParser())
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(MockParser())
     action_sequence = to_action_sequence(ground_truth="y = x + 1")
     transform = EncodeActionSequence(aencoder)
     ground_truth = transform(
         action_sequence=action_sequence,
         reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")],
     )

     expected = [
         [3, -1, -1], [4, -1, -1], [-1, 1, -1], [1, -1, -1],
         [5, -1, -1], [-1, 2, -1], [1, -1, -1], [4, -1, -1],
         [-1, 3, -1], [1, -1, -1], [6, -1, -1], [-1, 4, -1],
         [1, -1, -1],
     ]
     assert np.array_equal(expected, ground_truth.numpy())
Exemple #10
0
 def test_eval(self):
     """Check the query tensor from ``AddQueryForTreeGenDecoder``.

     The transform is constructed with ``3`` as its second argument and the
     expected query has three columns, padded with ``-1``.
     """
     environment = Environment(
         {"text_query": "ab test", "ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParserWithoutVariadicArgs())
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(
         MockParserWithoutVariadicArgs())
     action_sequence = to_action_sequence("y = x + 1")
     transform = AddQueryForTreeGenDecoder(aencoder, 3)
     reference = [Token(None, "ab", "ab"), Token(None, "test", "test")]
     query = transform(
         reference=reference,
         action_sequence=action_sequence,
         train=False,
     )

     expected = [
         [-1, -1, -1], [2, -1, -1], [3, 2, -1], [4, 3, 2],
         [3, 2, -1], [5, 3, 2], [5, 3, 2], [4, 5, 3],
         [5, 3, 2], [6, 5, 3],
     ]
     assert np.array_equal(expected, query.numpy())
Exemple #11
0
 def test_eval(self):
     """Check the (matrix, depth) pair from ``AddActionSequenceAsTree``.

     For ``"y = x + 1"`` parsed with ``MockParserWithoutVariadicArgs`` the
     expected depth vector has 10 entries and the matrix is 10x10.
     """
     environment = Environment(
         {"text_query": "ab test", "ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParserWithoutVariadicArgs())
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(
         MockParserWithoutVariadicArgs())
     action_sequence = to_action_sequence("y = x + 1")
     transform = AddActionSequenceAsTree(aencoder)
     reference = [Token(None, "ab", "ab"), Token(None, "test", "test")]
     matrix, depth = transform(
         reference=reference,
         action_sequence=action_sequence,
         train=False,
     )

     expected_depth = [0, 1, 2, 3, 2, 3, 3, 4, 3, 4]
     assert np.array_equal(expected_depth, depth.numpy())

     expected_matrix = [
         [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
         [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 1, 1, 0, 1, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     ]
     assert np.array_equal(expected_matrix, matrix.numpy())
Exemple #12
0
 def test_n_dependent(self):
     """Check ``AddPreviousActionRules`` when ``n_dependent=3`` is given.

     With this setting the expected tensor holds only three entries, each
     a 3x3 block.
     """
     environment = Environment(
         {"text_query": "ab test", "ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParserWithoutVariadicArgs())
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(
         MockParserWithoutVariadicArgs())
     action_sequence = to_action_sequence("y = x + 1")
     transform = AddPreviousActionRules(aencoder, 2, n_dependent=3)
     reference = [Token(None, "ab", "ab"), Token(None, "test", "test")]
     prev_rule_action = transform(
         reference=reference,
         action_sequence=action_sequence,
         train=False,
     )

     expected = [
         # str -> "y"
         [[-1, -1, -1], [-1, 3, -1], [-1, -1, -1]],
         # Number -> number
         [[8, -1, -1], [9, -1, -1], [-1, -1, -1]],
         [[-1, -1, -1], [-1, 4, -1], [-1, -1, -1]],
     ]
     assert np.array_equal(expected, prev_rule_action.numpy())
Exemple #13
0
    def test_eval(self):
        """Check the tensor ``AddPreviousActions`` produces for ``"y = x + 1"``.

        The encoder is built from a one-entry dataset parsed with
        ``MockParser``; the expected result has 14 rows of three values.
        """
        environment = Environment(
            {"ground_truth": "y = x + 1"},
            {"ground_truth"},
        )
        dataset = ListDataset([environment])
        samples = get_samples(dataset, MockParser())
        aencoder = ActionSequenceEncoder(samples, 0)
        to_action_sequence = GroundTruthToActionSequence(MockParser())
        action_sequence = to_action_sequence("y = x + 1")
        transform = AddPreviousActions(aencoder)
        reference = [Token(None, "foo", "foo"), Token(None, "bar", "bar")]
        prev_action_tensor = transform(
            reference=reference,
            action_sequence=action_sequence,
            train=False,
        )

        expected = [
            [2, -1, -1], [3, -1, -1], [4, -1, -1], [-1, 1, -1],
            [1, -1, -1], [5, -1, -1], [-1, 2, -1], [1, -1, -1],
            [4, -1, -1], [-1, 3, -1], [1, -1, -1], [6, -1, -1],
            [-1, 4, -1], [1, -1, -1],
        ]
        assert np.array_equal(expected, prev_action_tensor.numpy())
Exemple #14
0
    def prepare_encoder(self, dataset, parser):
        """Build the word, character and action-sequence encoders.

        Args:
            dataset: Dataset passed to ``get_words``, ``get_characters`` and
                ``get_samples``.
            parser: Parser passed to ``get_samples``.

        Returns:
            A ``(qencoder, cencoder, aencoder)`` tuple: two ``LabelEncoder``
            instances (built with second arguments 2 and 0) and an
            ``ActionSequenceEncoder`` (built with second argument 2).
        """
        word_vocab = get_words(dataset, tokenize)
        char_vocab = get_characters(dataset, tokenize)
        action_samples = get_samples(dataset, parser)

        return (
            LabelEncoder(word_vocab, 2),
            LabelEncoder(char_vocab, 0),
            ActionSequenceEncoder(action_samples, 2),
        )
Exemple #15
0
    def test_decode_invalid_tensor(self):
        """Check that ``decode`` returns ``None`` for two one-row tensors.

        Both tensors are decoded against an empty reference list.
        """
        funcdef_children = [
            ("name", NodeType("value", NodeConstraint.Token, False)),
            ("body", NodeType("expr", NodeConstraint.Node, True)),
        ]
        funcdef = ExpandTreeRule(
            NodeType("def", NodeConstraint.Node, False), funcdef_children)
        expr_children = [
            ("op", NodeType("value", NodeConstraint.Token, False)),
            ("arg0", NodeType("value", NodeConstraint.Token, False)),
            ("arg1", NodeType("value", NodeConstraint.Token, False)),
        ]
        expr = ExpandTreeRule(
            NodeType("expr", NodeConstraint.Node, False), expr_children)

        node_types = [
            NodeType("def", NodeConstraint.Node, False),
            NodeType("value", NodeConstraint.Token, False),
            NodeType("expr", NodeConstraint.Node, False),
        ]
        samples = Samples([funcdef, expr], node_types, [("", "f")])
        encoder = ActionSequenceEncoder(samples, 0)

        for row in ([-1, -1, -1], [-1, -1, 1]):
            assert encoder.decode(torch.LongTensor([row]), []) is None
Exemple #16
0
 def test_impossible_case(self):
     """Check that ``EncodeActionSequence`` raises ``RuntimeError``.

     The sample tokens are overwritten with only "y" and "1" before the
     encoder is built -- presumably leaving parts of ``"y = x + 1"``
     impossible to encode, since the reference holds only "foo" and "bar".
     """
     environment = Environment(
         {"ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParser())
     # Replace the collected tokens before the encoder is constructed.
     samples.tokens = [("", "y"), ("", "1")]
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(MockParser())
     action_sequence = to_action_sequence(ground_truth="y = x + 1")
     transform = EncodeActionSequence(aencoder)
     with pytest.raises(RuntimeError):
         transform(
             reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")],
             action_sequence=action_sequence,
         )
Exemple #17
0
 def test_eval(self):
     """Check the tensor from ``AddPreviousActionRules(aencoder, 2)``.

     For ``"y = x + 1"`` parsed with ``MockParserWithoutVariadicArgs`` the
     expected tensor holds ten entries, each a 3x3 block.
     """
     environment = Environment(
         {"text_query": "ab test", "ground_truth": "y = x + 1"},
         {"ground_truth"},
     )
     dataset = ListDataset([environment])
     samples = get_samples(dataset, MockParserWithoutVariadicArgs())
     aencoder = ActionSequenceEncoder(samples, 0)
     to_action_sequence = GroundTruthToActionSequence(
         MockParserWithoutVariadicArgs())
     action_sequence = to_action_sequence("y = x + 1")
     transform = AddPreviousActionRules(aencoder, 2)
     reference = [Token(None, "ab", "ab"), Token(None, "test", "test")]
     prev_rule_action = transform(
         reference=reference,
         action_sequence=action_sequence,
         train=False,
     )

     expected = [
         # None -> Root
         [[1, -1, -1], [2, -1, -1], [-1, -1, -1]],
         # Assign -> Name, expr
         [[3, -1, -1], [4, -1, -1], [5, -1, -1]],
         # Name -> str
         [[4, -1, -1], [6, -1, -1], [-1, -1, -1]],
         # str -> "x"
         [[-1, -1, -1], [-1, 1, -1], [-1, -1, -1]],
         # Op -> str, expr, expr
         [[7, -1, -1], [6, -1, -1], [5, -1, -1]],
         # str -> "+"
         [[-1, -1, -1], [-1, 2, -1], [-1, -1, -1]],
         # Name -> str
         [[4, -1, -1], [6, -1, -1], [-1, -1, -1]],
         # str -> "y"
         [[-1, -1, -1], [-1, 3, -1], [-1, -1, -1]],
         # Number -> number
         [[8, -1, -1], [9, -1, -1], [-1, -1, -1]],
         [[-1, -1, -1], [-1, 4, -1], [-1, -1, -1]],
     ]
     assert np.array_equal(expected, prev_rule_action.numpy())
 def prepare_encoder(self, dataset, parser):
     """Return an ``ActionSequenceEncoder`` built from the dataset's samples.

     The samples are collected with ``get_samples(..., reference=True)`` and
     the encoder is constructed with ``0`` as its second argument.
     """
     samples = get_samples(dataset, parser, reference=True)
     return ActionSequenceEncoder(samples, 0)
def create_encoder():
    """Return an ``ActionSequenceEncoder`` over a fixed set of samples.

    The ``Samples`` object is built from four rules, six node types and the
    two tokens ``("Str", "x")`` and ``("Int", "1")``; the encoder receives
    ``0`` as its second argument.
    """
    rules = [Root2X, Root2Y, X2Y_list, Ysub2Str]
    node_types = [R, X, Y, Ysub, Y_list, Str]
    tokens = [("Str", "x"), ("Int", "1")]
    samples = Samples(rules, node_types, tokens)
    return ActionSequenceEncoder(samples, 0)
Exemple #20
0
 def test_reserved_labels(self):
     """Check vocabulary sizes of an encoder built from empty ``Samples``.

     Even with no rules or tokens supplied, the rule vocabulary has two
     entries and the token vocabulary has one.
     """
     empty_samples = Samples([], [], [])
     encoder = ActionSequenceEncoder(empty_samples, 0)
     rule_vocab_size = len(encoder._rule_encoder.vocab)
     token_vocab_size = len(encoder._token_encoder.vocab)
     assert rule_vocab_size == 2
     assert token_vocab_size == 1
Exemple #21
0
 def test_encode_raw_value(self):
     """Check ``encode_raw_value`` for a known and an unknown value.

     "foo" is registered under two kinds ("" and "x") and maps to
     ``[1, 2]``; "bar" is not registered and maps to ``[0]``.
     """
     tokens = [("", "foo"), ("x", "foo")]
     encoder = ActionSequenceEncoder(Samples([], [], tokens), 0)
     known_ids = encoder.encode_raw_value("foo")
     assert [1, 2] == known_ids
     unknown_ids = encoder.encode_raw_value("bar")
     assert [0] == unknown_ids