def test_encode_invalid_sequence(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, True)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, False)),
         ("arg0", NodeType("value", NodeConstraint.Token, True)),
         ("arg1", NodeType("value", NodeConstraint.Token, True))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, True),
                 NodeType("expr", NodeConstraint.Node, True)],
                [("", "f")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(funcdef))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(GenerateToken("", "1"))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    assert encoder.encode_action(
        action_sequence, [Token("", "2", "2")]) is None
def test_encode_parent(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, True)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, True)),
         ("arg0", NodeType("value", NodeConstraint.Token, True)),
         ("arg1", NodeType("value", NodeConstraint.Token, True))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, True),
                 NodeType("expr", NodeConstraint.Node, False)],
                [("", "f"), ("", "2")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(funcdef))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(GenerateToken("", "1"))
    action_sequence.eval(GenerateToken("", "2"))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    parent = encoder.encode_parent(action_sequence)
    assert np.array_equal(
        [[-1, -1, -1, -1],
         [1, 2, 0, 0],
         [1, 2, 0, 0],
         [1, 2, 0, 0],
         [1, 2, 0, 0],
         [1, 2, 0, 1]],
        parent.numpy())
def test_encode_empty_sequence(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, False)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, False)),
         ("arg0", NodeType("value", NodeConstraint.Token, False)),
         ("arg1", NodeType("value", NodeConstraint.Token, False))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, False),
                 NodeType("expr", NodeConstraint.Node, False)],
                [("", "f")]),
        0)
    action_sequence = ActionSequence()
    action = encoder.encode_action(action_sequence, [Token("", "1", "1")])
    parent = encoder.encode_parent(action_sequence)
    d, m = encoder.encode_tree(action_sequence)
    assert np.array_equal([[-1, -1, -1, -1]], action.numpy())
    assert np.array_equal([[-1, -1, -1, -1]], parent.numpy())
    assert np.array_equal(np.zeros((0,)), d.numpy())
    assert np.array_equal(np.zeros((0, 0)), m.numpy())
def test_encode_tree(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, True)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, True)),
         ("arg0", NodeType("value", NodeConstraint.Token, True)),
         ("arg1", NodeType("value", NodeConstraint.Token, True))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, True),
                 NodeType("expr", NodeConstraint.Node, False)],
                [("", "f"), ("", "2")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(funcdef))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(GenerateToken("", "1"))
    d, m = encoder.encode_tree(action_sequence)
    assert np.array_equal([0, 1, 1], d.numpy())
    assert np.array_equal(
        [[0, 1, 1], [0, 0, 0], [0, 0, 0]], m.numpy())
def test_decode(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, True)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, True)),
         ("arg0", NodeType("value", NodeConstraint.Token, True)),
         ("arg1", NodeType("value", NodeConstraint.Token, True))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, True),
                 NodeType("expr", NodeConstraint.Node, False)],
                [("", "f")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(funcdef))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(GenerateToken("", "1"))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    expected_action_sequence = ActionSequence()
    expected_action_sequence.eval(ApplyRule(funcdef))
    expected_action_sequence.eval(GenerateToken("", "f"))
    expected_action_sequence.eval(GenerateToken("", "1"))
    expected_action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    result = encoder.decode(
        encoder.encode_action(
            action_sequence, [Token(None, "1", "1")])[:-1, 1:],
        [Token(None, "1", "1")])
    assert (expected_action_sequence.action_sequence
            == result.action_sequence)
def test_encode_path(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, True)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("constant", NodeType("value", NodeConstraint.Token, True))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, True),
                 NodeType("expr", NodeConstraint.Node, True)],
                [("", "f"), ("", "2")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(funcdef))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(GenerateToken("", "1"))
    action_sequence.eval(GenerateToken("", "2"))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    action_sequence.eval(ApplyRule(expr))
    action_sequence.eval(GenerateToken("", "f"))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    action_sequence.eval(ApplyRule(CloseVariadicFieldRule()))
    path = encoder.encode_path(action_sequence, 2)
    assert np.array_equal(
        np.array([
            [-1, -1],  # funcdef
            [2, -1],   # f
            [2, -1],   # 1
            [2, -1],   # 2
            [2, -1],   # CloseVariadicField
            [2, -1],   # expr
            [3, 2],    # f
            [3, 2],    # CloseVariadicField
            [2, -1],   # CloseVariadicField
        ], dtype=np.long),
        path.numpy())
    path = encoder.encode_path(action_sequence, 1)
    assert np.array_equal(
        np.array([
            [-1],  # funcdef
            [2],   # f
            [2],   # 1
            [2],   # 2
            [2],   # CloseVariadicField
            [2],   # expr
            [3],   # f
            [3],   # CloseVariadicField
            [2],   # CloseVariadicField
        ], dtype=np.long),
        path.numpy())
def test_encode_completed_sequence(self):
    none = ExpandTreeRule(NodeType("value", NodeConstraint.Node, False), [])
    encoder = ActionSequenceEncoder(
        Samples([none],
                [NodeType("value", NodeConstraint.Node, False)],
                [("", "f")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(none))
    action = encoder.encode_action(action_sequence, [Token("", "1", "1")])
    parent = encoder.encode_parent(action_sequence)
    assert np.array_equal(
        [[-1, 2, -1, -1], [-1, -1, -1, -1]], action.numpy())
    assert np.array_equal(
        [[-1, -1, -1, -1], [-1, -1, -1, -1]], parent.numpy())
def test_eval(self): entries = [Environment( {"text_query": "foo bar", "ground_truth": "y = x + 1"}, set(["ground_truth"]) )] dataset = ListDataset(entries) d = get_samples(dataset, MockParser()) aencoder = ActionSequenceEncoder(d, 0) action_sequence = GroundTruthToActionSequence(MockParser())( "y = x + 1" ) transform = AddActions(aencoder) action_tensor = transform( reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")], action_sequence=action_sequence, train=False ) assert np.array_equal( [ [2, 2, 0], [4, 3, 1], [6, 4, 2], [6, 4, 2], [5, 3, 1], [6, 5, 5], [6, 5, 5], [5, 5, 5], [6, 4, 8], [6, 4, 8], [5, 5, 5], [9, 6, 11], [9, 6, 11], [-1, -1, -1] ], action_tensor.numpy() )
def test_simple_case(self):
    entries = [Environment(
        {"ground_truth": "y = x + 1"},
        set(["ground_truth"])
    )]
    dataset = ListDataset(entries)
    d = get_samples(dataset, MockParser())
    aencoder = ActionSequenceEncoder(d, 0)
    action_sequence = GroundTruthToActionSequence(MockParser())(
        ground_truth="y = x + 1"
    )
    transform = EncodeActionSequence(aencoder)
    ground_truth = transform(
        action_sequence=action_sequence,
        reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")],
    )
    assert np.array_equal(
        [
            [3, -1, -1], [4, -1, -1], [-1, 1, -1], [1, -1, -1],
            [5, -1, -1], [-1, 2, -1], [1, -1, -1], [4, -1, -1],
            [-1, 3, -1], [1, -1, -1], [6, -1, -1], [-1, 4, -1],
            [1, -1, -1]
        ],
        ground_truth.numpy()
    )
def test_eval(self): entries = [Environment( {"text_query": "ab test", "ground_truth": "y = x + 1"}, set(["ground_truth"]) )] dataset = ListDataset(entries) d = get_samples(dataset, MockParserWithoutVariadicArgs()) aencoder = ActionSequenceEncoder(d, 0) action_sequence = GroundTruthToActionSequence(MockParserWithoutVariadicArgs())( "y = x + 1" ) transform = AddQueryForTreeGenDecoder(aencoder, 3,) query = transform( reference=[Token(None, "ab", "ab"), Token(None, "test", "test")], action_sequence=action_sequence, train=False ) assert np.array_equal( [ [-1, -1, -1], [2, -1, -1], [3, 2, -1], [4, 3, 2], [3, 2, -1], [5, 3, 2], [5, 3, 2], [4, 5, 3], [5, 3, 2], [6, 5, 3] ], query.numpy() )
def test_eval(self): entries = [Environment( {"text_query": "ab test", "ground_truth": "y = x + 1"}, set(["ground_truth"]) )] dataset = ListDataset(entries) d = get_samples(dataset, MockParserWithoutVariadicArgs()) aencoder = ActionSequenceEncoder(d, 0) action_sequence = GroundTruthToActionSequence(MockParserWithoutVariadicArgs())( "y = x + 1" ) transform = AddActionSequenceAsTree(aencoder,) matrix, depth = transform( reference=[Token(None, "ab", "ab"), Token(None, "test", "test")], action_sequence=action_sequence, train=False ) assert np.array_equal( [0, 1, 2, 3, 2, 3, 3, 4, 3, 4], depth.numpy() ) assert np.array_equal( [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 1, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], matrix.numpy() )
def test_n_dependent(self):
    entries = [Environment(
        {"text_query": "ab test", "ground_truth": "y = x + 1"},
        set(["ground_truth"])
    )]
    dataset = ListDataset(entries)
    d = get_samples(dataset, MockParserWithoutVariadicArgs())
    aencoder = ActionSequenceEncoder(d, 0)
    action_sequence = GroundTruthToActionSequence(
        MockParserWithoutVariadicArgs()
    )("y = x + 1")
    transform = AddPreviousActionRules(aencoder, 2, n_dependent=3)
    prev_rule_action = transform(
        reference=[Token(None, "ab", "ab"), Token(None, "test", "test")],
        action_sequence=action_sequence,
        train=False,
    )
    assert np.array_equal(
        [
            # str -> "y"
            [[-1, -1, -1], [-1, 3, -1], [-1, -1, -1]],
            # Number -> number
            [[8, -1, -1], [9, -1, -1], [-1, -1, -1]],
            [[-1, -1, -1], [-1, 4, -1], [-1, -1, -1]],
        ],
        prev_rule_action.numpy()
    )
def test_eval(self): entries = [Environment( {"ground_truth": "y = x + 1"}, set(["ground_truth"]) )] dataset = ListDataset(entries) d = get_samples(dataset, MockParser()) aencoder = ActionSequenceEncoder(d, 0) action_sequence = GroundTruthToActionSequence(MockParser())( "y = x + 1" ) transform = AddPreviousActions(aencoder) prev_action_tensor = transform( reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")], action_sequence=action_sequence, train=False ) assert np.array_equal( [ [2, -1, -1], [3, -1, -1], [4, -1, -1], [-1, 1, -1], [1, -1, -1], [5, -1, -1], [-1, 2, -1], [1, -1, -1], [4, -1, -1], [-1, 3, -1], [1, -1, -1], [6, -1, -1], [-1, 4, -1], [1, -1, -1] ], prev_action_tensor.numpy() )
def prepare_encoder(self, dataset, parser):
    words = get_words(dataset, tokenize)
    chars = get_characters(dataset, tokenize)
    samples = get_samples(dataset, parser)
    qencoder = LabelEncoder(words, 2)
    cencoder = LabelEncoder(chars, 0)
    aencoder = ActionSequenceEncoder(samples, 2)
    return qencoder, cencoder, aencoder
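# Usage sketch (not from the original tests): the dataset below reuses the
# Environment/ListDataset/MockParser fixtures that appear elsewhere in this
# file; the call itself simply mirrors prepare_encoder's signature.
#
#   dataset = ListDataset([Environment(
#       {"text_query": "foo bar", "ground_truth": "y = x + 1"},
#       set(["ground_truth"]))])
#   qencoder, cencoder, aencoder = self.prepare_encoder(dataset, MockParser())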
def test_decode_invalid_tensor(self):
    funcdef = ExpandTreeRule(
        NodeType("def", NodeConstraint.Node, False),
        [("name", NodeType("value", NodeConstraint.Token, False)),
         ("body", NodeType("expr", NodeConstraint.Node, True))])
    expr = ExpandTreeRule(
        NodeType("expr", NodeConstraint.Node, False),
        [("op", NodeType("value", NodeConstraint.Token, False)),
         ("arg0", NodeType("value", NodeConstraint.Token, False)),
         ("arg1", NodeType("value", NodeConstraint.Token, False))])
    encoder = ActionSequenceEncoder(
        Samples([funcdef, expr],
                [NodeType("def", NodeConstraint.Node, False),
                 NodeType("value", NodeConstraint.Token, False),
                 NodeType("expr", NodeConstraint.Node, False)],
                [("", "f")]),
        0)
    assert encoder.decode(torch.LongTensor([[-1, -1, -1]]), []) is None
    assert encoder.decode(torch.LongTensor([[-1, -1, 1]]), []) is None
def test_impossible_case(self):
    entries = [Environment(
        {"ground_truth": "y = x + 1"},
        set(["ground_truth"])
    )]
    dataset = ListDataset(entries)
    d = get_samples(dataset, MockParser())
    d.tokens = [("", "y"), ("", "1")]
    aencoder = ActionSequenceEncoder(d, 0)
    action_sequence = GroundTruthToActionSequence(MockParser())(
        ground_truth="y = x + 1"
    )
    transform = EncodeActionSequence(aencoder)
    with pytest.raises(RuntimeError):
        transform(
            reference=[Token(None, "foo", "foo"), Token(None, "bar", "bar")],
            action_sequence=action_sequence,
        )
def test_eval(self): entries = [Environment( {"text_query": "ab test", "ground_truth": "y = x + 1"}, set(["ground_truth"]) )] dataset = ListDataset(entries) d = get_samples(dataset, MockParserWithoutVariadicArgs()) aencoder = ActionSequenceEncoder(d, 0) action_sequence = GroundTruthToActionSequence(MockParserWithoutVariadicArgs())( "y = x + 1" ) transform = AddPreviousActionRules(aencoder, 2) prev_rule_action = transform( reference=[Token(None, "ab", "ab"), Token(None, "test", "test")], action_sequence=action_sequence, train=False ) assert np.array_equal( [ # None -> Root [[1, -1, -1], [2, -1, -1], [-1, -1, -1]], # Assign -> Name, expr [[3, -1, -1], [4, -1, -1], [5, -1, -1]], # Name -> str [[4, -1, -1], [6, -1, -1], [-1, -1, -1]], # str -> "x" [[-1, -1, -1], [-1, 1, -1], [-1, -1, -1]], # Op -> str, expr, expr [[7, -1, -1], [6, -1, -1], [5, -1, -1]], # str -> "+" [[-1, -1, -1], [-1, 2, -1], [-1, -1, -1]], # Name -> str [[4, -1, -1], [6, -1, -1], [-1, -1, -1]], # str -> "y" [[-1, -1, -1], [-1, 3, -1], [-1, -1, -1]], # Number -> number [[8, -1, -1], [9, -1, -1], [-1, -1, -1]], [[-1, -1, -1], [-1, 4, -1], [-1, -1, -1]], ], prev_rule_action.numpy() )
def prepare_encoder(self, dataset, parser):
    return ActionSequenceEncoder(
        get_samples(dataset, parser, reference=True), 0)
def create_encoder():
    return ActionSequenceEncoder(
        Samples([Root2X, Root2Y, X2Y_list, Ysub2Str],
                [R, X, Y, Ysub, Y_list, Str],
                [("Str", "x"), ("Int", "1")]),
        0)
def test_reserved_labels(self):
    encoder = ActionSequenceEncoder(Samples([], [], []), 0)
    assert 2 == len(encoder._rule_encoder.vocab)
    assert 1 == len(encoder._token_encoder.vocab)
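# Note (an assumption, not verified against the library internals): the two
# reserved rule labels and the single reserved token label counted above are
# consistent with test_encode_raw_value below, where a raw value absent from
# the samples maps to index 0 of the token vocabulary.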
def test_encode_raw_value(self):
    encoder = ActionSequenceEncoder(
        Samples([], [], [("", "foo"), ("x", "foo")]), 0)
    assert [1, 2] == encoder.encode_raw_value("foo")
    assert [0] == encoder.encode_raw_value("bar")
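# A minimal encode/decode round-trip sketch (not part of the test suite). It
# combines the single-rule grammar from test_encode_completed_sequence with
# the tensor slicing used in test_decode, so every call below mirrors usage
# already shown in this file; the helper name is hypothetical.
def _example_round_trip():
    none = ExpandTreeRule(NodeType("value", NodeConstraint.Node, False), [])
    encoder = ActionSequenceEncoder(
        Samples([none],
                [NodeType("value", NodeConstraint.Node, False)],
                [("", "f")]),
        0)
    action_sequence = ActionSequence()
    action_sequence.eval(ApplyRule(none))
    reference = [Token(None, "1", "1")]
    # Drop the trailing row and the leading column before decoding, exactly
    # as test_decode does with encode_action's output.
    tensor = encoder.encode_action(action_sequence, reference)[:-1, 1:]
    return encoder.decode(tensor, reference)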