예제 #1
0
 def from_json(cls, obj):
     """Rebuild a State from its JSON-style mapping.

     Args:
         obj: mapping with the keys "source", "target", "truth" and
             "gold_truth". "source" and "target" are passed to
             AnnotatedSentence.from_json; "truth" and a non-None
             "gold_truth" are wrapped in Truth.

     Returns:
         A State built from the decoded fields. Its previous_state_action
         is always None because that field is not read from ``obj``.

     Raises:
         KeyError: if one of the required keys is missing from ``obj``.
     """
     source = AnnotatedSentence.from_json(obj["source"])
     target = AnnotatedSentence.from_json(obj["target"])
     truth = Truth(obj["truth"])
     # Use a conditional expression so an absent gold label stays None;
     # `x is not None and Truth(x)` would evaluate to False in that case.
     raw_gold = obj["gold_truth"]
     gold_truth = Truth(raw_gold) if raw_gold is not None else None
     # TODO(chaganty): how to serialize previous_state_action?
     previous_state_action = None
     return State(source, target, truth, gold_truth, previous_state_action)
예제 #2
0
    def test_from_tokens(self):
        """Build a sentence from raw tokens, first without and then with POS tags."""
        raw = "This is a test."
        words = "This is a test .".split()
        tags = "DT VBZ DT NN .".split()

        # Tokens only: text, length and word lookup must be populated.
        untagged = AnnotatedSentence.from_tokens(raw, words)
        assert untagged.text == raw
        assert len(untagged) == 5
        assert untagged[1].word == "is"

        # Tokens plus POS tags: same checks, and the tag is exposed as `.pos`.
        tagged = AnnotatedSentence.from_tokens(raw, words, tags)
        assert tagged.text == raw
        assert len(tagged) == 5
        assert tagged[1].word == "is"
        assert tagged[1].pos == "VBZ"
예제 #3
0
 def test_parse_pb(self, document_pb):
     """Convert the first protobuf sentence and spot-check text, length and token 1."""
     parsed = AnnotatedSentence.from_pb(document_pb.sentence[0])
     expected_text = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States."
     assert parsed.text == expected_text
     assert len(parsed) == 19
     # Token at index 1 must be "Hussein" and carry the PERSON NER label.
     assert parsed[1].word == "Hussein"
     assert parsed[1].ner == "PERSON"
예제 #4
0
 def test_parse_pb(self, document_pb):
     """Parse sentence 0 of the document protobuf into an AnnotatedSentence and verify it."""
     pb_sentence = document_pb.sentence[0]
     annotated = AnnotatedSentence.from_pb(pb_sentence)

     # Whole-sentence properties.
     reference = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States."
     assert annotated.text == reference
     assert len(annotated) == 19

     # Per-token properties, checked on the token at index 1.
     assert annotated[1].word == "Hussein"
     assert annotated[1].ner == "PERSON"
예제 #5
0
 def test_from_tokens(self):
     """Construct a sentence from whitespace-split tokens and check its basic accessors."""
     raw_text = "This is a test."
     token_list = "This is a test .".split()
     built = AnnotatedSentence.from_tokens(raw_text, token_list)
     assert built.text == raw_text
     assert len(built) == 5  # four words plus the final period token
     assert built[1].word == "is"
예제 #6
0
 def test_depparse(self, document_pb):
     """Check the root and a few labelled arcs of the first sentence's dependency parse."""
     tree = AnnotatedSentence.from_pb(document_pb.sentence[0]).depparse()
     assert tree.roots == [6]  # politician

     # (head index, (child index, relation)) pairs expected in the parse.
     expected_arcs = [
         (6, (2, 'nsubj')),     # Obama is a child of politician
         (6, (3, 'cop')),       # 'is' is a copula
         (2, (0, 'compound')),  # 'Barack' is part of the compound that is Obama
     ]
     for head, arc in expected_arcs:
         assert arc in tree.children(head)
예제 #7
0
 def test_depparse(self, document_pb):
     """Verify the dependency parse built from sentence 0 of the document protobuf."""
     first_pb = document_pb.sentence[0]
     annotated = AnnotatedSentence.from_pb(first_pb)
     parse = annotated.depparse()

     # 'politician' (index 6) is the only root.
     assert parse.roots == [6]
     # 'Obama' (index 2) is the nominal subject of 'politician'.
     assert (2, 'nsubj') in parse.children(6)
     # 'is' (index 3) is a copula attached to 'politician'.
     assert (3, 'cop') in parse.children(6)
     # 'Barack' (index 0) is part of the compound headed by 'Obama'.
     assert (0, 'compound') in parse.children(2)
예제 #8
0
 def test_depparse_json(self, document_pb):
     """Check that the JSON form of the dependency parse contains the expected edges."""
     parse = AnnotatedSentence.from_pb(document_pb.sentence[0]).depparse()
     edges = parse.to_json()

     def found(*pairs):
         """Return True if some edge matches every (key, value) pair, tested in order."""
         return any(all(edge[key] == value for key, value in pairs) for edge in edges)

     # politician is root
     assert found(('dep', 'root'), ('dependent', 7), ('dependentgloss', 'politician'))
     # Obama is a child of politician
     assert found(('governer', 7), ('dep', 'nsubj'), ('dependent', 3), ('dependentgloss', 'Obama'))
     # 'is' is a copula
     assert found(('governer', 7), ('dep', 'cop'), ('dependent', 4), ('dependentgloss', 'is'))
     # 'Barack' is part of the compound that is Obama.
     assert found(('governer', 3), ('dep', 'compound'), ('dependent', 1), ('dependentgloss', 'Barack'))
예제 #9
0
 def test_depparse_json(self, document_pb):
     """Serialize the first sentence's dependency parse and look for four known edges."""
     first_pb = document_pb.sentence[0]
     graph = AnnotatedSentence.from_pb(first_pb).depparse()
     edges = graph.to_json()

     def is_root_politician(e):
         # politician is root
         return e['dep'] == 'root' and e['dependent'] == 7 and e['dependentgloss'] == 'politician'

     def is_obama_subject(e):
         # Obama is a child of politician
         return e['governer'] == 7 and e['dep'] == 'nsubj' and e['dependent'] == 3 and e['dependentgloss'] == 'Obama'

     def is_copula(e):
         # 'is' is a copula
         return e['governer'] == 7 and e['dep'] == 'cop' and e['dependent'] == 4 and e['dependentgloss'] == 'is'

     def is_barack_compound(e):
         # 'Barack' is part of the compound that is Obama.
         return e['governer'] == 3 and e['dep'] == 'compound' and e['dependent'] == 1 and e['dependentgloss'] == 'Barack'

     assert any(is_root_politician(e) for e in edges)
     assert any(is_obama_subject(e) for e in edges)
     assert any(is_copula(e) for e in edges)
     assert any(is_barack_compound(e) for e in edges)
예제 #10
0
 def test_depparse_json(self, document_pb):
     """Ensure the JSON edge list of the dependency parse includes the expected edges."""
     annotated = AnnotatedSentence.from_pb(document_pb.sentence[0])
     edge_list = annotated.depparse().to_json()

     # Each entry lists the (key, value) pairs one edge must satisfy, in test order.
     expectations = [
         # politician is root
         [("dep", "root"), ("dependent", 7), ("dependentgloss", "politician")],
         # Obama is a child of politician
         [("governer", 7), ("dep", "nsubj"), ("dependent", 3), ("dependentgloss", "Obama")],
         # 'is' is a copula
         [("governer", 7), ("dep", "cop"), ("dependent", 4), ("dependentgloss", "is")],
         # 'Barack' is part of the compound that is Obama.
         [("governer", 3), ("dep", "compound"), ("dependent", 1), ("dependentgloss", "Barack")],
     ]
     for wanted in expectations:
         assert any(all(edge[key] == value for key, value in wanted) for edge in edge_list)
예제 #11
0
def test_sentence_dict_to_pb(json_dict):
    """Convert the second sentence dict to a protobuf and check its text and token 1."""
    second_sentence = json_dict['sentences'][1]
    sentence_pb = AnnotatedSentence.dict_to_pb(second_sentence)
    assert sentence_pb.text == 'Really?'
    # The token at index 1 is expected to be the question mark.
    assert sentence_pb.token[1].word == u'?'
예제 #12
0
 def test_json_to_pb(self, json_dict):
     """Load the second sentence via from_json and check its text and token 1."""
     loaded = AnnotatedSentence.from_json(json_dict['sentences'][1])
     expected_text = 'Really?'
     assert loaded.text == expected_text
     # Indexing the sentence yields tokens; index 1 should be the question mark.
     assert loaded[1].word == u'?'
예제 #13
0
def test_sentence_dict_to_pb(json_dict):
  """Check dict_to_pb on the sentence at index 1 of the JSON fixture."""
  expected = 'Really?'
  converted = AnnotatedSentence.dict_to_pb(json_dict['sentences'][1])
  # Text must match, and the token at index 1 must be the question mark.
  assert converted.text == expected
  assert converted.token[1].word == u'?'
예제 #14
0
def make_sentence(text, typed_parse):
    """Build an AnnotatedSentence for ``text`` from a typed parse.

    ``make_pos_tokens(typed_parse)`` returns a pair that is unpacked as
    (POS tags, words). Both are forwarded to
    ``AnnotatedSentence.from_tokens`` as words first, then tags.
    """
    tags, tokens = make_pos_tokens(typed_parse)
    return AnnotatedSentence.from_tokens(text, tokens, tags)