Beispiel #1
0
    def test_eq(self, json_dict):
        """Check ``==`` and ``!=`` between documents built from JSON.

        A document parsed from an untouched deep copy of ``json_dict`` must
        compare equal to the one parsed from ``json_dict`` itself, while a
        copy whose first token text was changed must compare unequal.
        """
        untouched_json = copy.deepcopy(json_dict)

        # Second copy: rewrite the first token so 'Belgian' loses its capital.
        lowercased_json = copy.deepcopy(json_dict)
        leading_token = lowercased_json["sentence"][0]["token"][0]
        for field in (u"originalText", u"word"):
            leading_token[field] = "belgian"

        # Parse all three payloads (same order as listed) before comparing.
        reference, duplicate, altered = [
            AnnotatedDocument.from_json(payload)
            for payload in (json_dict, untouched_json, lowercased_json)
        ]

        assert reference == duplicate
        assert reference != altered
    def test_eq(self, json_dict):
        """Equal JSON input yields equal documents; an edited token does not.

        Three documents are parsed: one from ``json_dict``, one from an exact
        deep copy, and one from a deep copy whose first token has been
        rewritten to the lower-case form 'belgian'.
        """
        parse = AnnotatedDocument.from_json

        same_json = copy.deepcopy(json_dict)

        # Only the first token of the first sentence entry is modified.
        changed_json = copy.deepcopy(json_dict)
        token0 = changed_json['sentence'][0]['token'][0]
        token0[u'originalText'] = token0[u'word'] = 'belgian'

        baseline = parse(json_dict)
        clone = parse(same_json)
        variant = parse(changed_json)

        assert baseline == clone
        assert baseline != variant
Beispiel #3
0
    def test_eq(self, json_dict):
        """Verify document equality is sensitive to token text.

        The document parsed from ``json_dict`` is compared against one parsed
        from an identical deep copy (expected equal) and one parsed from a
        deep copy whose first token reads 'belgian' (expected unequal).
        """
        # Pristine duplicate of the input.
        pristine = copy.deepcopy(json_dict)

        # Duplicate in which 'Belgian' is no longer capitalized.
        edited = copy.deepcopy(json_dict)
        opening_sentence = edited['sentences'][0]
        opening_token = opening_sentence['tokens'][0]
        opening_token[u'originalText'] = 'belgian'
        opening_token[u'word'] = 'belgian'

        from_original = AnnotatedDocument.from_json(json_dict)
        from_pristine = AnnotatedDocument.from_json(pristine)
        from_edited = AnnotatedDocument.from_json(edited)

        assert from_original == from_pristine
        assert from_original != from_edited
Beispiel #4
0
 def test_parse_pb(self, document_pb):
     """Build a document from ``document_pb`` and spot-check its contents.

     Checks the full text, that the document's length is 3, and the
     ``word`` / ``ner`` attributes of the item at ``document[0][1]``.
     """
     parsed = AnnotatedDocument.from_pb(document_pb)

     expected_text = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States. He is the first African American to hold the office and the first president born outside the continental United States. Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he was president of the Harvard Law Review."
     assert parsed.text == expected_text

     assert len(parsed) == 3

     # The item at [0][1] is expected to be 'Hussein', tagged as a PERSON.
     for attribute, expected in (("word", "Hussein"), ("ner", "PERSON")):
         assert getattr(parsed[0][1], attribute) == expected
Beispiel #5
0
    def setUpClass(cls, *args):
        """Prepare the shared document, annotation and mock client.

        Reads ``doc.txt`` and ``doc.pb`` from ``TEST_DATA``, saves a
        ``Document`` whose gloss is the text file's content, and stores the
        document, the annotation parsed from the protobuf file, and a
        ``MockCoreNLPClient`` wrapping both on the class. The parent
        ``setUpClass`` is invoked last with the same positional arguments.
        """
        text_path = os.path.join(TEST_DATA, "doc.txt")
        with open(text_path, "r") as text_file:
            gloss_text = text_file.read()

        pb_path = os.path.join(TEST_DATA, "doc.pb")
        with open(pb_path, "rb") as pb_file:
            document_pb = proto.Document()
            document_pb.ParseFromString(pb_file.read())
            annotated = AnnotatedDocument.from_pb(document_pb)

        # 'created' and 'date' each get their own timestamp, taken in order.
        document_fields = dict(
            id="test",
            corpus_id="test",
            created=datetime.now(),
            date=datetime.now(),
            title="test",
            gloss=gloss_text,
            metadata="",
        )
        cls._doc = Document(**document_fields)
        cls._doc.save()

        cls._ann = annotated
        cls._client = MockCoreNLPClient(cls._doc, cls._ann)

        super(CoreNLPTestCase, cls).setUpClass(*args)
Beispiel #6
0
def test_document_dict_to_pb(json_dict):
    """Convert ``json_dict`` with ``dict_to_pb`` and check the resulting text.

    The result's ``text`` must be the original two-sentence string, and the
    entry at index 1 of its ``sentence`` field must read 'Really?'.
    """
    converted = AnnotatedDocument.dict_to_pb(json_dict)

    expected_text = 'Belgian swimmers beat the United States. Really?'
    assert converted.text == expected_text

    second_sentence = converted.sentence[1]
    assert second_sentence.text == 'Really?'
 def test_mentions(self, document_pb):
     """The document built from ``document_pb`` exposes 17 mentions."""
     mention_count = len(AnnotatedDocument.from_pb(document_pb).mentions)
     assert mention_count == 17
 def test_parse_pb(self, document_pb):
     """Parse ``document_pb`` and verify text, length and one item's fields.

     The item at ``document[0][1]`` is expected to carry the word 'Hussein'
     and the NER label 'PERSON'.
     """
     full_text = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States. He is the first African American to hold the office and the first president born outside the continental United States. Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he was president of the Harvard Law Review."

     annotated = AnnotatedDocument.from_pb(document_pb)
     assert annotated.text == full_text
     assert len(annotated) == 3

     # Each attribute is looked up through a fresh index access, checked in turn.
     word = annotated[0][1].word
     assert word == "Hussein"
     ner_label = annotated[0][1].ner
     assert ner_label == "PERSON"
 def doc(self, json_dict):
     """Return the ``AnnotatedDocument`` built from ``json_dict``.

     NOTE(review): presumably registered as a pytest fixture by a decorator
     outside this snippet -- confirm.
     """
     annotated = AnnotatedDocument.from_json(json_dict)
     return annotated
 def test_json(self, json_dict):
     """Round-trip check: ``from_json`` followed by ``to_json`` equals the input."""
     round_tripped = AnnotatedDocument.from_json(json_dict).to_json()
     assert json_dict == round_tripped
Beispiel #11
0
 def test_mentions(self, document_pb):
     """Check that the parsed document reports exactly 17 mentions."""
     parsed = AnnotatedDocument.from_pb(document_pb)
     found = parsed.mentions
     expected_count = 17
     assert len(found) == expected_count
Beispiel #12
0
 def doc(self, json_dict):
     """Build an ``AnnotatedDocument`` from the given JSON dictionary.

     NOTE(review): looks like a pytest fixture whose decorator is not part
     of this snippet -- verify.
     """
     build = AnnotatedDocument.from_json
     return build(json_dict)
Beispiel #13
0
 def test_json(self, json_dict):
     """Serializing a document parsed from ``json_dict`` reproduces ``json_dict``."""
     annotated = AnnotatedDocument.from_json(json_dict)
     # The input is the left operand, so its ``__eq__`` drives the comparison.
     serialized = annotated.to_json()
     assert json_dict == serialized
Beispiel #14
0
 def test_json_to_pb(self, json_dict):
     """Check the text of a document built with ``from_json``.

     The document's ``text`` must equal the original string and the element
     at index 1 must have the text 'Really?'.
     """
     annotated = AnnotatedDocument.from_json(json_dict)

     expected_text = 'Belgian swimmers beat the United States. Really?'
     assert annotated.text == expected_text

     second_item = annotated[1]
     assert second_item.text == 'Really?'
Beispiel #15
0
def test_document_dict_to_pb(json_dict):
  """Check ``text`` and ``sentence[1].text`` of the ``dict_to_pb`` result."""
  expected_text = 'Belgian swimmers beat the United States. Really?'
  expected_second = 'Really?'

  message = AnnotatedDocument.dict_to_pb(json_dict)

  assert message.text == expected_text
  assert message.sentence[1].text == expected_second