def test_eq(self, json_dict):
    """Check ``==`` and ``!=`` between ``AnnotatedDocument`` instances.

    A document built from an untouched deep copy of ``json_dict`` must
    compare equal to the one built from ``json_dict`` itself, while a
    document built from a copy whose first token text was lowercased must
    compare unequal.
    """
    # Untouched duplicate of the input.
    identical_json = copy.deepcopy(json_dict)

    # Duplicate in which 'Belgian' loses its capital letter.
    lowercased_json = copy.deepcopy(json_dict)
    leading_token = lowercased_json["sentence"][0]["token"][0]
    for field in (u"originalText", u"word"):
        leading_token[field] = "belgian"

    reference = AnnotatedDocument.from_json(json_dict)
    same = AnnotatedDocument.from_json(identical_json)
    different = AnnotatedDocument.from_json(lowercased_json)

    assert reference == same
    assert reference != different
def test_eq(self, json_dict):
    """Verify equality semantics of documents parsed from JSON.

    Parses three inputs with ``AnnotatedDocument.from_json``: the given
    ``json_dict``, an unchanged deep copy, and a deep copy whose first
    token has been rewritten in lowercase. The first two must be equal;
    the first and third must differ.
    """
    # Verbatim clone.
    clone = copy.deepcopy(json_dict)

    # Clone where the capitalised 'Belgian' becomes 'belgian'.
    altered = copy.deepcopy(json_dict)
    token_entry = altered['sentence'][0]['token'][0]
    lowered = 'belgian'
    token_entry[u'originalText'] = lowered
    token_entry[u'word'] = lowered

    base_doc = AnnotatedDocument.from_json(json_dict)
    clone_doc = AnnotatedDocument.from_json(clone)
    altered_doc = AnnotatedDocument.from_json(altered)

    assert base_doc == clone_doc
    assert base_doc != altered_doc
def test_eq(self, json_dict):
    """Documents from equal JSON are equal; a changed token breaks equality.

    Uses the ``'sentences'`` / ``'tokens'`` keys of ``json_dict`` to reach
    the first token of the first sentence in a deep copy and lowercases its
    ``originalText`` and ``word`` values before parsing.
    """
    # One pristine copy, one copy that will be edited.
    pristine_json = copy.deepcopy(json_dict)
    edited_json = copy.deepcopy(json_dict)

    # In the edited copy 'Belgian' is no longer capitalized.
    head_token = edited_json['sentences'][0]['tokens'][0]
    for key in (u'originalText', u'word'):
        head_token[key] = 'belgian'

    original_doc = AnnotatedDocument.from_json(json_dict)
    pristine_doc = AnnotatedDocument.from_json(pristine_json)
    edited_doc = AnnotatedDocument.from_json(edited_json)

    assert original_doc == pristine_doc
    assert original_doc != edited_doc
def test_parse_pb(self, document_pb):
    """Check the document produced by ``AnnotatedDocument.from_pb``.

    Asserts the full text, that the document has length 3, and that the
    element at ``[0][1]`` has word ``"Hussein"`` and NER tag ``"PERSON"``.
    """
    expected_text = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States. He is the first African American to hold the office and the first president born outside the continental United States. Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he was president of the Harvard Law Review."

    parsed = AnnotatedDocument.from_pb(document_pb)

    assert parsed.text == expected_text
    assert len(parsed) == 3
    assert parsed[0][1].word == "Hussein"
    assert parsed[0][1].ner == "PERSON"
def setUpClass(cls, *args):
    """Prepare the shared document, annotation and mock client for the tests.

    Reads ``doc.txt`` and ``doc.pb`` from ``TEST_DATA``, parses the protobuf
    into an ``AnnotatedDocument``, creates and saves a ``Document`` whose
    gloss is the text file's content, and stores the document, the
    annotation and a ``MockCoreNLPClient`` on the class before delegating
    to the parent ``setUpClass``.
    """
    with open(os.path.join(TEST_DATA, "doc.txt"), "r") as text_file:
        gloss_text = text_file.read()

    with open(os.path.join(TEST_DATA, "doc.pb"), "rb") as pb_file:
        document_pb = proto.Document()
        document_pb.ParseFromString(pb_file.read())
    annotation = AnnotatedDocument.from_pb(document_pb)

    cls._doc = Document(
        id="test",
        corpus_id="test",
        created=datetime.now(),
        date=datetime.now(),
        title="test",
        gloss=gloss_text,
        metadata="",
    )
    cls._doc.save()

    cls._ann = annotation
    cls._client = MockCoreNLPClient(cls._doc, cls._ann)

    super(CoreNLPTestCase, cls).setUpClass(*args)
def test_document_dict_to_pb(json_dict):
    """Check the result of ``AnnotatedDocument.dict_to_pb`` for ``json_dict``.

    The result's ``text`` must equal the original sentence pair, and the
    ``text`` of the entry at index 1 of its ``sentence`` field must be
    ``'Really?'``.
    """
    converted = AnnotatedDocument.dict_to_pb(json_dict)
    expected_text = 'Belgian swimmers beat the United States. Really?'

    assert converted.text == expected_text
    assert converted.sentence[1].text == 'Really?'
def test_mentions(self, document_pb):
    """The document parsed from ``document_pb`` must expose 17 mentions."""
    parsed = AnnotatedDocument.from_pb(document_pb)
    found = parsed.mentions
    expected_count = 17
    assert len(found) == expected_count
def test_parse_pb(self, document_pb):
    """Parse ``document_pb`` with ``from_pb`` and verify the result.

    Verifies the document text, a length of 3, and the ``word`` / ``ner``
    attributes of the element at index 1 of the first item.
    """
    result = AnnotatedDocument.from_pb(document_pb)

    full_text = u"Barack Hussein Obama is an American politician who is the 44th and current President of the United States. He is the first African American to hold the office and the first president born outside the continental United States. Born in Honolulu, Hawaii, Obama is a graduate of Columbia University and Harvard Law School, where he was president of the Harvard Law Review."
    assert result.text == full_text

    assert len(result) == 3

    # Attribute checks on the element at [0][1].
    assert result[0][1].word == "Hussein"
    assert result[0][1].ner == "PERSON"
def doc(self, json_dict):
    """Return the ``AnnotatedDocument`` built from ``json_dict``."""
    annotated = AnnotatedDocument.from_json(json_dict)
    return annotated
def test_json(self, json_dict):
    """Round-trip check: ``from_json`` followed by ``to_json`` is lossless.

    The JSON produced by ``to_json`` must equal the ``json_dict`` that the
    document was created from.
    """
    round_tripped = AnnotatedDocument.from_json(json_dict).to_json()
    assert json_dict == round_tripped
def test_json_to_pb(self, json_dict):
    """Check text access on a document created with ``from_json``.

    The document's ``text`` must equal the original two-sentence string and
    the ``text`` of the item at index 1 must be ``'Really?'``.
    """
    parsed = AnnotatedDocument.from_json(json_dict)
    expected_text = 'Belgian swimmers beat the United States. Really?'

    assert parsed.text == expected_text
    assert parsed[1].text == 'Really?'