def test_token_dict_to_pb(json_dict):
    """Check the fields of the object built by ``AnnotatedToken.dict_to_pb``.

    The first token of the first sentence in ``json_dict`` is converted and
    each expected attribute of the result is compared in turn.
    """
    # NOTE(review): json_dict is presumably a pytest fixture -- confirm.
    first_token = json_dict['sentences'][0]['tokens'][0]
    converted = AnnotatedToken.dict_to_pb(first_token)

    # Ordered pairs of (attribute name, expected value).
    expected_fields = (
        ('after', u' '),
        ('before', u''),
        ('beginChar', 0),
        ('endChar', 7),
        ('originalText', u'Belgian'),
        ('word', u'Belgian'),
    )
    # Checked in sequence so the first mismatch is the one that fails.
    for field_name, expected in expected_fields:
        assert getattr(converted, field_name) == expected
def test_json_to_pb(self, json_dict):
    """Check the fields of the object built by ``AnnotatedToken.json_to_pb``.

    The first token of the first sentence in ``json_dict`` is converted and
    each expected attribute of the result is compared in turn.
    """
    # NOTE(review): json_dict is presumably a pytest fixture -- confirm.
    first_token = json_dict['sentences'][0]['tokens'][0]
    converted = AnnotatedToken.json_to_pb(first_token)

    # Ordered pairs of (attribute name, expected value).
    expected_fields = (
        ('after', u' '),
        ('before', u''),
        ('beginChar', 0),
        ('endChar', 7),
        ('originalText', u'Belgian'),
        ('word', u'Belgian'),
    )
    # Checked in sequence so the first mismatch is the one that fails.
    for field_name, expected in expected_fields:
        assert getattr(converted, field_name) == expected
def test_parse_pb(self, document_pb):
    """Check the attributes of the token built by ``AnnotatedToken.from_pb``.

    The token at index 3 of the sentence at index 1 in ``document_pb`` is
    wrapped and each expected attribute of the result is compared in turn.
    """
    # NOTE(review): document_pb is presumably a pytest fixture -- confirm.
    source_token = document_pb.sentence[1].token[3]
    parsed = AnnotatedToken.from_pb(source_token)

    # Ordered pairs of (attribute name, expected value).
    expected_fields = (
        ('after', u' '),
        ('before', u' '),
        ('character_span', (117, 122)),
        ('originalText', u'first'),
        ('word', u'first'),
        ('lemma', u'first'),
        ('ner', u'ORDINAL'),
        ('pos', u'JJ'),
    )
    # Checked in sequence so the first mismatch is the one that fails.
    for field_name, expected in expected_fields:
        assert getattr(parsed, field_name) == expected