def test_ner(self):
    """Check the pretrained ELMo NER predictor on one sample sentence.

    Builds the predictor via
    ``pretrained.named_entity_recognition_with_elmo_peters_2018()``, runs
    ``predict_json`` on a fixed sentence, and asserts on the ``"words"``
    and ``"tags"`` entries of the returned mapping.
    """
    expected_words = [
        "Michael", "Jordan", "is", "a", "professor", "at", "Berkeley", ".",
    ]
    # One tag per expected word, in the same order.
    expected_tags = ["B-PER", "L-PER", "O", "O", "O", "O", "U-LOC", "O"]

    ner_predictor = pretrained.named_entity_recognition_with_elmo_peters_2018()
    payload = {"sentence": "Michael Jordan is a professor at Berkeley."}
    prediction = ner_predictor.predict_json(payload)

    assert prediction["words"] == expected_words
    assert prediction["tags"] == expected_tags
def __init__(self, tagger_model: str) -> None:
    """Create the indexer and load the requested NER tagger.

    Args:
        tagger_model: Name of the tagger to load. Accepted values are
            ``'elmo_peters_2018'`` and ``'fine_grained_elmo_peters_2018'``.

    Raises:
        ValueError: If ``tagger_model`` is not one of the accepted names.
    """
    super(DropNERTokenIndexer, self).__init__()

    # Reject unknown names up front so the happy path below is a simple
    # two-way choice.
    if tagger_model not in ('elmo_peters_2018',
                            'fine_grained_elmo_peters_2018'):
        raise ValueError(
            'Unsupported NER Tagger module. '
            'Please use \"elmo_peters_2018\" or \"fine_grained_elmo_peters_2018\"'
        )

    if tagger_model == 'elmo_peters_2018':
        self.ner_tagger = named_entity_recognition_with_elmo_peters_2018()
    else:
        self.ner_tagger = (
            fine_grained_named_entity_recognition_with_elmo_peters_2018()
        )
def main(args):
    """Extract affiliations for each paper in a metadata file and save them.

    Reads a JSON list of metadata entries from ``args.metadata``, builds the
    pretrained NER predictor, and for every entry that passes the
    ``KEEP`` / ``IGNORE`` filters calls ``extract_affiliations`` on the
    matching ``.txt`` file under ``args.data``. Results are accumulated and
    written with ``write_output`` to ``args.output``.

    Args:
        args: Object exposing ``metadata``, ``data`` and ``output``
            attributes (presumably parsed command-line arguments; verify
            against the caller).
    """
    with open(args.metadata, 'r') as metadata_file:
        entries = json.load(metadata_file)

    print('Building model...')
    ner_predictor = pretrained.named_entity_recognition_with_elmo_peters_2018()

    records = []
    text_dir = args.data

    for index, entry in enumerate(entries):
        pdf_name = os.path.basename(entry['pdf'])
        # The paper id is everything before the first '-' in the file name.
        paper_id = pdf_name.partition('-')[0]

        # A non-empty KEEP acts as a whitelist of paper ids.
        if len(KEEP) > 0 and paper_id not in KEEP:
            continue

        print(paper_id)
        if paper_id in IGNORE:
            print('Ignored')
            continue

        txt_path = os.path.join(text_dir, pdf_name.replace('.pdf', '.txt'))
        affiliations = extract_affiliations(txt_path, entry, ner_predictor)

        records.append({
            'name': pdf_name.replace('.pdf', ''),
            'code': entry['code'],
            'affiliations': affiliations,
        })

        # Write periodically as a failsafe. The check uses the position in
        # the metadata list, so it only fires when that entry was not
        # filtered out above.
        if index % 100 == 0:
            write_output(args.output, records)

    write_output(args.output, records)