def geneNormalization(documents):
    """Assign an NCBI gene ID to every annotation of every document, in place.

    For each document, the IDs returned by ``getGeneByPMID(document['id'])``
    are fetched once.  Then, for each annotation in each passage, the IDs
    returned by ``getGeneByMention(ann['text'])`` are scanned in order and
    the first one that is also contained in the per-document result is
    chosen.  When none of them is, the first mention-level ID is used, or
    the placeholder ``'TBD'`` if the mention lookup returned nothing.

    The chosen value is stored through ``Annotation.setNCBIID(ann, value)``.
    A header line is written to stderr and the document loop is wrapped in
    ``tqdm.tqdm``.

    Args:
        documents: Iterable of mappings that provide an ``'id'`` entry and a
            ``'passages'`` iterable; each passage provides an
            ``'annotations'`` iterable whose items provide a ``'text'`` entry.

    Returns:
        None.
    """
    print("Gene Normalization:", file=sys.stderr)

    for doc in tqdm.tqdm(documents):
        # Looked up once per document and reused for all of its annotations.
        pmid_ids = getGeneByPMID(doc['id'])

        for psg in doc['passages']:
            for annotation in psg['annotations']:
                mention_ids = getGeneByMention(annotation['text'])

                for candidate in mention_ids:
                    if candidate in pmid_ids:
                        # First mention-level ID confirmed by the document.
                        chosen = candidate
                        break
                else:
                    # No candidate confirmed by the document-level lookup:
                    # fall back to the top mention candidate, or the
                    # placeholder when there are no candidates at all.
                    chosen = mention_ids[0] if len(mention_ids) > 0 else 'TBD'

                Annotation.setNCBIID(annotation, chosen)