def extract_disease_ontology_keywords(article):
    """Collect the disease keywords resolved in an article's body.

    The article is wrapped in ``pubcrawler.Article`` and its ``body`` is
    annotated via ``keyword_annotator`` to obtain the ``resolved_keywords``
    tier. Every resolution whose entity has ``type == 'disease'`` is kept,
    de-duplicated by entity id (a later occurrence of the same id replaces
    the earlier entry but keeps its position).

    Returns:
        A dict with ``'index.keywords'`` set to ``1`` and, under
        ``'keywords' -> 'disease-ontology'``, a list of
        ``{'keyword': <entity label>, 'uri': <entity id>}`` dicts.
    """
    parsed_article = pubcrawler.Article(article)
    doc = AnnoDoc(parsed_article.body)
    keyword_spans = doc.require_tiers(
        'resolved_keywords', via=keyword_annotator)

    # Lazily walk every resolved entity attached to every span.
    entities = (
        resolution['entity']
        for span in keyword_spans
        for resolution in span.metadata['resolutions']
    )

    diseases_by_id = {}
    for entity in entities:
        if entity['type'] != 'disease':
            continue
        # Keyed by entity id so repeated mentions collapse to one entry.
        diseases_by_id[entity['id']] = {
            'keyword': entity['label'],
            'uri': entity['id'],
        }

    return {
        'index.keywords': 1,
        'keywords': {
            'disease-ontology': list(diseases_by_id.values()),
        },
    }
def extract_geonames(article, store_all=False):
    """Collect the geonames annotated in an article's body.

    The article is wrapped in ``pubcrawler.Article`` and its ``body`` is
    annotated via ``geoname_annotator`` to obtain the ``geonames`` tier.
    One record is produced per span and records are de-duplicated by their
    ``'geonameid'`` value.

    Args:
        article: Raw article passed straight to ``pubcrawler.Article``.
        store_all: When true, each record copies every key in
            ``GEONAME_ATTRS`` plus ``'score'`` from the span's geoname;
            otherwise only ``'geonameid'`` is copied.
            NOTE(review): with ``store_all`` the record is still indexed by
            ``record['geonameid']``, so this presumably relies on
            ``GEONAME_ATTRS`` containing ``'geonameid'`` -- confirm.

    Returns:
        A dict with ``'index.geonames'`` set to ``1`` and the list of
        records under ``'geonames' -> 'culled'``.
    """
    parsed_article = pubcrawler.Article(article)
    doc = AnnoDoc(parsed_article.body)

    records_by_id = {}
    for span in doc.require_tiers('geonames', via=geoname_annotator):
        geoname = span.metadata['geoname']
        if store_all:
            record = {
                key: geoname[key] for key in GEONAME_ATTRS + ['score']
            }
        else:
            record = {'geonameid': geoname['geonameid']}
        # Later spans for the same geonameid replace the earlier record.
        records_by_id[record['geonameid']] = record

    return {
        'index.geonames': 1,
        'geonames': {
            'culled': list(records_by_id.values()),
        },
    }
def test_incident_2(self):
    """Check the incident produced from remapped infection spans.

    The ``infections`` tier is built with ``InfectionAnnotator``, each
    span's ``'infection'`` attribute is renamed to ``'case'``, and the
    tier is handed to ``self.annotator`` as ``case_counts``. The first
    resulting incident must report a case count of 2 with a date range
    covering the year 2012.
    """
    doc = AnnoDoc('There were 2 new cases in California in 2012.')
    infection_spans = doc.require_tiers('infections', via=InfectionAnnotator)

    # Rename the 'infection' attribute to 'case'; all other attributes
    # pass through unchanged.
    renames = {'infection': 'case'}
    for span in infection_spans:
        original_attributes = span.metadata['attributes']
        span.metadata['attributes'] = [
            renames.get(name, name) for name in original_attributes
        ]

    doc.add_tier(self.annotator, case_counts=infection_spans)

    first_incident = doc.tiers['incidents'].spans[0].metadata
    year_start = datetime.datetime(2012, 1, 1, 0, 0)
    next_year_start = datetime.datetime(2013, 1, 1, 0, 0)
    expected_props = {
        'value': 2,
        'type': 'caseCount',
        'dateRange': [year_start, next_year_start],
    }
    test_utils.assertHasProps(first_incident, expected_props)