Exemple #1
0
def extract_disease_ontology_keywords(article):
    """Collect the disease entities found in an article's body text.

    The article is wrapped in ``pubcrawler.Article`` and its ``body`` is
    annotated to obtain the ``'resolved_keywords'`` tier via
    ``keyword_annotator``.  Every resolution whose entity has type
    ``'disease'`` is recorded, keyed by the entity id so that an id
    appearing several times yields a single entry (the last one seen).

    Args:
        article: Whatever ``pubcrawler.Article`` accepts -- its exact shape
            is not visible from this function.

    Returns:
        A dict with ``'index.keywords'`` set to ``1`` and ``'keywords'``
        holding a ``'disease-ontology'`` list of
        ``{"keyword": <entity label>, "uri": <entity id>}`` dicts.
    """
    parsed_article = pubcrawler.Article(article)
    doc = AnnoDoc(parsed_article.body)
    keyword_spans = doc.require_tiers('resolved_keywords',
                                      via=keyword_annotator)

    # Lazily flatten span -> resolutions -> entity so the filtering loop
    # below stays one level deep.
    entities = (
        resolution['entity']
        for keyword_span in keyword_spans
        for resolution in keyword_span.metadata['resolutions']
    )

    diseases_by_id = {}
    for entity in entities:
        if entity['type'] != 'disease':
            continue
        diseases_by_id[entity['id']] = {
            "keyword": entity['label'],
            "uri": entity['id']
        }

    disease_keywords = list(diseases_by_id.values())
    return {
        'index.keywords': 1,
        'keywords': {
            'disease-ontology': disease_keywords
        }
    }
Exemple #2
0
def extract_geonames(article, store_all=False):
    """Collect the geonames annotated in an article's body text.

    The article is wrapped in ``pubcrawler.Article`` and its ``body`` is
    annotated to obtain the ``'geonames'`` tier via ``geoname_annotator``.
    One record is kept per ``geonameid``; a later span with the same id
    replaces the earlier record.

    Args:
        article: Whatever ``pubcrawler.Article`` accepts -- its exact shape
            is not visible from this function.
        store_all: When true, each record copies every key listed in
            ``GEONAME_ATTRS`` plus ``'score'`` from the geoname.  When
            false (the default), only ``'geonameid'`` is copied.

    Returns:
        A dict with ``'index.geonames'`` set to ``1`` and ``'geonames'``
        holding a ``'culled'`` list of the collected records.
    """
    parsed_article = pubcrawler.Article(article)
    doc = AnnoDoc(parsed_article.body)

    records_by_id = {}
    for geo_span in doc.require_tiers('geonames', via=geoname_annotator):
        geoname = geo_span.metadata['geoname']
        if store_all:
            record = {
                attr: geoname[attr] for attr in GEONAME_ATTRS + ['score']
            }
        else:
            record = {'geonameid': geoname['geonameid']}
        # NOTE(review): with store_all=True this lookup relies on
        # GEONAME_ATTRS containing 'geonameid' -- confirm at its definition.
        records_by_id[record['geonameid']] = record

    culled = list(records_by_id.values())
    return {
        'index.geonames': 1,
        'geonames': {
            'culled': culled
        }
    }
 def test_incident_2(self):
     """Check the incident built from a single dated case-count sentence.

     The 'infections' tier is produced with InfectionAnnotator, its
     'infection' attributes are renamed to 'case', and the result is handed
     to ``self.annotator`` as ``case_counts``.  The first span of the
     resulting 'incidents' tier must then carry the expected value, type
     and date range.
     """
     annotated = AnnoDoc('There were 2 new cases in California in 2012.')
     infection_tier = annotated.require_tiers(
         'infections', via=InfectionAnnotator)

     # Rename attributes in place; names without a mapping are kept as-is.
     renames = {'infection': 'case'}
     for infection_span in infection_tier:
         infection_span.metadata['attributes'] = [
             renames.get(name, name)
             for name in infection_span.metadata['attributes']
         ]

     annotated.add_tier(self.annotator, case_counts=infection_tier)

     first_incident = annotated.tiers['incidents'].spans[0]
     actual_props = first_incident.metadata
     year_start = datetime.datetime(2012, 1, 1, 0, 0)
     next_year_start = datetime.datetime(2013, 1, 1, 0, 0)
     expected_props = {
         'value': 2,
         'type': 'caseCount',
         'dateRange': [year_start, next_year_start],
     }
     test_utils.assertHasProps(actual_props, expected_props)