Ejemplo n.º 1
0
 def test_duration_with_years(self):
     """Check the single date span found in a sentence with a year range.

     The annotator is built with ``include_end_date=False`` and the only
     resulting span must have a ``datetime_range`` running from
     1999-01-01 to 2002-01-01.
     """
     annotated = AnnoDoc(
         'I lived there for three years, from 1999 until late 2001')
     annotated.add_tier(DateAnnotator(include_end_date=False))
     date_spans = annotated.tiers['dates'].spans
     # Exactly one span is expected for the whole sentence.
     self.assertEqual(len(date_spans), 1)
     expected_range = [
         datetime.datetime(1999, 1, 1),
         datetime.datetime(2002, 1, 1),
     ]
     self.assertEqual(date_spans[0].datetime_range, expected_range)
Ejemplo n.º 2
0
def _annotate_all_tiers(text):
    """Wrap *text* in an AnnoDoc and run every annotator over it.

    The geoname, count, resolved-keyword and date annotators are created
    up front and then passed, one at a time and in that order, to
    ``AnnoDoc.add_tiers``. The populated document is returned.
    """
    doc = AnnoDoc(text)
    annotators = (
        GeonameAnnotator(),
        CountAnnotator(),
        ResolvedKeywordAnnotator(),
        DateAnnotator(),
    )
    for annotator in annotators:
        doc.add_tiers(annotator)
    return doc
Ejemplo n.º 3
0
 def __init__(self,
              classifier,
              dict_vectorizer,
              cutoff_ratio=0.65,
              keyword_array=None):
     """Store the model inputs and build the annotators and keyword helpers.

     Args:
         classifier: kept unchanged as ``self.classifier``.
         dict_vectorizer: kept as ``self.dict_vectorizer``; the result of
             its ``get_feature_names()`` call becomes ``self.keywords``.
         cutoff_ratio: kept as ``self.cutoff_ratio`` (default 0.65).
         keyword_array: kept as ``self.keyword_array`` and also handed to
             ``LinkedKeywordAdder`` and ``KeywordExtractor``.
     """
     self.keyword_array = keyword_array
     self.classifier = classifier

     # Annotator instances held on the object.
     self.geoname_annotator = GeonameAnnotator()
     self.count_annotator = CountAnnotator()
     self.date_annotator = DateAnnotator()
     self.keyword_annotator = KeywordAnnotator()
     self.resolved_keyword_annotator = ResolvedKeywordAnnotator()

     # Two-step keyword pipeline: 'link' first, then 'limit'.
     link_step = ('link', LinkedKeywordAdder(keyword_array))
     limit_step = ('limit', LimitCounts(1))
     self.keyword_processor = Pipeline([link_step, limit_step])

     self.dict_vectorizer = dict_vectorizer
     self.keywords = dict_vectorizer.get_feature_names()
     self.keyword_extractor = KeywordExtractor(keyword_array)
     self.cutoff_ratio = cutoff_ratio
Ejemplo n.º 4
0
def main(in_file, out_file):
    """Annotate the text of *in_file* and write the found spans as JSON.

    The lines of the input file are joined with single spaces and wrapped
    in an ``AnnoDoc``, to which the geoname, date and resolved-keyword
    annotators are added. The spans of the three resulting tiers are then
    dumped to *out_file* as one JSON object with the keys ``'location'``,
    ``'date'`` and ``'resolved_keyword'``.

    Args:
        in_file: path of the text file to read.
        out_file: path of the JSON file to write (overwritten).
    """
    # Read inside a context manager so the input handle is always closed;
    # the previous bare open() leaked it.
    with open(in_file) as src:
        txt = ' '.join(src.readlines())

    adoc = AnnoDoc(txt)
    adoc.add_tiers(GeonameAnnotator())
    adoc.add_tiers(DateAnnotator())
    adoc.add_tiers(ResolvedKeywordAnnotator())

    # Location
    geo = [x.to_dict() for x in adoc.tiers['geonames'].spans]
    # Date (spans are serialised via str(), not to_dict())
    dates = [str(x) for x in adoc.tiers['dates'].spans]
    # Other
    other = [x.to_dict() for x in adoc.tiers['resolved_keywords'].spans]

    d = {'location': geo, 'date': dates, 'resolved_keyword': other}

    with open(out_file, 'w') as dest:
        json.dump(d, dest)
Ejemplo n.º 5
0
 def _annotate(self, text: str, entity: str) -> AnnoDoc:
     """Return an AnnoDoc of *text* annotated for the requested entity.

     *entity* selects the annotator and must be ``"counts"`` or
     ``"dates"``; any other value raises ``KeyError`` from the lookup.
     """
     annotators = {"counts": CountAnnotator(), "dates": DateAnnotator()}
     doc = AnnoDoc(text)
     chosen = annotators[entity]
     doc.add_tiers(chosen)
     return doc
Ejemplo n.º 6
0
 def setUp(self):
     """Create the DateAnnotator stored on ``self.annotator``."""
     date_annotator = DateAnnotator()
     self.annotator = date_annotator
Ejemplo n.º 7
0
def _annotate(text: str, to_optimize: str) -> AnnoDoc:
    """Build an AnnoDoc for *text* and add the tier named by *to_optimize*.

    *to_optimize* must be ``'counts'`` or ``'dates'``; the lookup raises
    ``KeyError`` for anything else.
    """
    available = {'counts': CountAnnotator(), 'dates': DateAnnotator()}
    document = AnnoDoc(text)
    selected = available[to_optimize]
    document.add_tiers(selected)
    return document