def test_duration_with_years(self):
    """Check that a year-span phrase yields one date span with the expected range.

    The document is annotated with ``DateAnnotator(include_end_date=False)``
    and the single resulting span must cover 1999-01-01 through 2002-01-01.
    """
    sentence = 'I lived there for three years, from 1999 until late 2001'
    annotated = AnnoDoc(sentence)
    annotated.add_tier(DateAnnotator(include_end_date=False))

    date_spans = annotated.tiers['dates'].spans
    self.assertEqual(len(date_spans), 1)

    actual_range = date_spans[0].datetime_range
    expected_range = [
        datetime.datetime(1999, 1, 1),
        datetime.datetime(2002, 1, 1),
    ]
    self.assertEqual(actual_range, expected_range)
def _annotate_all_tiers(text):
    """Wrap *text* in an ``AnnoDoc`` and run every annotator over it.

    The annotators are applied in this order: geonames, counts, resolved
    keywords, dates. Returns the annotated document.
    """
    doc = AnnoDoc(text)
    annotators = (
        GeonameAnnotator(),
        CountAnnotator(),
        ResolvedKeywordAnnotator(),
        DateAnnotator(),
    )
    for annotator in annotators:
        doc.add_tiers(annotator)
    return doc
def __init__(self, classifier, dict_vectorizer, cutoff_ratio=0.65,
             keyword_array=None):
    """Store the model pieces and build the annotators and keyword pipeline.

    Args:
        classifier: Stored as ``self.classifier``; not used here.
        dict_vectorizer: Stored as ``self.dict_vectorizer``; its
            ``get_feature_names()`` result becomes ``self.keywords``.
        cutoff_ratio: Stored as ``self.cutoff_ratio`` (default 0.65).
        keyword_array: Stored as ``self.keyword_array`` and also handed to
            ``LinkedKeywordAdder`` and ``KeywordExtractor``.
    """
    self.keyword_array = keyword_array
    self.classifier = classifier

    # One instance of each annotator, created up front.
    self.geoname_annotator = GeonameAnnotator()
    self.count_annotator = CountAnnotator()
    self.date_annotator = DateAnnotator()
    self.keyword_annotator = KeywordAnnotator()
    self.resolved_keyword_annotator = ResolvedKeywordAnnotator()

    # Two-step keyword pipeline: link keywords, then limit counts to 1.
    steps = [
        ('link', LinkedKeywordAdder(keyword_array)),
        ('limit', LimitCounts(1)),
    ]
    self.keyword_processor = Pipeline(steps)

    self.dict_vectorizer = dict_vectorizer
    # NOTE(review): if dict_vectorizer is a scikit-learn DictVectorizer,
    # get_feature_names() was removed in scikit-learn 1.2 in favour of
    # get_feature_names_out() -- confirm the pinned version.
    self.keywords = dict_vectorizer.get_feature_names()
    self.keyword_extractor = KeywordExtractor(keyword_array)
    self.cutoff_ratio = cutoff_ratio
def main(in_file, out_file):
    """Annotate the text in *in_file* and dump the results to *out_file* as JSON.

    The input lines are joined with single spaces, annotated with the
    geoname, date and resolved-keyword annotators, and written as a JSON
    object with the keys ``location``, ``date`` and ``resolved_keyword``.

    Args:
        in_file: Path of the text file to read.
        out_file: Path of the JSON file to write (overwritten if present).
    """
    # Use a context manager so the input handle is always closed; the
    # original left it open for the lifetime of the process.
    with open(in_file) as src:
        txt = ' '.join(src.readlines())

    adoc = AnnoDoc(txt)
    adoc.add_tiers(GeonameAnnotator())
    adoc.add_tiers(DateAnnotator())
    adoc.add_tiers(ResolvedKeywordAnnotator())

    # Location
    geo = [x.to_dict() for x in adoc.tiers['geonames'].spans]
    # Date (spans are serialised via str())
    dates = [str(x) for x in adoc.tiers['dates'].spans]
    # Other
    other = [x.to_dict() for x in adoc.tiers['resolved_keywords'].spans]

    d = {'location': geo, 'date': dates, 'resolved_keyword': other}
    with open(out_file, 'w') as dest:
        json.dump(d, dest)
def _annotate(self, text: str, entity: str) -> AnnoDoc:
    """Annotate *text* with the annotator selected by *entity*.

    Args:
        text: Raw text to wrap in an ``AnnoDoc``.
        entity: Either ``"counts"`` or ``"dates"``.

    Returns:
        The ``AnnoDoc`` after the chosen annotator has been applied.

    Raises:
        KeyError: If *entity* is not one of the supported keys.
    """
    annotators = {
        "counts": CountAnnotator(),
        "dates": DateAnnotator(),
    }
    doc = AnnoDoc(text)
    chosen = annotators[entity]
    doc.add_tiers(chosen)
    return doc
def setUp(self):
    """Create the ``DateAnnotator`` stored on ``self.annotator``."""
    self.annotator = DateAnnotator()
def _annotate(text: str, to_optimize: str) -> AnnoDoc:
    """Annotate *text* with the annotator selected by *to_optimize*.

    Args:
        text: Raw text to wrap in an ``AnnoDoc``.
        to_optimize: Either ``'counts'`` or ``'dates'``.

    Returns:
        The ``AnnoDoc`` after the chosen annotator has been applied.

    Raises:
        KeyError: If *to_optimize* is not one of the supported keys.
    """
    annotators = {
        'counts': CountAnnotator(),
        'dates': DateAnnotator(),
    }
    doc = AnnoDoc(text)
    selected = annotators[to_optimize]
    doc.add_tiers(selected)
    return doc