def _annotate_all_tiers(text):
    """Wrap *text* in an AnnoDoc and attach every annotation tier to it.

    The geoname, count, resolved-keyword and date annotators are created
    up front and then added in that order, one ``add_tiers`` call each.

    Returns the resulting AnnoDoc.
    """
    doc = AnnoDoc(text)
    annotators = (
        GeonameAnnotator(),
        CountAnnotator(),
        ResolvedKeywordAnnotator(),
        DateAnnotator(),
    )
    for annotator in annotators:
        doc.add_tiers(annotator)
    return doc
def annotated_example():
    """Return the annotated example document, cached as a pickle fixture.

    The fixture lives at ``../data/fixtures/annotated_example.pkl`` relative
    to this module. If that file exists it is unpickled and returned.
    Otherwise the example text is annotated with a CountAnnotator, passed
    through ``delete_non_epitator_name_entity_tiers``, pickled to the fixture
    path (creating the directory if needed) and returned.
    """
    fixture_path = os.path.join(
        os.path.dirname(__file__),
        '..', 'data', 'fixtures', 'annotated_example.pkl',
    )

    # Fast path: a previously written fixture is simply loaded.
    # NOTE(review): pickle.load is only safe on trusted files; this path is
    # the fixture written by this same function.
    if os.path.isfile(fixture_path):
        with open(fixture_path, 'rb') as fixture_file:
            return pickle.load(fixture_file)

    # These imports run only when the fixture has to be (re)built.
    from epitator.annotator import AnnoDoc
    from epitator.count_annotator import CountAnnotator

    example = AnnoDoc(
        'I am in Berlin. Here are 5 confirmed cases of influenza. '
        'Still, less worse than those 100 confirmed and 200 suspected cases last year.'
    )
    example.add_tiers(CountAnnotator())
    example = delete_non_epitator_name_entity_tiers(example)

    fixture_dir = os.path.dirname(fixture_path)
    os.makedirs(fixture_dir, exist_ok=True)
    with open(fixture_path, 'wb') as fixture_file:
        pickle.dump(example, fixture_file)
    return example
def __init__(self, classifier, dict_vectorizer, cutoff_ratio=0.65, keyword_array=None):
    """Store the model components and build the annotators and keyword pipeline.

    Args:
        classifier: Stored as ``self.classifier``; not used during construction.
        dict_vectorizer: Stored as ``self.dict_vectorizer``; its feature names
            become ``self.keywords``. It must expose ``get_feature_names_out()``
            or the older ``get_feature_names()`` (presumably a scikit-learn
            ``DictVectorizer`` -- confirm against callers).
        cutoff_ratio: Stored as ``self.cutoff_ratio``.
        keyword_array: Stored as ``self.keyword_array`` and passed to both
            ``LinkedKeywordAdder`` and ``KeywordExtractor``.
    """
    self.keyword_array = keyword_array
    self.classifier = classifier

    self.geoname_annotator = GeonameAnnotator()
    self.count_annotator = CountAnnotator()
    self.date_annotator = DateAnnotator()
    self.keyword_annotator = KeywordAnnotator()
    self.resolved_keyword_annotator = ResolvedKeywordAnnotator()

    self.keyword_processor = Pipeline([
        ('link', LinkedKeywordAdder(keyword_array)),
        ('limit', LimitCounts(1)),
    ])

    self.dict_vectorizer = dict_vectorizer
    # scikit-learn deprecated get_feature_names() in 1.0 and removed it in
    # 1.2; prefer the replacement when it exists and keep the old call as a
    # fallback. list() keeps self.keywords a plain list in both cases.
    feature_names_out = getattr(dict_vectorizer, 'get_feature_names_out', None)
    if feature_names_out is not None:
        self.keywords = list(feature_names_out())
    else:
        self.keywords = dict_vectorizer.get_feature_names()

    self.keyword_extractor = KeywordExtractor(keyword_array)
    self.cutoff_ratio = cutoff_ratio
def _annotate(self, text: str, entity: str) -> AnnoDoc:
    """Annotate *text* with the single tier selected by *entity*.

    Args:
        text: Text to wrap in an AnnoDoc.
        entity: ``"counts"`` to add a CountAnnotator tier, ``"dates"`` to add
            a DateAnnotator tier.

    Returns:
        The AnnoDoc with the selected tier added.

    Raises:
        KeyError: If *entity* is neither ``"counts"`` nor ``"dates"``.
    """
    # Map to classes rather than instances so that only the requested
    # annotator is constructed on each call.
    annotator_classes = {"counts": CountAnnotator, "dates": DateAnnotator}
    annotated = AnnoDoc(text)
    annotated.add_tiers(annotator_classes[entity]())
    return annotated
def _annotate(text: str, to_optimize: str) -> AnnoDoc:
    """Annotate *text* with the single tier selected by *to_optimize*.

    Args:
        text: Text to wrap in an AnnoDoc.
        to_optimize: ``'counts'`` to add a CountAnnotator tier, ``'dates'`` to
            add a DateAnnotator tier.

    Returns:
        The AnnoDoc with the selected tier added.

    Raises:
        KeyError: If *to_optimize* is neither ``'counts'`` nor ``'dates'``.
    """
    # Map to classes rather than instances so that only the requested
    # annotator is constructed on each call.
    annotator_classes = {'counts': CountAnnotator, 'dates': DateAnnotator}
    annotated = AnnoDoc(text)
    annotated.add_tiers(annotator_classes[to_optimize]())
    return annotated
def setUp(self):
    """Assign a newly constructed CountAnnotator to ``self.annotator``.

    NOTE(review): presumably the test framework's per-test setup hook --
    the enclosing class is not visible here, confirm.
    """
    count_annotator = CountAnnotator()
    self.annotator = count_annotator