def en_parsers_speed(self):
    """Run every English extractor over one long sample text via check_time.

    The sample text is read from ``long_parsed_text.txt`` under
    ``lexnlp_test_path`` and the geo-entity dictionary is loaded from the
    test CSV files.  Each extractor is wrapped in a lambda that consumes
    its whole result into a list, and is handed to ``self.check_time``
    together with its label and the shared ``times`` dict.  Finally the
    method asserts that ``times`` contains the ``'get_amounts'`` key.
    """
    sample_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
    with codecs.open(sample_path, 'r', encoding='utf-8') as sample_file:
        text = sample_file.read()

    # Dictionary data needed only by the geo-entity extractor.
    geo_dir = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
    geo_config = list(DictionaryEntry.load_entities_from_files(
        geo_dir + 'geoentities.csv',
        geo_dir + 'geoaliases.csv'))

    times = {}  # type: Dict[str, float]

    # (label, runner) pairs, in the order they are timed.  The runners stay
    # lambdas so each extractor name is resolved only when it is called.
    timed_extractors = (
        ('get_amounts', lambda s: list(get_amounts(s))),
        ('get_acts', lambda s: list(get_acts(s))),
        ('get_citations', lambda s: list(get_citations(s))),
        ('get_conditions', lambda s: list(get_conditions(s))),
        ('get_constraints', lambda s: list(get_constraints(s))),
        ('get_copyright', lambda s: list(get_copyright(s))),
        ('get_courts', lambda s: list(_get_courts(s))),
        ('get_cusip', lambda s: list(get_cusip(s))),
        ('get_dates', lambda s: list(get_dates(s))),
        ('get_definitions', lambda s: list(get_definitions(s))),
        ('get_distances', lambda s: list(get_distances(s))),
        ('get_durations', lambda s: list(get_durations(s))),
        ('get_geoentities', lambda s: list(get_geoentities(s, geo_config))),
        ('get_money', lambda s: list(get_money(s))),
        ('get_percents', lambda s: list(get_percents(s))),
        ('get_pii', lambda s: list(get_pii(s))),
        ('get_ratios', lambda s: list(get_ratios(s))),
        ('get_regulations', lambda s: list(get_regulations(s))),
        ('get_trademarks', lambda s: list(get_trademarks(s))),
        ('get_urls', lambda s: list(get_urls(s))),
    )
    for label, run_extractor in timed_extractors:
        self.check_time(text, run_extractor, label, times)

    self.assertTrue('get_amounts' in times)
def test_copyrights(self):
    """Check copyright extraction and annotation on one sample string.

    Expects exactly one result from ``get_copyright``, and expects the
    first annotation from ``get_copyright_annotations`` to have coords
    ``(0, 61)`` and the cite ``/en/copyright/Maverick/1999``.
    """
    sample = '(C)Maverick(R) International Processing Services, Inc. 1999'

    extracted = list(get_copyright(sample))
    self.assertEqual(1, len(extracted))

    annotations = list(get_copyright_annotations(sample))
    first_annotation = annotations[0]
    self.assertEqual((0, 61), first_annotation.coords)
    self.assertEqual('/en/copyright/Maverick/1999', first_annotation.get_cite())
def parse(self, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """Build ``CopyrightUsage`` records for the copyrights found in ``text``.

    Identical matches are collapsed into a single record whose ``count``
    is the number of times that match was returned.  Matches whose name
    (``item[2]``) is 100 characters or longer are skipped.  The stored
    name and copyright string are truncated to 200 characters.

    Args:
        text: Text to scan for copyright notices.
        text_unit_id: Identifier stored on every produced record.
        _text_unit_lang: Unused.
        **kwargs: Unused.

    Returns:
        A ``ParseResults`` mapping ``CopyrightUsage`` to the list of
        records, or ``None`` when no copyright was found at all.
    """
    # Function-scope import so the block does not depend on the file header.
    from collections import Counter

    # One pass over the matches; replaces a list.count() call per unique
    # item.  Counter also keeps first-seen order, so the output order is
    # deterministic instead of following set iteration order.
    occurrences = Counter(copyright.get_copyright(text, return_sources=True))
    if not occurrences:
        # Nothing matched: the original fell through and returned None.
        return None

    usages = [
        CopyrightUsage(text_unit_id=text_unit_id,
                       year=item[1],
                       name=item[2][:200],
                       copyright_str=item[3][:200],
                       count=count)
        for item, count in occurrences.items()
        if len(item[2]) < 100
    ]
    return ParseResults({CopyrightUsage: usages})