def en_parsers_speed(self):
    """Benchmark every English extraction parser against one long fixture text.

    Reads the shared long-text fixture, loads the geo-entity dictionary
    configuration, then times each parser via ``self.check_time`` which
    records its elapsed time into ``times`` keyed by parser name.
    """
    file_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
    with codecs.open(file_path, 'r', encoding='utf-8') as fr:
        text = fr.read()

    ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
    entities_fn = ge_path + 'geoentities.csv'
    aliases_fn = ge_path + 'geoaliases.csv'
    geo_config = list(DictionaryEntry.load_entities_from_files(entities_fn, aliases_fn))

    times = {}  # type: Dict[str, float]

    # (label, parser) pairs, timed in the same order as before.
    # NOTE: the court parser is the private _get_courts but is reported
    # under the public label 'get_courts'.
    benchmarks = [
        ('get_amounts', lambda s: list(get_amounts(s))),
        ('get_acts', lambda s: list(get_acts(s))),
        ('get_citations', lambda s: list(get_citations(s))),
        ('get_conditions', lambda s: list(get_conditions(s))),
        ('get_constraints', lambda s: list(get_constraints(s))),
        ('get_copyright', lambda s: list(get_copyright(s))),
        ('get_courts', lambda s: list(_get_courts(s))),
        ('get_cusip', lambda s: list(get_cusip(s))),
        ('get_dates', lambda s: list(get_dates(s))),
        ('get_definitions', lambda s: list(get_definitions(s))),
        ('get_distances', lambda s: list(get_distances(s))),
        ('get_durations', lambda s: list(get_durations(s))),
        ('get_geoentities', lambda s: list(get_geoentities(s, geo_config))),
        ('get_money', lambda s: list(get_money(s))),
        ('get_percents', lambda s: list(get_percents(s))),
        ('get_pii', lambda s: list(get_pii(s))),
        ('get_ratios', lambda s: list(get_ratios(s))),
        ('get_regulations', lambda s: list(get_regulations(s))),
        ('get_trademarks', lambda s: list(get_trademarks(s))),
        ('get_urls', lambda s: list(get_urls(s))),
    ]
    for label, parser in benchmarks:
        self.check_time(text, parser, label, times)

    self.assertTrue('get_amounts' in times)
def get_vacation_duration(text, return_source=False):
    """Extract a vacation duration from *text*, if any.

    Returns ``None`` unless the text mentions a vacation trigger phrase AND
    a known time unit.  Otherwise returns ``(duration, time_unit)`` — or
    ``(duration, time_unit, text)`` when *return_source* is true — where
    *duration* is the first duration found by ``get_durations`` (``None``
    if the text contains no parseable duration).
    """
    vacation_phrases = ["vacation", "paid time off"]

    # Guard: bail out early unless some vacation phrase occurs as a whole
    # word/phrase in the text.
    mentions_vacation = any(
        findWholeWordorPhrase(phrase)(text) is not None
        for phrase in vacation_phrases)
    if not mentions_vacation:
        return None

    # First matching time-unit trigger wins; t[0] is the trigger pattern,
    # t[1] the canonical unit name.
    time_unit = next(
        (trigger[1] for trigger in TRIGGER_LIST_TIME_UNIT
         if findWholeWordorPhrase(trigger[0])(text) is not None),
        None)
    if time_unit is None:
        return None

    parsed = list(get_durations(text))
    first_duration = parsed[0] if parsed else None  # take first duration

    if return_source:
        return first_duration, time_unit, text
    return first_duration, time_unit
def _extract_variants_from_text(self, field, text: str, **kwargs):
    """Return the day-count values of durations found in *text*, capped
    at ``DurationField.MAX_DURATION``; ``None`` when nothing is found.

    ``duration[2]`` is the duration-in-days component of each extracted
    tuple (same position used by the sibling extractor in this codebase).
    """
    # get_durations() yields lazily at its other call sites (they all wrap
    # it in list()), so a bare generator here would always be truthy and the
    # empty-result check below could never fire. Materialize it first so an
    # empty extraction correctly returns None instead of [].
    durations = list(get_durations(text))
    if not durations:
        return None
    return [
        duration[2] for duration in durations
        if duration[2] < DurationField.MAX_DURATION
    ]
def test_durations_digits(self):
    """A digit-based duration ('15 minutes') yields one extraction with the
    expected annotation coordinates and citation string."""
    sample = "I'd been waiting for 15 minutes before you finally came."

    extracted = list(get_durations(sample))
    self.assertEqual(1, len(extracted))

    annotation = list(get_duration_annotations(sample))[0]
    self.assertEqual((21, 32), annotation.coords)
    self.assertEqual('/en/duration/15.0/minute', annotation.get_cite())
def extraction_function(self, field, possible_value, text):
    """Resolve a duration value for *field*.

    A pre-parsed 3-tuple in *possible_value* is passed through unchanged;
    otherwise the string form of *possible_value* (or, failing that, *text*)
    is parsed with ``get_durations`` and the hinted item is selected via
    ``ValueExtractionHint.get_value``. Returns ``None`` when there is
    nothing to parse.
    """
    if possible_value is None and not text:
        return None
    # isinstance replaces `type(...) is tuple`: same behavior for plain
    # tuples, and additionally accepts namedtuple duration triples, which
    # are tuple subclasses.
    if possible_value and isinstance(possible_value, tuple) \
            and len(possible_value) == 3:
        return possible_value
    possible_value = str(possible_value) if possible_value else text
    durations = list(get_durations(possible_value))
    duration = ValueExtractionHint.get_value(durations, field.item_number)
    return duration
def parse(self, text, text_unit_id, _text_unit_lang, **kwargs) -> ParseResults:
    """Extract duration usages from *text* and package them as ParseResults.

    Each distinct extracted duration tuple (type, amount, days, source)
    becomes one DateDurationUsage carrying how many times it occurred.
    Returns None (implicitly) when no durations are found, matching the
    previous behavior.
    """
    found = list(durations.get_durations(text, return_sources=True))
    if found:
        # Count occurrences in one O(n) pass instead of calling
        # found.count(item) per unique item, which was O(n^2).
        counts = {}
        for item in found:
            counts[item] = counts.get(item, 0) + 1
        return ParseResults({
            DateDurationUsage: [
                DateDurationUsage(text_unit_id=text_unit_id,
                                  amount=item[1],
                                  amount_str=item[3],
                                  duration_type=item[0],
                                  duration_days=item[2],
                                  count=count)
                for item, count in counts.items()
            ]
        })
lambda sentence: all( [(word in sentence) for word in ['paid', 'rent', 'monthly']]), lambda sentence: all( [(word in sentence) for word in ['payments', 'rent', 'monthly']]), lambda sentence: all( [(word in sentence) for word in ['pay', 'per', 'month']]), lambda sentence: all( [(word in sentence) for word in ['payable', 'per', 'month']]), lambda sentence: all( [(word in sentence) for word in ['payable', 'monthly']]) ], fill_fields={'rent_due_frequency': 'monthly'}) ], 'renew_non_renew_notice': [ FieldDetector(select=r'(?:lessor|tenant).+intends\s+to.+lease.+(?:notice|notify)', process_selected=lambda sentence, match: get_durations(sentence), fill_fields=lambda sentence, durations: {'auto_renew': False, 'renew_non_renew_notice': durations[ 0]}), FieldDetector(select=r'given.+option.+to\s+(?:renew|extend)', fill_fields=lambda sentence, durations: {'auto_renew': False}), FieldDetector(select=r'to\s+(?:renew|extend).+(?:shall|must).+notice', fill_fields=lambda sentence, durations: {'auto_renew': False}), FieldDetector(select=r'shall\s+automatically\s+(?:extend|renew)', fill_fields=lambda sentence, durations: {'auto_renew': True}), FieldDetector(select=r'notice.+to.+(?:extend|renew)', exclude=[r'agree'], process_selected=lambda sentence, match: get_durations(sentence), fill_fields=lambda sentence, durations: { 'renew_non_renew_notice': durations[0]}), FieldDetector(select=r'right\s+to\s+(?:renew|extend)',
def _extract_variants_from_text(self, field, text: str):
    """Return the day-count (``duration[2]``) of every duration found in
    *text*, or ``None`` when none are found."""
    # get_durations() yields lazily at its other call sites (they all wrap
    # it in list()), so a bare generator here would always be truthy and the
    # empty-result check below could never fire. Materialize it first so an
    # empty extraction correctly returns None instead of [].
    durations = list(get_durations(text))
    if not durations:
        return None
    return [duration[2] for duration in durations]