예제 #1
0
    def en_parsers_speed(self):
        """Run every English extractor over one long sample text via ``check_time``.

        The sample document is read from ``long_parsed_text.txt`` under
        ``lexnlp_test_path`` and the geo-entity dictionary is loaded from the
        CSV fixtures of the geoentities test folder.  Each extractor is wrapped
        so that its result is fully materialised into a list, then handed to
        ``self.check_time`` together with a label and the shared ``times``
        dict (presumably ``check_time`` stores the elapsed time under that
        label -- confirm against its definition).

        The only assertion is that an entry for ``'get_amounts'`` ended up in
        ``times``.
        """
        sample_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(sample_path, 'r', encoding='utf-8') as sample_file:
            text = sample_file.read()

        geo_dir = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        geo_config = list(DictionaryEntry.load_entities_from_files(
            geo_dir + 'geoentities.csv',
            geo_dir + 'geoaliases.csv'))

        # (label, callable) pairs, executed strictly in this order.  Every
        # callable drains its extractor into a list so that lazy extractors
        # do all of their work inside the measured call.
        benchmarks = (
            ('get_amounts', lambda s: list(get_amounts(s))),
            ('get_acts', lambda s: list(get_acts(s))),
            ('get_citations', lambda s: list(get_citations(s))),
            ('get_conditions', lambda s: list(get_conditions(s))),
            ('get_constraints', lambda s: list(get_constraints(s))),
            ('get_copyright', lambda s: list(get_copyright(s))),
            # The courts extractor is the private ``_get_courts`` helper but
            # is reported under the public-looking label.
            ('get_courts', lambda s: list(_get_courts(s))),
            ('get_cusip', lambda s: list(get_cusip(s))),
            ('get_dates', lambda s: list(get_dates(s))),
            ('get_definitions', lambda s: list(get_definitions(s))),
            ('get_distances', lambda s: list(get_distances(s))),
            ('get_durations', lambda s: list(get_durations(s))),
            ('get_geoentities', lambda s: list(get_geoentities(s, geo_config))),
            ('get_money', lambda s: list(get_money(s))),
            ('get_percents', lambda s: list(get_percents(s))),
            ('get_pii', lambda s: list(get_pii(s))),
            ('get_ratios', lambda s: list(get_ratios(s))),
            ('get_regulations', lambda s: list(get_regulations(s))),
            ('get_trademarks', lambda s: list(get_trademarks(s))),
            ('get_urls', lambda s: list(get_urls(s))),
        )

        times = {}  # type: Dict[str, float]
        for label, extractor in benchmarks:
            self.check_time(text, extractor, label, times)

        self.assertTrue('get_amounts' in times)
예제 #2
0
def get_vacation_duration(text, return_source=False):
    """Extract a vacation-related duration and its time unit from ``text``.

    The text must first mention one of the vacation triggers ("vacation" or
    "paid time off") and then one of the phrases listed in
    ``TRIGGER_LIST_TIME_UNIT``; otherwise ``None`` is returned.

    :param text: text to inspect.
    :param return_source: when true, append ``text`` to the returned tuple.
    :return: ``None`` if no vacation trigger or no time unit was found;
        otherwise ``(duration, time_unit)`` or ``(duration, time_unit, text)``,
        where ``duration`` is the first item yielded by ``get_durations``
        (``None`` when it yields nothing) and ``time_unit`` is the second
        element of the first matching ``TRIGGER_LIST_TIME_UNIT`` entry.
    """
    TRIGGER_LIST_VACATION = ["vacation", "paid time off"]

    mentions_vacation = any(
        findWholeWordorPhrase(trigger)(text) is not None
        for trigger in TRIGGER_LIST_VACATION
    )
    if not mentions_vacation:
        return None

    # First time-unit entry whose phrase (element 0) occurs in the text wins;
    # its element 1 is what gets reported.
    time_unit = next(
        (entry[1] for entry in TRIGGER_LIST_TIME_UNIT
         if findWholeWordorPhrase(entry[0])(text) is not None),
        None,
    )
    if time_unit is None:
        return None

    matches = list(get_durations(text))
    # Only the first detected duration is reported.
    duration = matches[0] if matches else None

    if return_source:
        return duration, time_unit, text
    return duration, time_unit
 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """Collect candidate duration values found in ``text``.

     :param field: field being extracted (not used here).
     :param text: text to scan with ``get_durations``.
     :param kwargs: accepted for interface compatibility; ignored.
     :return: ``None`` when no durations are detected; otherwise a list with
         element ``[2]`` of every detected duration that is strictly below
         ``DurationField.MAX_DURATION`` (the list may be empty when every
         detected value is filtered out).
     """
     # Materialise before testing for emptiness: when ``get_durations``
     # returns a generator/iterator it is always truthy, so the
     # "nothing found" branch below would otherwise never trigger.
     durations = list(get_durations(text))
     if not durations:
         return None
     return [
         duration[2] for duration in durations
         if duration[2] < DurationField.MAX_DURATION
     ]
    def test_durations_digits(self):
        """A duration written with digits ("15 minutes") is found exactly once.

        Also checks the first annotation's character span and its cite string.
        """
        sentence = "I'd been waiting for 15 minutes before you finally came."
        found = list(get_durations(sentence))
        self.assertEqual(1, len(found))

        annotations = list(get_duration_annotations(sentence))
        first = annotations[0]
        self.assertEqual((21, 32), first.coords)
        self.assertEqual('/en/duration/15.0/minute', first.get_cite())
예제 #5
0
    def extraction_function(self, field, possible_value, text):
        """Resolve a duration value from a candidate value or from raw text.

        :param field: object whose ``item_number`` is forwarded to
            ``ValueExtractionHint.get_value``.
        :param possible_value: candidate value; a plain 3-element ``tuple`` is
            returned unchanged, any other truthy value is stringified and
            parsed with ``get_durations``.
        :param text: fallback text parsed when ``possible_value`` is falsy.
        :return: ``None`` when ``possible_value`` is ``None`` and ``text`` is
            empty; the tuple itself for a 3-tuple candidate; otherwise whatever
            ``ValueExtractionHint.get_value`` picks from the parsed durations.
        """
        nothing_to_parse = possible_value is None and not text
        if nothing_to_parse:
            return None

        # A plain 3-tuple is passed through untouched (presumably an
        # already-parsed duration -- confirm with callers).
        if (possible_value
                and type(possible_value) is tuple
                and len(possible_value) == 3):
            return possible_value

        source = str(possible_value) if possible_value else text
        found = list(get_durations(source))
        return ValueExtractionHint.get_value(found, field.item_number)
예제 #6
0
 def parse(self, text, text_unit_id, _text_unit_lang,
           **kwargs) -> ParseResults:
     """Build ``DateDurationUsage`` records for the durations found in ``text``.

     :param text: text to scan with ``durations.get_durations`` (called with
         ``return_sources=True``).
     :param text_unit_id: identifier stored on every created record.
     :param _text_unit_lang: unused.
     :param kwargs: ignored.
     :return: ``ParseResults`` mapping ``DateDurationUsage`` to one record per
         distinct detected item, each carrying the number of times that item
         occurred; ``None`` when nothing is detected.
     """
     from collections import Counter

     # One pass tallies every distinct item (the previous approach re-scanned
     # the full list once per unique item).  Counter iterates in first-seen
     # order, so the record order no longer depends on set hashing.
     occurrences = Counter(durations.get_durations(text, return_sources=True))
     if not occurrences:
         # Nothing detected: callers receive None, same as before.
         return None
     return ParseResults({
         DateDurationUsage: [
             DateDurationUsage(text_unit_id=text_unit_id,
                               amount=item[1],
                               amount_str=item[3],
                               duration_type=item[0],
                               duration_days=item[2],
                               count=count)
             for item, count in occurrences.items()
         ]
     })
예제 #7
0
                           lambda sentence: all(
                               [(word in sentence) for word in ['paid', 'rent', 'monthly']]),
                           lambda sentence: all(
                               [(word in sentence) for word in ['payments', 'rent', 'monthly']]),
                           lambda sentence: all(
                               [(word in sentence) for word in ['pay', 'per', 'month']]),
                           lambda sentence: all(
                               [(word in sentence) for word in ['payable', 'per', 'month']]),
                           lambda sentence: all(
                               [(word in sentence) for word in ['payable', 'monthly']])
                           ],
                   fill_fields={'rent_due_frequency': 'monthly'})
 ],
 'renew_non_renew_notice': [
     FieldDetector(select=r'(?:lessor|tenant).+intends\s+to.+lease.+(?:notice|notify)',
                   process_selected=lambda sentence, match: get_durations(sentence),
                   fill_fields=lambda sentence, durations: {'auto_renew': False,
                                                            'renew_non_renew_notice': durations[
                                                                0]}),
     FieldDetector(select=r'given.+option.+to\s+(?:renew|extend)',
                   fill_fields=lambda sentence, durations: {'auto_renew': False}),
     FieldDetector(select=r'to\s+(?:renew|extend).+(?:shall|must).+notice',
                   fill_fields=lambda sentence, durations: {'auto_renew': False}),
     FieldDetector(select=r'shall\s+automatically\s+(?:extend|renew)',
                   fill_fields=lambda sentence, durations: {'auto_renew': True}),
     FieldDetector(select=r'notice.+to.+(?:extend|renew)',
                   exclude=[r'agree'],
                   process_selected=lambda sentence, match: get_durations(sentence),
                   fill_fields=lambda sentence, durations: {
                       'renew_non_renew_notice': durations[0]}),
     FieldDetector(select=r'right\s+to\s+(?:renew|extend)',
 def _extract_variants_from_text(self, field, text: str):
     """Collect candidate duration values found in ``text``.

     :param field: field being extracted (not used here).
     :param text: text to scan with ``get_durations``.
     :return: ``None`` when no durations are detected; otherwise a list with
         element ``[2]`` of every detected duration.
     """
     # Materialise before testing for emptiness: when ``get_durations``
     # returns a generator/iterator it is always truthy, so the
     # "nothing found" branch below would otherwise never trigger.
     durations = list(get_durations(text))
     if not durations:
         return None
     return [duration[2] for duration in durations]