def set_value_from_selection(self, doc: Document, value: str):
    """
    Parse ``value`` according to ``self.field_type``, store the result on
    ``doc`` under the attribute named by ``self.field`` and return whatever
    is stored there afterwards.

    Handling per field type:

    - string: ``value`` is stored unchanged;
    - float / integer: ``float(value)`` / ``int(value)`` is tried first; if
      that fails, the first item produced by
      ``extractors.find_numbers(value)`` is stored as-is (it is not coerced
      with ``int()`` / ``float()``);
    - date: ``dateparser.parse(value)`` is tried first; if it yields nothing,
      the first item produced by ``get_dates(value)`` is stored.

    When nothing can be extracted - including an empty or ``None``
    ``value`` - the attribute is set to ``None``. For any other field type
    nothing is assigned.

    :param doc: object whose attribute ``self.field`` is assigned
    :param value: raw selected text; may be empty or ``None``
    :return: the current value of the ``self.field`` attribute of ``doc``
    """
    field_type = self.field_type

    if field_type == FieldType.FIELD_TYPE_CONCRETE_STRING:
        parsed = value
    elif field_type in (FieldType.FIELD_TYPE_CONCRETE_FLOAT,
                        FieldType.FIELD_TYPE_CONCRETE_INTEGER):
        convert = float if field_type == FieldType.FIELD_TYPE_CONCRETE_FLOAT else int
        try:
            parsed = convert(value)
        except (TypeError, ValueError):
            # TypeError covers value=None: float(None) / int(None) do not
            # raise ValueError, so the "no value" case used to crash here
            # instead of reaching the fallback below.
            nums = list(extractors.find_numbers(value)) if value else None
            parsed = nums[0] if nums else None
    elif field_type == FieldType.FIELD_TYPE_CONCRETE_DATE:
        parsed = dateparser.parse(value) if value else None
        if not parsed:
            # dateparser found nothing - fall back to the date extractor
            dates = list(get_dates(value)) if value else None
            parsed = dates[0] if dates else None
    else:
        # unknown field type: nothing to assign, just report the current value
        return getattr(doc, self.field)

    setattr(doc, self.field, parsed)
    return getattr(doc, self.field)
def en_parsers_speed(self):
    """
    Run every English extractor once over the long sample text, passing each
    one to ``self.check_time`` together with its label and the shared
    ``times`` dict, then assert that a 'get_amounts' entry ended up in
    ``times``.
    """
    with codecs.open(os.path.join(lexnlp_test_path, 'long_parsed_text.txt'),
                     'r', encoding='utf-8') as fr:
        text = fr.read()

    # geo entities need a dictionary config loaded from two CSV files
    ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
    geo_config = list(DictionaryEntry.load_entities_from_files(
        ge_path + 'geoentities.csv',
        ge_path + 'geoaliases.csv'))

    # (label, callable) pairs, executed in this exact order
    parsers = (
        ('get_amounts', lambda s: list(get_amounts(s))),
        ('get_acts', lambda s: list(get_acts(s))),
        ('get_citations', lambda s: list(get_citations(s))),
        ('get_conditions', lambda s: list(get_conditions(s))),
        ('get_constraints', lambda s: list(get_constraints(s))),
        ('get_copyright', lambda s: list(get_copyright(s))),
        ('get_courts', lambda s: list(_get_courts(s))),
        ('get_cusip', lambda s: list(get_cusip(s))),
        ('get_dates', lambda s: list(get_dates(s))),
        ('get_definitions', lambda s: list(get_definitions(s))),
        ('get_distances', lambda s: list(get_distances(s))),
        ('get_durations', lambda s: list(get_durations(s))),
        ('get_geoentities', lambda s: list(get_geoentities(s, geo_config))),
        ('get_money', lambda s: list(get_money(s))),
        ('get_percents', lambda s: list(get_percents(s))),
        ('get_pii', lambda s: list(get_pii(s))),
        ('get_ratios', lambda s: list(get_ratios(s))),
        ('get_regulations', lambda s: list(get_regulations(s))),
        ('get_trademarks', lambda s: list(get_trademarks(s))),
        ('get_urls', lambda s: list(get_urls(s))),
    )

    times = {}  # type: Dict[str, float]
    for title, parser in parsers:
        self.check_time(text, parser, title, times)

    self.assertTrue('get_amounts' in times)
def get_citations(text):
    """
    Get citations containing "BGBl"
    :param text: str - text to scan
    :return: yields one dict per match of each citation pattern
    """
    patterns = (CITATION_PTN_RE, SECOND_CITATION_PTN_RE, CITATION_RANGE_PTN_RE)
    for pattern in patterns:
        for found in pattern.finditer(text):
            groups = found.capturesdict()

            def joined(name, sep=''):
                # concatenate all captures of an optional group ('' if absent)
                return sep.join(groups.get(name, ''))

            date_text = joined('date')
            if date_text:
                # best effort: keep the raw captured text if it cannot be
                # turned into a date
                try:
                    date_text = str(list(get_dates(date_text, 'de'))[0]['value'])
                except Exception:
                    pass

            yield {
                'location_start': found.start(),
                'location_end': found.end(),
                'text': groups['text'][0],
                'article': joined('article'),
                'number': joined('number'),
                'subparagraph': joined('subparagraph'),
                'sentence': joined('sentence'),
                'paragraph': joined('paragraph'),
                'letter': joined('letter'),
                'date': date_text,
                'part': groups['part'][0],
                'page': ', '.join(groups['page']),
                'year': joined('year', ', '),
            }
def get_citation_annotations(cls, text: str) -> \
        Generator[CitationAnnotation, None, None]:
    """
    Get citations containing "BGBl"
    :param text: str - text to scan
    :return: yields one CitationAnnotation (locale 'de') per match of each
             citation pattern
    """
    for ptn in [
        cls.CITATION_PTN_RE,
        cls.SECOND_CITATION_PTN_RE,
        cls.CITATION_RANGE_PTN_RE
    ]:
        for match in ptn.finditer(text):
            capture = match.capturesdict()

            date = ''.join(capture.get('date', ''))
            if date:
                try:
                    date = str(list(get_dates(date, 'de'))[0]['value'])
                except Exception:
                    # Best effort: keep the raw captured text when it cannot
                    # be parsed. Deliberately not a bare "except:", which
                    # would also swallow KeyboardInterrupt / SystemExit.
                    pass

            number_str = ''.join(capture.get('number', ''))

            ant = CitationAnnotation(
                coords=match.span(),
                text=capture['text'][0],
                paragraph=''.join(capture.get('paragraph', '')),
                subparagraph=''.join(capture.get('subparagraph', '')),
                letter=''.join(capture.get('letter', '')),
                date=date,
                part=capture['part'][0],
                locale='de')
            ant.article = TextAnnotation.get_int_value(
                ''.join(capture.get('article', '')))
            ant.number = TextAnnotation.get_int_value(number_str)
            ant.sentence = TextAnnotation.get_int_value(
                ''.join(capture.get('sentence', '')))

            # For page / volume / year: store the integer when one could be
            # obtained, otherwise keep the raw string in the companion field.
            # NOTE(review): the checks are truthiness checks, so a parsed
            # value of 0 would also be stored as a string - confirm intended.
            page_range = ', '.join(capture['page'])
            page = TextAnnotation.get_int_value(page_range)
            if page:
                ant.page = page
            else:
                ant.page_range = page_range

            # the volume is taken from the same 'number' capture as ant.number
            volume = TextAnnotation.get_int_value(number_str)
            if volume:
                ant.volume = volume
            else:
                ant.volume_str = number_str

            year_str = ', '.join(capture.get('year', ''))
            year = TextAnnotation.get_int_value(year_str)
            if year:
                ant.year = year
            else:
                ant.year_str = year_str

            yield ant
def get_effective_date(text, return_source=False):
    """
    Look for an "effective date" in ``text``.

    If any of the trigger phrases is found by ``findWholeWordorPhrase``, the
    result is the maximum of the dates returned by ``get_dates(text)``;
    otherwise (or when no dates are found) it is ``None``.

    :param text: text to inspect
    :param return_source: when true, return ``(effective_date, text)``
                          instead of just the date
    :return: the date or ``None``, optionally paired with ``text``
    """
    # need a better more accurate way of doing this
    # right now looks for triggers and takes latest date in that sentence
    triggers = ("dated as of", "effective as of", "made as of", "entered into as of")

    effective_date = None
    if any(findWholeWordorPhrase(phrase)(text) is not None for phrase in triggers):
        found = list(get_dates(text))
        if found:
            effective_date = max(found)

    if return_source:
        return effective_date, text
    return effective_date
'permitted_use': _cleanup_sentence(match).strip()}), FieldDetector(select=r'(?:tenant|lessee)\s+(?:may|shall)\s+use.*(?:property|premises)(.*)', fill_fields=lambda sentence, match: { 'permitted_use': _cleanup_sentence(match).strip()}), FieldDetector(select=re.compile(r'Use.*[:.]\s+(.*)', re.DOTALL), fill_fields=lambda sentence, match: { 'permitted_use': _cleanup_sentence(match).strip()}), FieldDetector(select=re.compile(r'Permitted\s+[Uu]se.*[:.]\s+(.*)', re.DOTALL), fill_fields=lambda sentence, match: { 'permitted_use': _cleanup_sentence(match).strip()}) ], 'start_end_term': [ FieldDetector(select=r'shall\s+(?:commence|start).*\d.*', process_selected=lambda sentence, match: get_dates(match), fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}), FieldDetector(select=r'shall\s+(?:end).*\d.*', process_selected=lambda sentence, match: get_dates(match), fill_fields=lambda sentence, dates: {'expiration_date': dates[0]}), FieldDetector(select=r'from.*\d.*', process_selected=lambda sentence, match: get_dates(match), fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}), FieldDetector(select=r'commencement\s+date.*\d.*', process_selected=lambda sentence, match: get_dates(match), fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}), FieldDetector(select=r'expiration\s+date.*\d.*', process_selected=lambda sentence, match: get_dates(match), fill_fields=lambda sentence, dates: {'expiration_date': dates[0]}), FieldDetector(select=r'term\W.*\d.*', process_selected=lambda sentence, match: get_durations(match),
def extract_dates(self, text=None):
    """
    Return the list of dates that ``lex_dates.get_dates`` finds.

    :param text: text to scan; when empty or ``None`` ``self.text`` is used
    :return: list built from the ``lex_dates.get_dates`` results
    """
    source = text or self.text
    return list(lex_dates.get_dates(source))
# path = sys.argv[1] # copy&paste brief pdf into .txt file. direct_path = "" with open(direct_path, 'r') as file: brief = file.read() processed_brief = lex_sentences.pre_process_document(brief) sentences_brief = lex_sentences.get_sentence_list(processed_brief) #print(sentences_brief) facts = [] for sentence in sentences_brief: dates = lex_dates.get_dates(sentence) for date in dates: facts.append((date,sentence)) for fact in facts: print("Question:\nWhy is {} significant?\n\nAnswer:\n{}".format(str(fact[0]), fact[1])) print("\n---------------\n") ''' Question: Why is 2018-11-26 significant? Answer: Case: 18-60522 Document: 00514736148 Page: 17 Date Filed: 11/26/2018 that these employees were constructively discharged because they faced a Hobson’s Choice? ---------------
def getDate(self):
    """
    Collect every date ``get_dates`` finds in ``self.bill_text`` and store
    their string forms as a list under ``self.bill.info['dates']``.
    """
    self.bill.info['dates'] = [str(found) for found in get_dates(self.bill_text)]