Example #1
    def set_value_from_selection(self, doc: Document, value: str):
        # Coerce the selected text into the field's concrete type, falling
        # back to lexnlp extractors when direct conversion fails.
        if self.field_type == FieldType.FIELD_TYPE_CONCRETE_STRING:
            setattr(doc, self.field, value)

        elif self.field_type == FieldType.FIELD_TYPE_CONCRETE_FLOAT:
            try:
                setattr(doc, self.field, float(value))
            except ValueError:
                nums = list(extractors.find_numbers(value)) if value else None
                setattr(doc, self.field, nums[0] if nums else None)

        elif self.field_type == FieldType.FIELD_TYPE_CONCRETE_INTEGER:
            try:
                setattr(doc, self.field, int(value))
            except ValueError:
                nums = list(extractors.find_numbers(value)) if value else None
                setattr(doc, self.field, nums[0] if nums else None)

        elif self.field_type == FieldType.FIELD_TYPE_CONCRETE_DATE:
            d = dateparser.parse(value) if value else None
            if d:
                setattr(doc, self.field, d)
            else:
                dates = list(get_dates(value)) if value else None
                setattr(doc, self.field, dates[0] if dates else None)

        return getattr(doc, self.field)
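
A minimal usage sketch of the date fallback above, assuming get_dates comes from lexnlp.extract.en.dates (the import is not shown in this excerpt and is an assumption):

# Sketch only: get_dates() yields datetime.date objects, so list() is needed
# before indexing, exactly like the fallback branch above.
from lexnlp.extract.en.dates import get_dates

value = "Executed and effective as of March 3, 2020."
dates = list(get_dates(value)) if value else None
print(dates[0] if dates else None)  # e.g. datetime.date(2020, 3, 3)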
Example #2
    def en_parsers_speed(self):
        file_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(file_path, 'r', encoding='utf-8') as fr:
            text = fr.read()

        ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        entities_fn = ge_path + 'geoentities.csv'
        aliases_fn = ge_path + 'geoaliases.csv'
        geo_config = list(DictionaryEntry.load_entities_from_files(entities_fn, aliases_fn))

        times = {}  # type: Dict[str, float]
        self.check_time(text, lambda s: list(get_amounts(s)), 'get_amounts', times)
        self.check_time(text, lambda s: list(get_acts(s)), 'get_acts', times)
        self.check_time(text, lambda s: list(get_citations(s)), 'get_citations', times)
        self.check_time(text, lambda s: list(get_conditions(s)), 'get_conditions', times)
        self.check_time(text, lambda s: list(get_constraints(s)), 'get_constraints', times)
        self.check_time(text, lambda s: list(get_copyright(s)), 'get_copyright', times)
        self.check_time(text, lambda s: list(_get_courts(s)), 'get_courts', times)
        self.check_time(text, lambda s: list(get_cusip(s)), 'get_cusip', times)
        self.check_time(text, lambda s: list(get_dates(s)), 'get_dates', times)
        self.check_time(text, lambda s: list(get_definitions(s)), 'get_definitions', times)
        self.check_time(text, lambda s: list(get_distances(s)), 'get_distances', times)
        self.check_time(text, lambda s: list(get_durations(s)), 'get_durations', times)
        self.check_time(text, lambda s: list(get_geoentities(s, geo_config)), 'get_geoentities', times)
        self.check_time(text, lambda s: list(get_money(s)), 'get_money', times)
        self.check_time(text, lambda s: list(get_percents(s)), 'get_percents', times)
        self.check_time(text, lambda s: list(get_pii(s)), 'get_pii', times)
        self.check_time(text, lambda s: list(get_ratios(s)), 'get_ratios', times)
        self.check_time(text, lambda s: list(get_regulations(s)), 'get_regulations', times)
        self.check_time(text, lambda s: list(get_trademarks(s)), 'get_trademarks', times)
        self.check_time(text, lambda s: list(get_urls(s)), 'get_urls', times)

        self.assertTrue('get_amounts' in times)
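
The check_time helper is not shown in this excerpt; a minimal sketch of what such a timing method might look like (hypothetical, not lexnlp's actual test utility):

    def check_time(self, text, func, name, times):
        # Hypothetical helper: run one parser over the text and record the
        # elapsed wall-clock seconds under the parser's name.
        import time
        start = time.time()
        func(text)
        times[name] = time.time() - start
        print('{0}: {1:.3f}s'.format(name, times[name]))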
Example #3
def get_citations(text):
    """
    Get citations containing "BGBl"
    :param text: str
    :return: yields dict
    """

    for ptn in [CITATION_PTN_RE, SECOND_CITATION_PTN_RE, CITATION_RANGE_PTN_RE]:
        for match in ptn.finditer(text):
            capture = match.capturesdict()
            date = ''.join(capture.get('date', ''))
            if date:
                try:
                    date = str(list(get_dates(date, 'de'))[0]['value'])
                except Exception:
                    # fall back to the raw captured date string
                    pass
            yield dict(
                location_start=match.start(),
                location_end=match.end(),
                text=capture['text'][0],
                article=''.join(capture.get('article', '')),
                number=''.join(capture.get('number', '')),
                subparagraph=''.join(capture.get('subparagraph', '')),
                sentence=''.join(capture.get('sentence', '')),
                paragraph=''.join(capture.get('paragraph', '')),
                letter=''.join(capture.get('letter', '')),
                date=date,
                part=capture['part'][0],
                page=', '.join(capture['page']),
                year=', '.join(capture.get('year', ''))
            )
Example #4
    def get_citation_annotations(cls, text: str) -> \
            Generator[CitationAnnotation, None, None]:
        """
        Get citations containing "BGBl"
        :param text: str
        :return: yields dict
        """

        for ptn in [
                cls.CITATION_PTN_RE, cls.SECOND_CITATION_PTN_RE,
                cls.CITATION_RANGE_PTN_RE
        ]:
            for match in ptn.finditer(text):
                capture = match.capturesdict()
                date = ''.join(capture.get('date', ''))
                if date:
                    try:
                        date = str(list(get_dates(date, 'de'))[0]['value'])
                    except Exception:
                        # fall back to the raw captured date string
                        pass

                ant = CitationAnnotation(
                    coords=match.span(),
                    text=capture['text'][0],
                    paragraph=''.join(capture.get('paragraph', '')),
                    subparagraph=''.join(capture.get('subparagraph', '')),
                    letter=''.join(capture.get('letter', '')),
                    date=date,
                    part=capture['part'][0],
                    locale='de')
                ant.article = TextAnnotation.get_int_value(''.join(
                    capture.get('article', '')))
                ant.number = TextAnnotation.get_int_value(''.join(
                    capture.get('number', '')))
                ant.sentence = TextAnnotation.get_int_value(''.join(
                    capture.get('sentence', '')))

                page_range = ', '.join(capture['page'])
                page = TextAnnotation.get_int_value(page_range)
                if page:
                    ant.page = page
                else:
                    ant.page_range = page_range

                volume_str = ''.join(capture.get('number', ''))
                volume = TextAnnotation.get_int_value(volume_str)
                if volume:
                    ant.volume = volume
                else:
                    ant.volume_str = volume_str

                year_str = ', '.join(capture.get('year', ''))
                year = TextAnnotation.get_int_value(year_str)
                if year:
                    ant.year = year
                else:
                    ant.year_str = year_str

                yield ant
Example #5
def get_effective_date(text, return_source=False):
    # NOTE: needs a better, more accurate approach; for now this looks for
    # trigger phrases and takes the latest date in that sentence.
    TRIGGER_LIST_START_DATE = ["dated as of", "effective as of", "made as of", "entered into as of"]
    found_start_date_trigger = False
    effective_date = None

    for t in TRIGGER_LIST_START_DATE:
        if findWholeWordorPhrase(t)(text) is not None:
            found_start_date_trigger = True
            break

    if found_start_date_trigger:
        dates = list(get_dates(text))
        if len(dates) > 0:
            effective_date = max(dates)
    if return_source:
        return effective_date, text
    else:
        return effective_date
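
A sketch of the heuristic described in the comments above (once a trigger phrase is found, the latest extracted date wins); the sample text and the import path are assumptions:

from lexnlp.extract.en.dates import get_dates

text = "This Lease is entered into as of May 1, 2019 and was amended on June 3, 2019."
dates = list(get_dates(text))
effective_date = max(dates) if dates else None  # the later date, per this heuristic
print(effective_date)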
Example #6
                          'permitted_use': _cleanup_sentence(match).strip()}),
        FieldDetector(select=r'(?:tenant|lessee)\s+(?:may|shall)\s+use.*(?:property|premises)(.*)',
                      fill_fields=lambda sentence, match: {
                          'permitted_use': _cleanup_sentence(match).strip()}),
        FieldDetector(select=re.compile(r'Use.*[:.]\s+(.*)', re.DOTALL),
                      fill_fields=lambda sentence, match: {
                          'permitted_use': _cleanup_sentence(match).strip()}),
        FieldDetector(select=re.compile(r'Permitted\s+[Uu]se.*[:.]\s+(.*)', re.DOTALL),
                      fill_fields=lambda sentence, match: {
                          'permitted_use': _cleanup_sentence(match).strip()})
    ],

    'start_end_term': [

        FieldDetector(select=r'shall\s+(?:commence|start).*\d.*',
                      process_selected=lambda sentence, match: get_dates(match),
                      fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}),
        FieldDetector(select=r'shall\s+(?:end).*\d.*',
                      process_selected=lambda sentence, match: get_dates(match),
                      fill_fields=lambda sentence, dates: {'expiration_date': dates[0]}),
        FieldDetector(select=r'from.*\d.*',
                      process_selected=lambda sentence, match: get_dates(match),
                      fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}),
        FieldDetector(select=r'commencement\s+date.*\d.*',
                      process_selected=lambda sentence, match: get_dates(match),
                      fill_fields=lambda sentence, dates: {'commencement_date': dates[0]}),
        FieldDetector(select=r'expiration\s+date.*\d.*',
                      process_selected=lambda sentence, match: get_dates(match),
                      fill_fields=lambda sentence, dates: {'expiration_date': dates[0]}),
        FieldDetector(select=r'term\W.*\d.*',
                      process_selected=lambda sentence, match: get_durations(match),
Example #7
    def extract_dates(self, text=None):
        if not text:
            text = self.text
        return list(lex_dates.get_dates(text))
Example #8
# path = sys.argv[1]

# Copy & paste the brief PDF text into a .txt file, then point direct_path at it.
direct_path = ""

with open(direct_path, 'r') as file:
    brief = file.read()

processed_brief = lex_sentences.pre_process_document(brief)
sentences_brief = lex_sentences.get_sentence_list(processed_brief)

#print(sentences_brief)

facts = []
for sentence in sentences_brief:
    dates = lex_dates.get_dates(sentence)
    for date in dates:
        facts.append((date,sentence))

for fact in facts:
    print("Question:\nWhy is {} significant?\n\nAnswer:\n{}".format(str(fact[0]), fact[1]))
    print("\n---------------\n")
    
    
    '''
    Question:
    Why is 2018-11-26 significant?
    Answer:
    Case: 18-60522 Document: 00514736148 Page: 17 Date Filed: 11/26/2018
    that these employees were constructively discharged because they faced a Hobson’s Choice?
    ---------------
    '''
Example #9
    def getDate(self):
        # Collect every date found in the bill text as a string.
        mem = []
        dates = list(get_dates(self.bill_text))
        for date in dates:
            mem.append(str(date))
        self.bill.info['dates'] = mem
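
For comparison, the collection step above can be written as a comprehension; this is an equivalent sketch, not the project's code:

    def getDate(self):
        # Sketch: stringify every date get_dates() yields from the bill text.
        self.bill.info['dates'] = [str(d) for d in get_dates(self.bill_text)]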