예제 #1
0
def find_landlord_tenant(text: str):
    """Guess which of the companies found in ``text`` is landlord and tenant.

    Only the first two results of ``get_companies`` are considered. The first
    one is treated as the landlord when ``min_index_of_word`` returns a
    smaller value for the landlord synonyms than for the tenant synonyms
    (presumably the earliest position of any synonym -- confirm against the
    helper); otherwise the first one is treated as the tenant.

    Args:
        text: Source text; company extraction runs on it as given, the
            synonym lookup runs on its lower-cased form.

    Returns:
        A ``(landlord, tenant)`` tuple. Each item is ``None`` when too few
        companies were found, an upper-cased ``"NAME TYPE"`` string (only
        ``"NAME"`` when the second field is ``None``), or the raw company
        entry when its first field is ``None``.
    """
    # NOTE: a regex-based pass (find_landlord_tenant_re) used to be tried
    # first; it is currently disabled.

    def _label(company):
        # Missing entries and entries without a name are passed through as-is.
        if company is None or company[0] is None:
            return company
        name = company[0].upper()
        suffix = '' if company[1] is None else ' ' + company[1].upper()
        return '{0}{1}'.format(name, suffix)

    companies = list(get_companies(text, detail_type=True, name_upper=True, strict=True))

    lowered = text.lower()
    landlord_pos = min_index_of_word(lowered, _LANDLORD_SYNONYMS)
    tenant_pos = min_index_of_word(lowered, _TENANT_SYNONYMS)

    # Pad with None so that missing companies unpack cleanly.
    first, second = (companies + [None, None])[:2]
    if landlord_pos < tenant_pos:
        landlord, tenant = first, second
    else:
        landlord, tenant = second, first

    return _label(landlord), _label(tenant)
예제 #2
0
def get_employer_name(text, return_source=False):
    """Return the first company in ``text`` as the employer, if one applies.

    Company extraction only runs when the terms yielded by
    ``get_definitions(text)`` include, case-insensitively, at least one entry
    of ``TRIGGER_LIST_COMPANY`` and at least one entry of
    ``TRIGGER_LIST_EMPLOYEE``.

    Args:
        text: Text to inspect.
        return_source: When true, return ``(employer, text)`` instead of
            just the employer.

    Returns:
        The fields of the first ``get_companies`` result joined with
        ``', '``, or ``None`` when the triggers are missing or no company is
        found (paired with ``text`` when ``return_source`` is true).
    """
    definitions = list(get_definitions(text))

    has_employer_term = False
    has_employee_term = False
    for term in definitions:
        key = term.lower()
        has_employer_term = has_employer_term or key in TRIGGER_LIST_COMPANY
        has_employee_term = has_employee_term or key in TRIGGER_LIST_EMPLOYEE
        if has_employer_term and has_employee_term:
            # Both triggers seen; the remaining definitions cannot change the outcome.
            break

    employer = None
    if has_employer_term and has_employee_term:
        companies = list(get_companies(text))
        if companies:
            # The first company found is taken to be the employer.
            employer = ', '.join(str(part) for part in companies[0])

    return (employer, text) if return_source else employer
    def parse(self,
              log: ProcessLogger,
              text,
              text_unit_id,
              _text_unit_lang,
              document_initial_load: bool = False,
              **kwargs) -> Optional[ParseResults]:
        """Extract companies from ``text`` and build their party usages.

        Every company returned by ``get_companies`` is resolved to a
        ``Party`` row via ``get_or_create`` and gets one unsaved
        ``PartyUsage`` in the result. The previous version returned from
        inside the loop, so only the first company was ever reported.

        Args:
            log: Process logger; not used by this method.
            text: Text of the text unit.
            text_unit_id: Id of the text unit the usages belong to.
            _text_unit_lang: Not used by this method.
            document_initial_load: When false, the ``PartyUsage`` rows
                already stored for ``text_unit_id`` are deleted first.
            **kwargs: Ignored.

        Returns:
            ``ParseResults`` mapping ``PartyUsage`` to the list of new
            usages, or ``None`` when no company is found.
        """
        # Here we override saving logic to work around race conditions on
        # party creation vs party usage saving.
        if not document_initial_load:
            PartyUsage.objects.filter(text_unit_id=text_unit_id).delete()

        found = list(
            get_companies(text,
                          count_unique=True,
                          detail_type=True,
                          name_upper=True))
        if not found:
            return None

        usages = []
        for name, _type, type_abbr, type_label, type_desc, count in found:
            defaults = dict(type=_type,
                            type_label=type_label,
                            type_description=type_desc)
            # A missing abbreviation is looked up/stored as '' rather than None.
            party, _ = Party.objects.get_or_create(
                name=name, type_abbr=type_abbr or '', defaults=defaults)
            usages.append(
                PartyUsage(text_unit_id=text_unit_id,
                           party=party,
                           count=count))

        return ParseResults({PartyUsage: usages})
 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """List the companies found in ``text`` as upper-cased display strings.

     Args:
         field: Not used by this method.
         text: Text passed to ``get_companies`` (strict mode, detailed types).
         **kwargs: Ignored.

     Returns:
         A list with one ``"NAME TYPE"`` string per company (only ``"NAME"``
         when the second field is ``None``), or ``None`` when nothing is found.
     """
     found = list(get_companies(text, detail_type=True, name_upper=True, strict=True))

     variants = []
     for company in found:
         name = company[0].upper()
         suffix = '' if company[1] is None else ' ' + company[1].upper()
         variants.append('{0}{1}'.format(name, suffix))

     # An empty result is reported as None rather than an empty list.
     return variants or None
예제 #5
0
def get_employee_name(text, return_source=False):
    """Return the first person in ``text`` who qualifies as the employee.

    Nothing is extracted unless one of the terms yielded by
    ``get_definitions(text)`` is, case-insensitively, in
    ``TRIGGER_LIST_EMPLOYEE``. A person from ``get_persons`` is rejected when
    its lower-cased string contains any ``FALSE_PEOPLE`` entry, or when its
    cleaned form is contained in the cleaned text of a company from
    ``get_companies``.

    Args:
        text: Text to inspect.
        return_source: When true, return ``(employee, text)`` instead of
            just the employee.

    Returns:
        ``str()`` of the first accepted person, or ``None`` when there is
        none (paired with ``text`` when ``return_source`` is true).
    """
    employee = None

    definitions = list(get_definitions(text))
    if any(term.lower() in TRIGGER_LIST_EMPLOYEE for term in definitions):
        persons = list(get_persons(text))
        companies = list(get_companies(text))

        for person in persons:
            false_hits = [marker for marker in FALSE_PEOPLE
                          if marker in str(person).lower()]
            if false_hits:
                continue

            matches_company = False
            for company in companies:
                if len(company) == 0:
                    continue

                # persons and companies return slightly different values for
                # the same text, so both sides are cleaned before comparing
                if company[1] is not None and company[0] is not None:
                    company_text = str(clean(company[0]) + clean(company[1]))
                else:
                    company_text = str(clean(company[0]))
                person_text = str(clean(person))

                # Containment (which also covers equality) handles the case
                # where get_companies picks up more surrounding text than
                # get_persons, e.g.: EMPLOYMENT AGREEMENT WHEREAS, Kensey Nash
                # Corporation, a Delaware corporation (the "Company") and
                # Todd M. DeWitt (the "Executive") entered into that certain
                # Amended and Restated Employment Agreement,...
                if person_text in company_text:
                    matches_company = True

            if not matches_company:
                # take the first person meeting our employee criteria
                employee = str(person)
                break

    return (employee, text) if return_source else employee
예제 #6
0
    def extraction_function(self, field, possible_value, text):
        """Resolve a company value for ``field``.

        Args:
            field: Object whose ``item_number`` is handed to
                ``ValueExtractionHint.get_value`` to choose among the
                extracted companies.
            possible_value: Already known value; returned unchanged when truthy.
            text: Text to extract companies from when no value is given.

        Returns:
            ``possible_value`` when truthy; otherwise an upper-cased
            ``"NAME TYPE"`` string (only ``"NAME"`` when the second field is
            ``None``) for the chosen company, or ``None`` when there is no
            text to search or no company is chosen.
        """
        if possible_value:
            return possible_value

        # Nothing supplied and nothing to search.
        if possible_value is None and not text:
            return None

        candidates = list(
            get_companies(text, detail_type=True, name_upper=True,
                          strict=True))
        chosen = ValueExtractionHint.get_value(candidates, field.item_number)
        if not chosen:
            return None

        name = chosen[0].upper()
        suffix = '' if chosen[1] is None else ' ' + chosen[1].upper()
        return '{0}{1}'.format(name, suffix)
 def get_companies(cls,
                   text: str,
                   strict: bool = False,
                   use_gnp: bool = False,
                   detail_type: bool = False,
                   count_unique: bool = False,
                   name_upper: bool = False,
                   parse_name_abbr: bool = False,
                   return_source: bool = False):
     """Call the module-level ``get_companies`` with this class's banlist filter.

     Every option is forwarded unchanged under the same keyword name; the
     result of ``cls.get_banlist_filter()`` is passed as ``banlist_usage``.

     Returns:
         Whatever the module-level ``get_companies`` returns for ``text``.
     """
     # Resolve the filter first, before delegating.
     banlist = cls.get_banlist_filter()
     options = {
         'strict': strict,
         'use_gnp': use_gnp,
         'detail_type': detail_type,
         'count_unique': count_unique,
         'name_upper': name_upper,
         'parse_name_abbr': parse_name_abbr,
         'return_source': return_source,
     }
     return get_companies(text, banlist_usage=banlist, **options)
예제 #8
0
 def parse(self, text, text_unit_id, _text_unit_lang,
           **kwargs) -> ParseResults:
     """Extract companies from ``text`` and build one usage per company.

     Each company returned by ``get_companies`` is resolved to a ``Party``
     row via ``get_or_create`` and paired with an unsaved ``PartyUsage``.

     Args:
         text: Text of the text unit.
         text_unit_id: Id of the text unit the usages belong to.
         _text_unit_lang: Not used by this method.
         **kwargs: Ignored.

     Returns:
         ``ParseResults`` mapping ``PartyUsage`` to the list of usages, or
         ``None`` when no company is found.
     """
     companies = list(
         get_companies(text,
                       count_unique=True,
                       detail_type=True,
                       name_upper=True))
     if not companies:
         return None

     usages = []
     for name, kind, abbr, label, description, count in companies:
         # A missing abbreviation is looked up/stored as '' rather than None.
         party, _ = Party.objects.get_or_create(
             name=name,
             type_abbr=abbr or '',
             defaults={
                 'type': kind,
                 'type_label': label,
                 'type_description': description,
             })
         usages.append(
             PartyUsage(text_unit_id=text_unit_id, party=party, count=count))
     return ParseResults({PartyUsage: usages})
예제 #9
0
 def extract_companies(self, text=None):
     """Return the companies ``lex_entities.get_companies`` finds, as a list.

     Args:
         text: Text to search; ``self.text`` is used when this is falsy.
     """
     source = text or self.text
     return list(lex_entities.get_companies(source))
예제 #10
0
 def getCompanies(self):
     """Store the companies found in ``self.bill_text`` on the bill.

     Each result of ``get_companies`` becomes a ``"<name> <type>"`` string,
     or just ``"<name>"`` when the second field is ``None`` (previously the
     literal text ``"None"`` was appended). Entries whose first field is
     ``None`` are skipped instead of raising ``TypeError``.

     The resulting list is written to ``self.bill.info['companies']``;
     nothing is returned.
     """
     names = []
     for company in get_companies(self.bill_text):
         name, company_type = company[0], company[1]
         if name is None:
             # No usable name to report for this entry.
             continue
         if company_type is None:
             names.append(str(name))
         else:
             names.append('{0} {1}'.format(name, company_type))
     self.bill.info['companies'] = names