def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
    """Fill the lease attributes of ``doc`` from fields detected in ``doc_text``.

    The text is split with ``get_sentence_list`` and the sentences are handed
    to ``detect_fields``; the returned mapping is then copied onto ``doc``
    attribute by attribute. ``doc`` is mutated in place and nothing is
    returned. ``task`` is accepted but not used in this body.
    """
    sentences = get_sentence_list(doc_text)
    detected = detect_fields(sentences)

    # Address: take the detected value, otherwise try the default detector.
    address = detected.get('address')
    if not address:
        address = detect_address_default(doc_text, sentences)
    doc.address = address

    # Term: when exactly one of the two dates is known, derive the other one
    # from the term length (element 2 of the 'term' tuple, used as days).
    doc.commencement_date = detected.get('commencement_date')
    doc.expiration_date = detected.get('expiration_date')

    term_info = detected.get('term')
    if term_info:
        term_length = timedelta(days=term_info[2])
        starts, ends = doc.commencement_date, doc.expiration_date
        if starts and not ends:
            doc.expiration_date = starts + term_length
        elif ends and not starts:
            doc.commencement_date = ends - term_length

    # An expiration that is not after the commencement is discarded.
    starts, ends = doc.commencement_date, doc.expiration_date
    if starts and ends and starts >= ends:
        doc.expiration_date = None

    # Lease type is chosen from the sum of the three pay_* values.
    net_charges = sum(int(detected.get(key) or False)
                      for key in ('pay_taxes', 'pay_costs', 'pay_insurance'))
    lease_type_by_count = {3: 'triple-net', 2: 'double-net', 1: 'single-net'}
    doc.lease_type = lease_type_by_count.get(net_charges, 'gross')

    # Property type: sorted values joined into one '; '-separated string.
    doc.property_type = '; '.join(sorted(detected.get('property_types__set') or set()))

    # Permitted / prohibited use.
    doc.permitted_uses = detected.get('permitted_use')
    doc.prohibited_uses = ProcessLeaseDocuments.ordered_list_without_repetitions(
        detected.get('prohibited_use__list'))

    # Renewal notice duration (element 2 of the tuple, used as days).
    notice_info = detected.get('renew_non_renew_notice')
    if notice_info:
        doc.renew_non_renew_notice_duration = timedelta(days=notice_info[2])

    # auto_renew is only overwritten when a value was actually detected.
    auto_renew_flag = detected.get('auto_renew')
    if auto_renew_flag is not None:
        doc.auto_renew = auto_renew_flag

    # Area: the first detected square-feet value is used.
    area_values = detected.get('area_square_feet__list')
    if area_values:
        doc.area_size_sq_ft = area_values[0]

    doc.alterations_allowed = ProcessLeaseDocuments.ordered_list_without_repetitions(
        detected.get('alterations_allowed__list'))

    # Security deposit and mean monthly rent: the largest detected value.
    deposit_values = detected.get('security_deposit__set')
    if deposit_values:
        doc.security_deposit = max(deposit_values)

    doc.rent_due_frequency = detected.get('rent_due_frequency')

    monthly_rent_values = detected.get('mean_rent_per_month__set')
    if monthly_rent_values:
        doc.mean_rent_per_month = max(monthly_rent_values)
Example #2
0
    def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
        """Fill the lease attributes of ``doc`` from fields detected in ``doc_text``.

        The text is split with ``get_sentence_list`` and the sentences are
        handed to ``detect_fields``; the returned mapping is then copied onto
        ``doc`` attribute by attribute. A non-empty address is additionally
        looked up through ``geocoder.google``. ``doc`` is mutated in place and
        nothing is returned. ``task`` is only used to log a warning when the
        geocoding response is neither ok nor a 'ZERO' status.
        """
        sentences = get_sentence_list(doc_text)
        detected = detect_fields(sentences)

        # Address: take the detected value, otherwise try the default detector.
        address = detected.get('address')
        if not address:
            address = detect_address_default(doc_text, sentences)
        doc.address = address

        if doc.address:
            geo = geocoder.google(doc.address)
            if geo.ok:
                doc.address_latitude = geo.lat
                doc.address_longitude = geo.lng
                doc.address_country = geo.country_long
                doc.address_state_province = geo.province_long
            elif geo.status and 'ZERO' in geo.status:
                # Google does not know such address - probably we detected it
                # wrong, so the address and everything derived from it is reset.
                for attr_name in ('address',
                                  'address_state_province',
                                  'address_country',
                                  'address_longitude',
                                  'address_latitude'):
                    setattr(doc, attr_name, None)
            else:
                warning = 'Google did not return geocode info for: {0}\nResponse: {1}'
                task.log_warn(warning.format(doc.address, geo))

        # Term: when exactly one of the two dates is known, derive the other
        # one from the term length (element 2 of the 'term' tuple, used as days).
        doc.commencement_date = detected.get('commencement_date')
        doc.expiration_date = detected.get('expiration_date')

        term_info = detected.get('term')
        if term_info:
            term_length = timedelta(days=term_info[2])
            starts, ends = doc.commencement_date, doc.expiration_date
            if starts and not ends:
                doc.expiration_date = starts + term_length
            elif ends and not starts:
                doc.commencement_date = ends - term_length

        # An expiration that is not after the commencement is discarded.
        starts, ends = doc.commencement_date, doc.expiration_date
        if starts and ends and starts >= ends:
            doc.expiration_date = None

        # Lease type is chosen from the sum of the three pay_* values.
        net_charges = sum(int(detected.get(key) or False)
                          for key in ('pay_taxes', 'pay_costs', 'pay_insurance'))
        lease_type_by_count = {3: 'triple-net', 2: 'double-net', 1: 'single-net'}
        doc.lease_type = lease_type_by_count.get(net_charges, 'gross')

        # Property type: sorted values joined into one '; '-separated string.
        doc.property_type = '; '.join(
            sorted(detected.get('property_types__set') or set()))

        # Permitted / prohibited use.
        doc.permitted_uses = detected.get('permitted_use')
        doc.prohibited_uses = ProcessLeaseDocuments.ordered_list_without_repetitions(
            detected.get('prohibited_use__list'))

        # Renewal notice duration (element 2 of the tuple, used as days).
        notice_info = detected.get('renew_non_renew_notice')
        if notice_info:
            doc.renew_non_renew_notice_duration = timedelta(days=notice_info[2])

        # auto_renew is only overwritten when a value was actually detected.
        auto_renew_flag = detected.get('auto_renew')
        if auto_renew_flag is not None:
            doc.auto_renew = auto_renew_flag

        # Area: the first detected square-feet value is used.
        area_values = detected.get('area_square_feet__list')
        if area_values:
            doc.area_size_sq_ft = area_values[0]

        doc.alterations_allowed = ProcessLeaseDocuments.ordered_list_without_repetitions(
            detected.get('alterations_allowed__list'))

        # Security deposit and mean monthly rent: the largest detected value.
        deposit_values = detected.get('security_deposit__set')
        if deposit_values:
            doc.security_deposit = max(deposit_values)

        doc.rent_due_frequency = detected.get('rent_due_frequency')

        monthly_rent_values = detected.get('mean_rent_per_month__set')
        if monthly_rent_values:
            doc.mean_rent_per_month = max(monthly_rent_values)