コード例 #1
0
    def load_field_values(task: ExtendedTask, document: Document,
                          document_fields: dict, field_owners: dict) -> Dict:
        """
        Convert raw field values keyed by field code or alias into detected values.

        Each key of ``document_fields`` is matched case-insensitively against the
        codes of the fields of the document's type and against the type's
        ``field_code_aliases``. Every raw value (or every item of a raw list) is
        passed to the field type adapter's ``extract_from_possible_value_text``;
        each result that is not None is wrapped into a ``DetectedFieldValue``
        attributed to the owner registered for that alias.

        :param task: task used to log a warning for every unknown alias.
        :param document: document whose ``document_type`` defines the known fields.
        :param document_fields: mapping of field code / alias -> raw value or list
            of raw values; entries whose value is None are ignored.
        :param field_owners: mapping of alias (spelled exactly as in
            ``document_fields``) -> user passed to ``DetectedFieldValue``;
            aliases missing here get None as the user.
        :return: mapping of field -> list of ``DetectedFieldValue``; empty when the
            document has no document type.
        """
        document_type = document.document_type
        fields_to_values = {}

        if not document_type:
            return fields_to_values

        field_codes_to_fields = {
            f.code.lower(): f
            for f in document_type.fields.all()
        }

        field_code_aliases = document_type.field_code_aliases
        if field_code_aliases:
            # Resolve all aliases against the real field codes first, then merge,
            # so that one alias can never be resolved through another alias.
            aliases_to_fields = {}
            for field_alias, field_code in field_code_aliases.items():
                if not field_alias or not field_code:
                    continue
                aliased_field = field_codes_to_fields.get(field_code.lower())
                # An alias pointing at an unknown code is skipped instead of being
                # stored as None: storing None could overwrite a real field that
                # happens to share the alias key. The lookup below still reports
                # such an alias as "not found".
                if aliased_field is not None:
                    aliases_to_fields[field_alias.lower()] = aliased_field
            field_codes_to_fields.update(aliases_to_fields)

        for field_alias, field_value_text in document_fields.items():
            if field_value_text is None:
                continue

            field = field_codes_to_fields.get(field_alias.lower())
            if not field:
                task.log_warn(
                    'Field alias "{0}" not found for document type {1}'.format(
                        field_alias, document_type.code))
                continue
            field_type_adapter = FIELD_TYPES_REGISTRY.get(
                field.type)  # type: FieldType
            field_owner = field_owners.get(field_alias)

            # Treat a scalar as a one-item list so both shapes share one code path.
            if isinstance(field_value_text, list):
                possible_value_texts = field_value_text
            else:
                possible_value_texts = [field_value_text]

            for possible_value_text in possible_value_texts:
                maybe_value = field_type_adapter.extract_from_possible_value_text(
                    field, possible_value_text)
                # Only None means "nothing extracted"; falsy values such as 0 are kept.
                if maybe_value is None:
                    continue
                fields_to_values.setdefault(field, []).append(
                    DetectedFieldValue(field, maybe_value, user=field_owner))

        return fields_to_values
コード例 #2
0
    def load_field_values(task: ExtendedTask, document: Document, document_fields_alias_to_value: Dict[str, Any]) \
            -> Dict[DocumentField, FieldValueDTO]:
        """
        Convert raw field values keyed by field code or alias into field value DTOs.

        Each key of ``document_fields_alias_to_value`` is matched case-insensitively
        against the codes of the fields of the document's type and against the
        type's ``field_code_aliases``. The raw value (or, for a list, the first item
        that yields a value) is run through the typed field's
        ``extract_from_possible_value_text`` and ``field_value_python_to_json`` and
        stored as a ``FieldValueDTO``.

        :param task: task used to log a warning for every unknown alias.
        :param document: document whose ``document_type`` defines the known fields.
        :param document_fields_alias_to_value: mapping of field code / alias -> raw
            value or list of candidate raw values; entries whose value is None are
            ignored.
        :return: mapping of field -> ``FieldValueDTO``; empty when the document has
            no document type. At most one value is stored per field.
        """
        document_type = document.document_type
        fields_to_values = {}  # type: Dict[DocumentField, FieldValueDTO]

        if not document_type:
            return fields_to_values

        field_codes_to_fields = {f.code.lower(): f for f in document_type.fields.all()}

        field_code_aliases = document_type.field_code_aliases
        if field_code_aliases:
            # Resolve all aliases against the real field codes first, then merge,
            # so that one alias can never be resolved through another alias.
            aliases_to_fields = {}
            for field_alias, field_code in field_code_aliases.items():
                if not field_alias or not field_code:
                    continue
                aliased_field = field_codes_to_fields.get(field_code.lower())
                # An alias pointing at an unknown code is skipped instead of being
                # stored as None: storing None could overwrite a real field that
                # happens to share the alias key. The lookup below still reports
                # such an alias as "not found".
                if aliased_field is not None:
                    aliases_to_fields[field_alias.lower()] = aliased_field
            field_codes_to_fields.update(aliases_to_fields)

        for field_alias, field_value_text in document_fields_alias_to_value.items():
            if field_value_text is None:
                continue

            field = field_codes_to_fields.get(field_alias.lower())  # type: DocumentField
            if not field:
                task.log_warn(
                    'Field alias "{0}" not found for document type {1}'.format(field_alias, document_type.code))
                continue
            typed_field = TypedField.by(field)  # type: TypedField

            # Treat a scalar as a one-item list so both shapes share one code path.
            if isinstance(field_value_text, list):
                possible_value_texts = field_value_text
            else:
                possible_value_texts = [field_value_text]

            for possible_value_text in possible_value_texts:
                maybe_value = typed_field.extract_from_possible_value_text(possible_value_text)
                # Compare with None rather than relying on truthiness: a plain
                # truthiness test would silently drop legitimate values such as
                # 0, 0.0 or False.
                if maybe_value is None:
                    continue
                fields_to_values[field] = FieldValueDTO(
                    field_value=typed_field.field_value_python_to_json(maybe_value))
                # The first candidate that yields a value wins.
                break

        return fields_to_values
コード例 #3
0
    def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
        """
        Detect lease fields in the document text and copy them onto ``doc``.

        The text is split into sentences with ``get_sentence_list`` and passed to
        ``detect_fields``; the resulting mapping is then read key by key to fill
        the address / geocoding, term, lease type, property type, use, renewal,
        area, deposit and rent attributes of ``doc``.

        :param doc: lease document whose attributes are assigned in place.
        :param doc_text: full text of the document.
        :param task: task used to log a warning when geocoding gives no usable answer.

        NOTE(review): nothing in the lines shown here saves ``doc`` or returns a
        value - presumably the caller persists it; confirm.
        """
        sentences = get_sentence_list(doc_text)
        # fields = detect_fields(sentences, groups=('address',))
        fields = detect_fields(sentences)

        # Address: prefer the detected field, otherwise fall back to the default detector.
        doc.address = fields.get('address')
        if not doc.address:
            doc.address = detect_address_default(doc_text, sentences)

        if doc.address:
            # Geocode the address to fill coordinates, country and state/province.
            g = geocoder.google(doc.address)
            if g.ok:
                doc.address_latitude = g.lat
                doc.address_longitude = g.lng
                doc.address_country = g.country_long
                doc.address_state_province = g.province_long
            elif g.status and 'ZERO' in g.status:
                # Google does not know such address - probably we detected it wrong.
                doc.address = None
                doc.address_state_province = None
                doc.address_country = None
                doc.address_longitude = None
                doc.address_latitude = None
            else:
                # Any other failure: keep the detected address and only log the response.
                task.log_warn(
                    'Google did not return geocode info for: {0}\nResponse: {1}'.format(doc.address,
                                                                                        g))
        # return

        # term
        doc.commencement_date = fields.get('commencement_date')
        doc.expiration_date = fields.get('expiration_date')

        # Element [2] of the term tuple is used as a number of days.
        # NOTE(review): the layout of the other tuple elements is not visible here - confirm.
        term_tuple = fields.get('term')
        if term_tuple:
            term = timedelta(days=term_tuple[2])
            # Derive whichever end of the lease period is missing from the other end and the term.
            if doc.commencement_date and not doc.expiration_date:
                doc.expiration_date = doc.commencement_date + term
            elif not doc.commencement_date and doc.expiration_date:
                doc.commencement_date = doc.expiration_date - term

        # An expiration date that is not after the commencement date is discarded.
        if doc.commencement_date \
                and doc.expiration_date \
                and doc.commencement_date >= doc.expiration_date:
            doc.expiration_date = None

        # lease type
        # Each flag becomes 0 or 1 (missing / falsy -> 0); their sum selects the lease type.
        pay_taxes = int(fields.get('pay_taxes') or False)
        pay_costs = int(fields.get('pay_costs') or False)
        pay_insurance = int(fields.get('pay_insurance') or False)
        lt = pay_taxes + pay_costs + pay_insurance
        if lt == 3:
            doc.lease_type = 'triple-net'
        elif lt == 2:
            doc.lease_type = 'double-net'
        elif lt == 1:
            doc.lease_type = 'single-net'
        else:
            doc.lease_type = 'gross'

        # property type
        # Sorted before joining so the stored string does not depend on set iteration order.
        property_types = list(fields.get('property_types__set') or set())
        property_types.sort()
        doc.property_type = '; '.join(property_types)

        # permitted use
        doc.permitted_uses = fields.get('permitted_use')

        # prohibited use
        doc.prohibited_uses = ProcessLeaseDocuments.ordered_list_without_repetitions(
            fields.get('prohibited_use__list'))

        # renew / non-renew notice: element [2] of the tuple is used as a number of days
        renew_duration_tuple = fields.get('renew_non_renew_notice')
        if renew_duration_tuple:
            doc.renew_non_renew_notice_duration = timedelta(days=renew_duration_tuple[2])

        # auto renew: assigned only when detected, so an explicit False is kept
        # and a missing value leaves the attribute untouched
        auto_renew = fields.get('auto_renew')
        if auto_renew is not None:
            doc.auto_renew = auto_renew

        # area: the first detected value is used
        area_square_feet_list = fields.get('area_square_feet__list')
        if area_square_feet_list:
            doc.area_size_sq_ft = area_square_feet_list[0]

        # alterations allowed
        doc.alterations_allowed = ProcessLeaseDocuments.ordered_list_without_repetitions(
            fields.get('alterations_allowed__list'))

        # security deposit: the largest detected amount is used
        security_deposit = fields.get('security_deposit__set')
        if security_deposit:
            doc.security_deposit = max(security_deposit)

        # rent due frequency
        doc.rent_due_frequency = fields.get('rent_due_frequency')

        # mean rent per month: the largest detected amount is used
        mean_rent_per_month = fields.get('mean_rent_per_month__set')
        if mean_rent_per_month:
            doc.mean_rent_per_month = max(mean_rent_per_month)