def load_field_values(task: ExtendedTask,
                      document: Document,
                      document_fields: dict,
                      field_owners: dict) -> Dict:
    """Turn raw per-alias values into detected field values grouped by field.

    Keys of ``document_fields`` are matched case-insensitively against the
    codes of the fields of the document's type and against the type's
    ``field_code_aliases``. Each raw value (or each item, when the raw value
    is a list) is passed to the field type adapter's
    ``extract_from_possible_value_text``; every non-``None`` result is wrapped
    in a ``DetectedFieldValue``.

    Args:
        task: Task used to log a warning (``log_warn``) for every key that
            cannot be resolved to a field.
        document: Document whose ``document_type`` supplies the fields and
            the alias table.
        document_fields: Mapping of field code / alias to a raw value or a
            list of raw values. Entries whose value is ``None`` are skipped.
        field_owners: Mapping keyed by the same codes / aliases; the looked-up
            entry (``None`` when absent) is passed as ``user`` to
            ``DetectedFieldValue``.

    Returns:
        Dict mapping each resolved field to the list of its
        ``DetectedFieldValue`` objects. Empty when the document has no type.
    """
    fields_to_values = {}
    document_type = document.document_type
    if not document_type:
        return fields_to_values

    field_codes_to_fields = {
        f.code.lower(): f for f in document_type.fields.all()
    }

    field_code_aliases = document_type.field_code_aliases
    if field_code_aliases:
        # Resolve all aliases against the plain code table first, so aliases
        # never chain through each other.
        alias_to_field = {}
        for field_alias, field_code in field_code_aliases.items():
            if not (field_alias and field_code):
                continue
            target = field_codes_to_fields.get(field_code.lower())
            # An alias pointing at an unknown code is skipped: storing None
            # here would hide a real field whose code equals the alias.
            if target is not None:
                alias_to_field[field_alias.lower()] = target
        field_codes_to_fields.update(alias_to_field)

    for field_alias, field_value_text in document_fields.items():
        if field_value_text is None:
            continue

        field = field_codes_to_fields.get(field_alias.lower())
        if not field:
            task.log_warn(
                'Field alias "{0}" not found for document type {1}'.format(
                    field_alias, document_type.code))
            continue

        field_type_adapter = FIELD_TYPES_REGISTRY.get(
            field.type)  # type: FieldType
        field_owner = field_owners.get(field_alias)

        # A scalar is handled as a one-item list so both shapes share a path.
        if isinstance(field_value_text, list):
            candidates = field_value_text
        else:
            candidates = [field_value_text]

        for possible_value_text in candidates:
            maybe_value = field_type_adapter.extract_from_possible_value_text(
                field, possible_value_text)
            if maybe_value is None:
                continue
            fields_to_values.setdefault(field, []).append(
                DetectedFieldValue(field, maybe_value, user=field_owner))

    return fields_to_values
def load_field_values(task: ExtendedTask,
                      document: Document,
                      document_fields_alias_to_value: Dict[str, Any]) \
        -> Dict[DocumentField, FieldValueDTO]:
    """Convert raw per-alias values into one ``FieldValueDTO`` per field.

    Keys of ``document_fields_alias_to_value`` are matched case-insensitively
    against the codes of the fields of the document's type and against the
    type's ``field_code_aliases``. For each resolved field the raw value (or,
    for a list, each item in order) is passed to
    ``TypedField.extract_from_possible_value_text``; the first truthy result
    is converted with ``field_value_python_to_json`` and stored.

    Args:
        task: Task used to log a warning (``log_warn``) for every key that
            cannot be resolved to a field.
        document: Document whose ``document_type`` supplies the fields and
            the alias table.
        document_fields_alias_to_value: Mapping of field code / alias to a raw
            value or a list of raw values. ``None`` values are skipped.

    Returns:
        Dict mapping each resolved field to a ``FieldValueDTO``. Fields with
        no truthy extracted value are absent. Empty when the document has no
        type.
    """
    fields_to_values = {}  # type: Dict[DocumentField, FieldValueDTO]
    document_type = document.document_type
    if not document_type:
        return fields_to_values

    field_codes_to_fields = {
        f.code.lower(): f for f in document_type.fields.all()
    }

    field_code_aliases = document_type.field_code_aliases
    if field_code_aliases:
        # Resolve all aliases against the plain code table first, so aliases
        # never chain through each other.
        alias_to_field = {}
        for field_alias, field_code in field_code_aliases.items():
            if not (field_alias and field_code):
                continue
            target = field_codes_to_fields.get(field_code.lower())
            # An alias pointing at an unknown code is skipped: storing None
            # here would hide a real field whose code equals the alias.
            if target is not None:
                alias_to_field[field_alias.lower()] = target
        field_codes_to_fields.update(alias_to_field)

    for field_alias, field_value_text in document_fields_alias_to_value.items():
        if field_value_text is None:
            continue

        field = field_codes_to_fields.get(field_alias.lower())  # type: DocumentField
        if not field:
            task.log_warn(
                'Field alias "{0}" not found for document type {1}'.format(
                    field_alias, document_type.code))
            continue

        typed_field = TypedField.by(field)  # type: TypedField

        # A scalar is handled as a one-item list so both shapes share a path.
        if isinstance(field_value_text, list):
            candidates = field_value_text
        else:
            candidates = [field_value_text]

        for possible_value_text in candidates:
            maybe_value = typed_field.extract_from_possible_value_text(
                possible_value_text)
            # NOTE(review): this truthiness test also skips falsy results such
            # as 0, False or '' - confirm that is intended and not a stand-in
            # for "is not None".
            if maybe_value:
                fields_to_values[field] = FieldValueDTO(
                    field_value=typed_field.field_value_python_to_json(
                        maybe_value))
                # Only the first usable candidate is kept for a field.
                break

    return fields_to_values
def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
    """Fill the lease attributes of ``doc`` from fields detected in its text.

    The text is split into sentences, ``detect_fields`` is run on them and the
    resulting values are copied onto ``doc``: address (geocoded through
    ``geocoder.google``), commencement / expiration dates, lease type,
    property type, permitted / prohibited uses, renewal data, area, allowed
    alterations, security deposit and rent figures.

    Args:
        doc: Lease document object whose attributes are assigned in place.
        doc_text: Full text of the document.
        task: Task used to log a warning (``log_warn``) when geocoding gives
            neither a result nor a "ZERO" status.
    """
    sentences = get_sentence_list(doc_text)
    detected = detect_fields(sentences)

    # Address: use the detected one, otherwise the default address detector.
    doc.address = detected.get('address')
    if not doc.address:
        doc.address = detect_address_default(doc_text, sentences)

    if doc.address:
        geo = geocoder.google(doc.address)
        if geo.ok:
            doc.address_latitude = geo.lat
            doc.address_longitude = geo.lng
            doc.address_country = geo.country_long
            doc.address_state_province = geo.province_long
        elif geo.status and 'ZERO' in geo.status:
            # Google does not know the address, so it was probably detected
            # wrong: drop it together with everything derived from it.
            for attr_name in ('address',
                              'address_state_province',
                              'address_country',
                              'address_longitude',
                              'address_latitude'):
                setattr(doc, attr_name, None)
        else:
            task.log_warn(
                'Google did not return geocode info for: {0}\nResponse: {1}'.format(doc.address, geo))

    # Term: derive whichever end of the date range is missing.
    doc.commencement_date = detected.get('commencement_date')
    doc.expiration_date = detected.get('expiration_date')

    term_info = detected.get('term')
    if term_info:
        duration = timedelta(days=term_info[2])
        has_start = bool(doc.commencement_date)
        has_end = bool(doc.expiration_date)
        if has_start and not has_end:
            doc.expiration_date = doc.commencement_date + duration
        elif has_end and not has_start:
            doc.commencement_date = doc.expiration_date - duration

    # A range that does not end after it starts loses its expiration date.
    if doc.commencement_date and doc.expiration_date:
        if doc.commencement_date >= doc.expiration_date:
            doc.expiration_date = None

    # Lease type: named after the sum of the three pay_* field values.
    net_count = sum(
        int(detected.get(key) or False)
        for key in ('pay_taxes', 'pay_costs', 'pay_insurance')
    )
    lease_type_names = {3: 'triple-net', 2: 'double-net', 1: 'single-net'}
    doc.lease_type = lease_type_names.get(net_count, 'gross')

    # Property type: detected types, sorted and joined with '; '.
    doc.property_type = '; '.join(
        sorted(detected.get('property_types__set') or set()))

    # Permitted / prohibited use.
    doc.permitted_uses = detected.get('permitted_use')
    doc.prohibited_uses = ProcessLeaseDocuments.ordered_list_without_repetitions(
        detected.get('prohibited_use__list'))

    # Renewal notice duration; element 2 of the tuple is used as days.
    notice_info = detected.get('renew_non_renew_notice')
    if notice_info:
        doc.renew_non_renew_notice_duration = timedelta(days=notice_info[2])

    # auto_renew is only assigned when a value was detected (None is skipped).
    auto_renew_value = detected.get('auto_renew')
    if auto_renew_value is not None:
        doc.auto_renew = auto_renew_value

    # Area: the first detected value is used.
    areas = detected.get('area_square_feet__list')
    if areas:
        doc.area_size_sq_ft = areas[0]

    doc.alterations_allowed = ProcessLeaseDocuments.ordered_list_without_repetitions(
        detected.get('alterations_allowed__list'))

    # Money fields: the largest detected amount is used.
    deposits = detected.get('security_deposit__set')
    if deposits:
        doc.security_deposit = max(deposits)

    doc.rent_due_frequency = detected.get('rent_due_frequency')

    monthly_rents = detected.get('mean_rent_per_month__set')
    if monthly_rents:
        doc.mean_rent_per_month = max(monthly_rents)