def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
    """Fill the lease attributes of ``doc`` from fields detected in ``doc_text``.

    The text is split with ``get_sentence_list`` and the sentences are passed
    to ``detect_fields``; the returned values are then copied, combined or
    reduced onto ``doc``. ``doc`` is mutated in place and nothing is returned.

    Args:
        doc: Lease document object whose attributes are assigned.
        doc_text: Full text of the document.
        task: Part of the signature; this variant never uses it.
    """
    sentences = get_sentence_list(doc_text)
    detected = detect_fields(sentences)

    # Address: prefer the detected field, otherwise use the default detector.
    doc.address = detected.get('address')
    if not doc.address:
        doc.address = detect_address_default(doc_text, sentences)

    # Term: take both dates as detected, then derive a missing one from the
    # other using the term length (element 2 of the term tuple, used as days).
    doc.commencement_date = detected.get('commencement_date')
    doc.expiration_date = detected.get('expiration_date')
    term_info = detected.get('term')
    if term_info:
        lease_length = timedelta(days=term_info[2])
        has_start = bool(doc.commencement_date)
        has_end = bool(doc.expiration_date)
        if has_start and not has_end:
            doc.expiration_date = doc.commencement_date + lease_length
        elif has_end and not has_start:
            doc.commencement_date = doc.expiration_date - lease_length

    # An expiration that is not strictly after the commencement is discarded.
    start, end = doc.commencement_date, doc.expiration_date
    if start and end and start >= end:
        doc.expiration_date = None

    # Lease type: add up the integer values of the three expense flags and
    # map the total to a label; any total other than 1, 2 or 3 means 'gross'.
    expenses_paid = sum(
        int(detected.get(flag) or False)
        for flag in ('pay_taxes', 'pay_costs', 'pay_insurance'))
    doc.lease_type = {
        3: 'triple-net',
        2: 'double-net',
        1: 'single-net',
    }.get(expenses_paid, 'gross')

    # Property type: sorted values joined into a single '; '-separated string.
    doc.property_type = '; '.join(
        sorted(detected.get('property_types__set') or set()))

    # Permitted / prohibited use.
    doc.permitted_uses = detected.get('permitted_use')
    dedupe = ProcessLeaseDocuments.ordered_list_without_repetitions
    doc.prohibited_uses = dedupe(detected.get('prohibited_use__list'))

    # Renewal notice period: element 2 of the tuple is used as days.
    notice_info = detected.get('renew_non_renew_notice')
    if notice_info:
        doc.renew_non_renew_notice_duration = timedelta(days=notice_info[2])

    # Only an explicit detection result overwrites the current auto_renew.
    auto_renew_flag = detected.get('auto_renew')
    if auto_renew_flag is not None:
        doc.auto_renew = auto_renew_flag

    # Area: the first entry of the detected list is used.
    areas = detected.get('area_square_feet__list')
    if areas:
        doc.area_size_sq_ft = areas[0]

    doc.alterations_allowed = dedupe(detected.get('alterations_allowed__list'))
    doc.rent_due_frequency = detected.get('rent_due_frequency')

    # Monetary fields: keep the largest of the detected values, if any.
    for attr_name, field_key in (
            ('security_deposit', 'security_deposit__set'),
            ('mean_rent_per_month', 'mean_rent_per_month__set')):
        amounts = detected.get(field_key)
        if amounts:
            setattr(doc, attr_name, max(amounts))
def process_fields(doc: LeaseDocument, doc_text: str, task: ExtendedTask):
    """Fill the lease attributes of ``doc`` from fields detected in ``doc_text``.

    The text is split with ``get_sentence_list`` and the sentences are passed
    to ``detect_fields``; the returned values are then copied, combined or
    reduced onto ``doc``. A non-empty address is additionally looked up with
    ``geocoder.google``. ``doc`` is mutated in place and nothing is returned.

    Args:
        doc: Lease document object whose attributes are assigned.
        doc_text: Full text of the document.
        task: Used only to log a warning (``task.log_warn``) when the
            geocoding lookup neither succeeds nor reports a 'ZERO' status.
    """
    sentences = get_sentence_list(doc_text)
    detected = detect_fields(sentences)

    # Address: prefer the detected field, otherwise use the default detector.
    doc.address = detected.get('address')
    if not doc.address:
        doc.address = detect_address_default(doc_text, sentences)

    if doc.address:
        geo = geocoder.google(doc.address)
        if geo.ok:
            doc.address_latitude = geo.lat
            doc.address_longitude = geo.lng
            doc.address_country = geo.country_long
            doc.address_state_province = geo.province_long
        elif geo.status and 'ZERO' in geo.status:
            # Google does not recognise the address, so it was probably
            # detected wrongly: clear it together with all derived fields.
            for attr_name in ('address',
                              'address_state_province',
                              'address_country',
                              'address_longitude',
                              'address_latitude'):
                setattr(doc, attr_name, None)
        else:
            # Any other outcome is only logged; processing continues.
            task.log_warn(
                'Google did not return geocode info for: {0}\nResponse: {1}'.format(
                    doc.address, geo))

    # Term: take both dates as detected, then derive a missing one from the
    # other using the term length (element 2 of the term tuple, used as days).
    doc.commencement_date = detected.get('commencement_date')
    doc.expiration_date = detected.get('expiration_date')
    term_info = detected.get('term')
    if term_info:
        lease_length = timedelta(days=term_info[2])
        has_start = bool(doc.commencement_date)
        has_end = bool(doc.expiration_date)
        if has_start and not has_end:
            doc.expiration_date = doc.commencement_date + lease_length
        elif has_end and not has_start:
            doc.commencement_date = doc.expiration_date - lease_length

    # An expiration that is not strictly after the commencement is discarded.
    start, end = doc.commencement_date, doc.expiration_date
    if start and end and start >= end:
        doc.expiration_date = None

    # Lease type: add up the integer values of the three expense flags and
    # map the total to a label; any total other than 1, 2 or 3 means 'gross'.
    expenses_paid = sum(
        int(detected.get(flag) or False)
        for flag in ('pay_taxes', 'pay_costs', 'pay_insurance'))
    doc.lease_type = {
        3: 'triple-net',
        2: 'double-net',
        1: 'single-net',
    }.get(expenses_paid, 'gross')

    # Property type: sorted values joined into a single '; '-separated string.
    doc.property_type = '; '.join(
        sorted(detected.get('property_types__set') or set()))

    # Permitted / prohibited use.
    doc.permitted_uses = detected.get('permitted_use')
    dedupe = ProcessLeaseDocuments.ordered_list_without_repetitions
    doc.prohibited_uses = dedupe(detected.get('prohibited_use__list'))

    # Renewal notice period: element 2 of the tuple is used as days.
    notice_info = detected.get('renew_non_renew_notice')
    if notice_info:
        doc.renew_non_renew_notice_duration = timedelta(days=notice_info[2])

    # Only an explicit detection result overwrites the current auto_renew.
    auto_renew_flag = detected.get('auto_renew')
    if auto_renew_flag is not None:
        doc.auto_renew = auto_renew_flag

    # Area: the first entry of the detected list is used.
    areas = detected.get('area_square_feet__list')
    if areas:
        doc.area_size_sq_ft = areas[0]

    doc.alterations_allowed = dedupe(detected.get('alterations_allowed__list'))
    doc.rent_due_frequency = detected.get('rent_due_frequency')

    # Monetary fields: keep the largest of the detected values, if any.
    for attr_name, field_key in (
            ('security_deposit', 'security_deposit__set'),
            ('mean_rent_per_month', 'mean_rent_per_month__set')):
        amounts = detected.get(field_key)
        if amounts:
            setattr(doc, attr_name, max(amounts))