def test_canonicalize_location_inst():
    loc = Location(**raw)
    h.canonicalize_location_instance(loc)
    loc.save()
    actual = set(loc.__dict__.items())
    desired_set = set(desired.items())
    assert not (desired_set - actual)
def test_canonicalize_location_inst_model_fields_only():
    # Restrict the raw data to keys that are actual Location model fields before instantiating
    field_names = [f.name for f in Location._meta.fields]
    valid_fields = {k: raw[k] for k in raw if k in field_names}
    loc = Location(**valid_fields)
    h.canonicalize_location_instance(loc)
    loc.save()
    actual = set(loc.__dict__.items())
    desired_set = set(desired_actual_field_names.items())
    assert not (desired_set - actual)
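# A standalone sketch (not part of the original module) of the field-filtering pattern used in
# the test above: keep only keys that map to concrete model fields before instantiating the
# model. It works for any Django model class passed in.
def filter_to_model_fields(model_cls, data):
    field_names = {f.name for f in model_cls._meta.fields}
    return {k: v for k, v in data.items() if k in field_names}

# Example (illustrative): filter_to_model_fields(Location, raw) drops any keys in `raw`
# that are not actual Location fields.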
def create_location(location_map, row, location_value_map=None):
    """
    Create a location object

    Input parameters:

    - location_map: a dictionary with key = field name on the location model and
        value = corresponding field name on the current row of data

    - row: the row of data currently being loaded
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)
    location_data = load_data_into_model(
        Location(), row, value_map=location_value_map, field_map=location_map, as_dict=True, save=False)
    return Location.objects.create(**location_data)
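# A minimal usage sketch for create_location; the column names and values below are hypothetical
# stand-ins for a broker row, and the real field maps live elsewhere in this codebase. The call
# itself is left commented out because it writes to the database.
example_location_map = {
    'city_name': 'legal_entity_city_name',             # model field -> row column (hypothetical)
    'state_code': 'legal_entity_state_code',
    'location_country_code': 'legal_entity_country_code',
}
example_row = {
    'legal_entity_city_name': 'Springfield',
    'legal_entity_state_code': 'VA',
    'legal_entity_country_code': 'USA',
}
# location = create_location(example_location_map, example_row,
#                            location_value_map={'recipient_flag': True})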
def get_or_create_location(location_map, row, location_value_map=None):
    """
    Retrieve or create a location object

    Input parameters:

    - location_map: a dictionary with key = field name on the location model and
        value = corresponding field name on the current row of data

    - row: the row of data currently being loaded
    """
    if location_value_map is None:
        location_value_map = {}

    location_country = RefCountryCode.objects.filter(
        country_code=row[location_map.get('location_country_code')]).first()

    # temporary fix until broker is patched: remove later
    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Fix for procurement data foreign provinces stored as state_code
        if location_country and location_country.country_code != "USA":
            location_value_map.update({'foreign_province': state_code})
            location_value_map.update({'state_code': None})
        else:
            location_value_map.update({'state_code': state_code.replace('.', '')})
    # end of temporary fix

    if location_country:
        location_value_map.update({
            'location_country_code': location_country,
            'country_name': location_country.country_name
        })
    else:
        # no country found for this code
        location_value_map.update({
            'location_country_code': None,
            'country_name': None
        })

    row = canonicalize_location_dict(row)

    location_data = load_data_into_model(
        Location(), row, value_map=location_value_map, field_map=location_map, as_dict=True)

    del location_data['data_source']  # hacky way to ensure we don't create a series of empty location records
    if len(location_data):
        try:
            location_object, created = Location.objects.get_or_create(
                **location_data, defaults={'data_source': 'DBR'})
        except MultipleObjectsReturned:
            # incoming location data is so sparse that comparing it to existing locations
            # yielded multiple records. create a new location with this limited info.
            # note: this will need to be fixed up to prevent duplicate location records with the
            # same sparse data
            location_object = Location.objects.create(**location_data)
            created = True
        return location_object, created
    else:
        # record had no location information at all
        return None, None
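# A standalone sketch of the state_code handling in get_or_create_location above: strip periods
# for US rows (e.g. "D.C." -> "DC") and move the value to foreign_province for non-US rows. The
# helper name is illustrative only, not part of the loader's API.
def clean_state_code(state_code, country_code):
    if state_code is None:
        return {}
    if country_code and country_code != 'USA':
        return {'foreign_province': state_code, 'state_code': None}
    return {'state_code': state_code.replace('.', '')}

assert clean_state_code('D.C.', 'USA') == {'state_code': 'DC'}
assert clean_state_code('ON', 'CAN') == {'foreign_province': 'ON', 'state_code': None}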
def create_subaward(self, row, shared_award_mappings, award_type):
    """ Creates a subaward if the internal ID of the current row is in the shared award mappings
        (this was made to satisfy codeclimate complexity issues) """

    # only insert the subaward if the internal_id is in our mappings, otherwise there was a problem
    # finding one or more parts of the shared data for it and we don't want to insert it.
    if row['internal_id'] in shared_award_mappings:
        shared_mappings = shared_award_mappings[row['internal_id']]

        prime_award_dict = {}
        if shared_mappings['award']:
            prime_award_dict['prime_recipient'] = shared_mappings['award'].recipient
            if prime_award_dict['prime_recipient']:
                prime_award_dict['prime_recipient_name'] = shared_mappings['award'].recipient.recipient_name
                prime_award_dict['business_categories'] = \
                    (shared_mappings['award'].recipient.business_categories or [])

        upper_case_dict_values(row)

        cfda = None
        # check if the key exists and if it isn't empty (only here for grants)
        if 'cfda_numbers' in row and row['cfda_numbers']:
            only_num = row['cfda_numbers'].split(' ')
            cfda = Cfda.objects.filter(program_number=only_num[0]).first()

        if award_type == 'procurement':
            le_location_map = location_d1_recipient_mapper(row)
            recipient_name = row['company_name']
            parent_recipient_name = row['parent_company_name']
            business_type_code = None
            business_types_description = row['bus_types']
        else:
            le_location_map = location_d2_recipient_mapper(row)
            recipient_name = row['awardee_name']
            parent_recipient_name = None
            business_type_code = None
            business_types_description = None

        if le_location_map["location_zip"]:
            le_location_map.update(
                zip4=le_location_map["location_zip"],
                zip5=le_location_map["location_zip"][:5],
                zip_last4=le_location_map["location_zip"][5:]
            )
        le_location_map.pop("location_zip")
        recipient_location = Location(**le_location_map)
        recipient_location.pre_save()

        pop_value_map = pop_mapper(row)
        pop_value_map['place_of_performance_flag'] = True
        if pop_value_map["location_zip"]:
            pop_value_map.update(
                zip4=pop_value_map["location_zip"],
                zip5=pop_value_map["location_zip"][:5],
                zip_last4=pop_value_map["location_zip"][5:]
            )
        pop_value_map.pop("location_zip")
        place_of_performance = Location(**pop_value_map)
        place_of_performance.pre_save()

        if not parent_recipient_name and row.get('parent_duns'):
            duns_obj = RecipientLookup.objects.filter(duns=row['parent_duns'], legal_business_name__isnull=False) \
                .values('legal_business_name').first()
            if duns_obj:
                parent_recipient_name = duns_obj['legal_business_name']

        subaward_dict = {
            'award': shared_mappings['award'],
            'recipient_unique_id': row['duns'],
            'recipient_name': recipient_name,
            'dba_name': row['dba_name'],
            'parent_recipient_unique_id': row['parent_duns'],
            'parent_recipient_name': parent_recipient_name,
            'business_type_code': business_type_code,
            'business_type_description': business_types_description,
            'prime_recipient': prime_award_dict.get('prime_recipient', None),
            'prime_recipient_name': prime_award_dict.get('prime_recipient_name', None),
            'business_categories': prime_award_dict.get('business_categories', []),
            'recipient_location_country_code': recipient_location.location_country_code,
            'recipient_location_country_name': recipient_location.country_name,
            'recipient_location_state_code': recipient_location.state_code,
            'recipient_location_state_name': recipient_location.state_name,
            'recipient_location_county_code': recipient_location.county_code,
            'recipient_location_county_name': recipient_location.county_name,
            'recipient_location_city_code': recipient_location.city_code,
            'recipient_location_city_name': recipient_location.city_name,
            'recipient_location_zip4': recipient_location.zip4,
            'recipient_location_zip5': recipient_location.zip5,
            'recipient_location_street_address': recipient_location.address_line1,
            'recipient_location_congressional_code': recipient_location.congressional_code,
            'recipient_location_foreign_postal_code': recipient_location.foreign_postal_code,
            'officer_1_name': row['top_paid_fullname_1'],
            'officer_1_amount': row['top_paid_amount_1'],
            'officer_2_name': row['top_paid_fullname_2'],
            'officer_2_amount': row['top_paid_amount_2'],
            'officer_3_name': row['top_paid_fullname_3'],
            'officer_3_amount': row['top_paid_amount_3'],
            'officer_4_name': row['top_paid_fullname_4'],
            'officer_4_amount': row['top_paid_amount_4'],
            'officer_5_name': row['top_paid_fullname_5'],
            'officer_5_amount': row['top_paid_amount_5'],
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': shared_mappings['award'].awarding_agency if shared_mappings['award'] else None,
            'funding_agency': shared_mappings['award'].funding_agency if shared_mappings['award'] else None,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['q1_flag'],
            'recovery_model_question2': row['q2_flag'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'broker_award_id': row['id'],
            'internal_id': row['internal_id'],
            'award_type': award_type,
            'pop_country_code': row['principle_place_country'],
            'pop_country_name': place_of_performance.country_name,
            'pop_state_code': row['principle_place_state'],
            'pop_state_name': row['principle_place_state_name'],
            'pop_county_code': place_of_performance.county_code,
            'pop_county_name': place_of_performance.county_name,
            'pop_city_code': place_of_performance.city_code,
            'pop_city_name': row['principle_place_city'],
            'pop_zip4': row['principle_place_zip'],
            'pop_street_address': row['principle_place_street'],
            'pop_congressional_code': row['principle_place_district'],
            'updated_at': datetime.utcnow()
        }

        # Either we're starting with an empty table in regards to this award type or we've deleted all
        # subawards related to the internal_id, either way we just create the subaward
        Subaward.objects.create(**subaward_dict)
        if shared_mappings['award']:
            award_update_id_list.append(shared_mappings['award'].id)
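# A small sketch of the zip handling in create_subaward above: the full "location_zip" value is
# copied to zip4 and split into zip5 / zip_last4. The helper name is illustrative only.
def split_zip(location_zip):
    return {
        'zip4': location_zip,
        'zip5': location_zip[:5],
        'zip_last4': location_zip[5:],
    }

assert split_zip('123456789') == {'zip4': '123456789', 'zip5': '12345', 'zip_last4': '6789'}
assert split_zip('12345') == {'zip4': '12345', 'zip5': '12345', 'zip_last4': ''}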
def load_locations(self, fabs_broker_data, total_rows, pop_flag=False):
    start_time = datetime.now()
    for index, row in enumerate(fabs_broker_data, 1):
        if not (index % 10000):
            logger.info('Locations: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        if pop_flag:
            location_value_map = {"place_of_performance_flag": True}
            field_map = pop_field_map
        else:
            location_value_map = {'recipient_flag': True}
            field_map = le_field_map

        row = canonicalize_location_dict(row)

        country_code = row[field_map.get('location_country_code')]
        pop_code = row[field_map.get('performance_code')] if pop_flag else None

        # We can assume that if the country code is blank and the place of performance code is NOT '00FORGN',
        # then the country code is USA
        if pop_flag and not country_code and pop_code != '00FORGN':
            row[field_map.get('location_country_code')] = 'USA'

        # Get country code obj
        location_country_code_obj = self.country_code_map.get(row[field_map.get('location_country_code')])

        # Fix state code periods
        state_code = row.get(field_map.get('state_code'))
        if state_code is not None:
            location_value_map.update({'state_code': state_code.replace('.', '')})

        if location_country_code_obj:
            location_value_map.update({
                'location_country_code': location_country_code_obj,
                'country_name': location_country_code_obj.country_name
            })

            if location_country_code_obj.country_code != 'USA':
                location_value_map.update({
                    'state_code': None,
                    'state_name': None
                })
        else:
            # no country found for this code
            location_value_map.update({
                'location_country_code': None,
                'country_name': None
            })

        location_instance_data = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=field_map,
            as_dict=True)

        loc_instance = Location(**location_instance_data)
        loc_instance.load_city_county_data()
        loc_instance.fill_missing_state_data()
        loc_instance.fill_missing_zip5()

        if pop_flag:
            pop_bulk.append(loc_instance)
        else:
            lel_bulk.append(loc_instance)

    if pop_flag:
        logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
    else:
        logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
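# A sketch of the FABS country-code default applied in the loop above: a blank country code with
# a place-of-performance code other than '00FORGN' is treated as 'USA'. The function name is
# illustrative, not part of the loader.
def default_country_code(country_code, pop_code, pop_flag):
    if pop_flag and not country_code and pop_code != '00FORGN':
        return 'USA'
    return country_code

assert default_country_code('', '00*****', pop_flag=True) == 'USA'
assert default_country_code('', '00FORGN', pop_flag=True) == ''
assert default_country_code('CAN', '00FORGN', pop_flag=True) == 'CAN'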
def get_or_create_location(location_map, row, location_value_map=None, empty_location=None, d_file=False, save=True):
    """
    Retrieve or create a location object

    Input parameters:

    - location_map: a dictionary with key = field name on the location model and
        value = corresponding field name on the current row of data

    - row: the row of data currently being loaded
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        # If the recipient's location country code is empty or it's 'UNITED STATES'
        # OR the place of performance location country code is empty and the performance code isn't 00FORGN
        # OR the place of performance location country code is empty and there isn't a performance code
        # OR the country code is a US territory
        # THEN we can assume that the location country code is 'USA'
        if ('recipient_flag' in location_value_map and location_value_map['recipient_flag'] and
                (row[location_map.get('location_country_code')] is None or
                 row[location_map.get('location_country_code')] == 'UNITED STATES')) or \
                ('place_of_performance_flag' in location_value_map and
                 location_value_map['place_of_performance_flag'] and
                 row[location_map.get('location_country_code')] is None and
                 "performance_code" in location_map and
                 row[location_map["performance_code"]] != '00FORGN') or \
                ('place_of_performance_flag' in location_value_map and
                 location_value_map['place_of_performance_flag'] and
                 row[location_map.get('location_country_code')] is None and
                 "performance_code" not in location_map) or \
                (row[location_map.get('location_country_code')] in territory_country_codes):
            row[location_map["location_country_code"]] = 'USA'

    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map.update({'state_code': state_code.replace('.', '')})

    location_value_map.update({
        'location_country_code': location_map.get('location_country_code'),
        'country_name': location_map.get('location_country_name'),
        'state_code': None,  # expired
        'state_name': None,
    })

    location_data = load_data_into_model(
        Location(), row, value_map=location_value_map, field_map=location_map, as_dict=True)

    del location_data['data_source']  # hacky way to ensure we don't create a series of empty location records
    if len(location_data):
        if len(location_data) == 1 and "place_of_performance_flag" in location_data and \
                location_data["place_of_performance_flag"]:
            location_object = None
            created = False
        elif save:
            location_object = load_data_into_model(
                Location(), row, value_map=location_value_map, field_map=location_map, as_dict=False, save=True)
            created = False
        else:
            location_object = load_data_into_model(
                Location(), row, value_map=location_value_map, field_map=location_map, as_dict=False)
            # location_object = Location.objects.create(**location_data)
            created = True
        return location_object, created
    else:
        # record had no location information at all
        return None, None
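# A standalone sketch of the guard near the end of get_or_create_location above: if the only
# surviving key is place_of_performance_flag, the row carried no real location data and no
# Location is created. The helper name is illustrative only.
def is_flag_only(location_data):
    return len(location_data) == 1 and location_data.get('place_of_performance_flag', False)

assert is_flag_only({'place_of_performance_flag': True}) is True
assert is_flag_only({'place_of_performance_flag': True, 'city_name': 'DENVER'}) is False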
def load_locations(self, fpds_broker_data, total_rows, pop_flag=False):
    start_time = datetime.now()
    for index, row in enumerate(fpds_broker_data, 1):
        if not (index % 10000):
            logger.info('Locations: Loading row {} of {} ({})'.format(str(index), str(total_rows),
                                                                      datetime.now() - start_time))

        if pop_flag:
            location_value_map = {"place_of_performance_flag": True}
            field_map = pop_field_map
        else:
            location_value_map = {'recipient_flag': True}
            field_map = le_field_map

        row = canonicalize_location_dict(row)

        # THIS ASSUMPTION DOES NOT HOLD FOR FPDS SINCE IT DOES NOT HAVE A PLACE OF PERFORMANCE CODE
        # We can assume that if the country code is blank and the place of performance code is NOT '00FORGN',
        # then the country code is USA
        # if pop_flag and not country_code and pop_code != '00FORGN':
        #     row[field_map.get('location_country_code')] = 'USA'

        # Get country code obj
        location_country_code_obj = self.country_code_map.get(row[field_map.get('location_country_code')])

        # Fix state code periods
        state_code = row.get(field_map.get('state_code'))
        if state_code is not None:
            location_value_map.update({'state_code': state_code.replace('.', '')})

        if location_country_code_obj:
            location_value_map.update({
                'location_country_code': location_country_code_obj,
                'country_name': location_country_code_obj.country_name
            })

            if location_country_code_obj.country_code != 'USA':
                location_value_map.update({
                    'state_code': None,
                    'state_name': None
                })
        else:
            # no country found for this code
            location_value_map.update({
                'location_country_code': None,
                'country_name': None
            })

        location_instance_data = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=field_map,
            as_dict=True)

        loc_instance = Location(**location_instance_data)
        loc_instance.load_city_county_data()
        loc_instance.fill_missing_state_data()
        loc_instance.fill_missing_zip5()

        if pop_flag:
            pop_bulk.append(loc_instance)
        else:
            lel_bulk.append(loc_instance)

    if pop_flag:
        logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
    else:
        logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE))
        Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
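# A sketch of the non-US cleanup shared by both loaders above: when a matching country record is
# found and it is not 'USA', state_code and state_name are blanked. The function name is
# illustrative only.
def non_us_state_overrides(country_code):
    if country_code != 'USA':
        return {'state_code': None, 'state_name': None}
    return {}

assert non_us_state_overrides('CAN') == {'state_code': None, 'state_name': None}
assert non_us_state_overrides('USA') == {}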