Example #1
0
def test_canonicalize_location_inst():
    """Check that a Location built from ``raw`` carries every ``desired`` attribute once canonicalized and saved."""
    location = Location(**raw)
    h.canonicalize_location_instance(location)
    location.save()

    observed_pairs = set(location.__dict__.items())
    expected_pairs = set(desired.items())

    # Only the expected (name, value) pairs are checked; any extra attributes on the instance are ignored.
    missing_pairs = expected_pairs.difference(observed_pairs)
    assert not missing_pairs
def test_canonicalize_location_inst():
    """
    Check that a canonicalized and saved Location carries every expected attribute.

    Keys of ``raw`` that are not names of Location model fields are dropped before the instance is built, and the
    result is compared against ``desired_actual_field_names``.
    """
    model_field_names = [field.name for field in Location._meta.fields]

    # Keep only the raw entries the model constructor can accept
    init_kwargs = {}
    for key in raw:
        if key in model_field_names:
            init_kwargs[key] = raw[key]

    location = Location(**init_kwargs)
    h.canonicalize_location_instance(location)
    location.save()

    observed_pairs = set(location.__dict__.items())
    expected_pairs = set(desired_actual_field_names.items())

    # Only the expected (name, value) pairs are checked; any extra attributes on the instance are ignored.
    missing_pairs = expected_pairs.difference(observed_pairs)
    assert not missing_pairs
def create_location(location_map, row, location_value_map=None):
    """
    Build the field values for a location from a row of data and insert a new Location record

    Input parameters:
        - location_map: a dictionary with key = field name on the location model and value = corresponding field name
          on the current row of data
        - row: the row of data currently being loaded (it is passed through canonicalize_location_dict first)
        - location_value_map: optional dictionary handed to load_data_into_model as its value_map; an empty
          dictionary is used when it is omitted

    Returns whatever Location.objects.create returns for the assembled field values.
    """
    value_overrides = {} if location_value_map is None else location_value_map

    canonical_row = canonicalize_location_dict(row)

    # as_dict=True / save=False: we only want the keyword arguments here, the insert happens below
    model_kwargs = load_data_into_model(
        Location(),
        canonical_row,
        value_map=value_overrides,
        field_map=location_map,
        as_dict=True,
        save=False,
    )

    new_location = Location.objects.create(**model_kwargs)
    return new_location
def get_or_create_location(location_map, row, location_value_map=None):
    """
    Retrieve or create a location object

    Input parameters:
        - location_map: a dictionary with key = field name on the location model
            and value = corresponding field name on the current row of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary handed to load_data_into_model as its
            value_map. When supplied, it is updated in place with the state/country values
            derived below; when omitted, a fresh dictionary is used for every call.

    Returns a (location_object, created) tuple, or (None, None) when the row produced
    no location data at all.
    """
    # Use a fresh dict per call. A shared mutable default would carry state_code,
    # foreign_province and country values from one row over into the next one.
    if location_value_map is None:
        location_value_map = {}

    location_country = RefCountryCode.objects.filter(
        country_code=row[location_map.get('location_country_code')]).first()

    # temporary fix until broker is patched: remove later
    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Fix for procurement data foreign provinces stored as state_code
        if location_country and location_country.country_code != "USA":
            location_value_map.update({'foreign_province': state_code})
            location_value_map.update({'state_code': None})
        else:
            # Remove periods from state codes (e.g. "D.C." -> "DC")
            location_value_map.update(
                {'state_code': state_code.replace('.', '')})
    # end of temporary fix

    if location_country:
        location_value_map.update({
            'location_country_code': location_country,
            'country_name': location_country.country_name
        })
    else:
        # no country found for this code
        location_value_map.update({
            'location_country_code': None,
            'country_name': None
        })

    row = canonicalize_location_dict(row)

    location_data = load_data_into_model(Location(),
                                         row,
                                         value_map=location_value_map,
                                         field_map=location_map,
                                         as_dict=True)

    del location_data[
        'data_source']  # hacky way to ensure we don't create a series of empty location records
    if not location_data:
        # record had no location information at all
        return None, None

    try:
        location_object, created = Location.objects.get_or_create(
            **location_data, defaults={'data_source': 'DBR'})
    except MultipleObjectsReturned:
        # incoming location data is so sparse that comparing it to existing locations
        # yielded multiple records. create a new location with this limited info.
        # note: this will need fixed up to prevent duplicate location records with the
        # same sparse data
        location_object = Location.objects.create(**location_data)
        created = True
    return location_object, created
Example #5
0
    def create_subaward(self, row, shared_award_mappings, award_type):
        """ Insert a Subaward for the given row, provided the row's internal ID is in the shared award mappings.

            Rows whose internal_id has no entry in shared_award_mappings are skipped: one or more parts of the shared
            data could not be found for them, so they must not be inserted. When the mapped prime award is set, its
            id is also appended to award_update_id_list.
        """

        def split_location_zip(location_kwargs):
            """ Swap the "location_zip" entry for zip4 / zip5 / zip_last4 entries (added only when it is non-empty) """
            full_zip = location_kwargs["location_zip"]
            if full_zip:
                location_kwargs.update(zip4=full_zip, zip5=full_zip[:5], zip_last4=full_zip[5:])
            location_kwargs.pop("location_zip")

        lookup_id = row['internal_id']
        if lookup_id not in shared_award_mappings:
            return

        shared_mappings = shared_award_mappings[lookup_id]
        prime_award = shared_mappings['award']

        # Details inherited from the prime award's recipient, when there is one
        prime_recipient = None
        prime_recipient_name = None
        business_categories = []
        if prime_award:
            prime_recipient = prime_award.recipient
            if prime_recipient:
                prime_recipient_name = prime_recipient.recipient_name
                business_categories = prime_recipient.business_categories or []

        # Every row value read below this point is read after this call
        upper_case_dict_values(row)

        # The CFDA lookup only applies when the key exists and isn't empty (only here for grants)
        cfda = None
        if 'cfda_numbers' in row and row['cfda_numbers']:
            program_number = row['cfda_numbers'].split(' ')[0]
            cfda = Cfda.objects.filter(program_number=program_number).first()

        business_type_code = None
        if award_type == 'procurement':
            le_location_map = location_d1_recipient_mapper(row)
            recipient_name = row['company_name']
            parent_recipient_name = row['parent_company_name']
            business_types_description = row['bus_types']
        else:
            le_location_map = location_d2_recipient_mapper(row)
            recipient_name = row['awardee_name']
            parent_recipient_name = None
            business_types_description = None

        # These Location objects are only used as a source of derived values; they are not saved here
        split_location_zip(le_location_map)
        recipient_location = Location(**le_location_map)
        recipient_location.pre_save()

        pop_value_map = pop_mapper(row)
        pop_value_map['place_of_performance_flag'] = True
        split_location_zip(pop_value_map)
        place_of_performance = Location(**pop_value_map)
        place_of_performance.pre_save()

        # Fall back to the recipient lookup table for the parent name when the row doesn't provide one
        if not parent_recipient_name and row.get('parent_duns'):
            parent_lookup = RecipientLookup.objects.filter(
                duns=row['parent_duns'],
                legal_business_name__isnull=False,
            )
            duns_obj = parent_lookup.values('legal_business_name').first()
            if duns_obj:
                parent_recipient_name = duns_obj['legal_business_name']

        subaward_dict = {
            # recipient
            'award': prime_award,
            'recipient_unique_id': row['duns'],
            'recipient_name': recipient_name,
            'dba_name': row['dba_name'],
            'parent_recipient_unique_id': row['parent_duns'],
            'parent_recipient_name': parent_recipient_name,
            'business_type_code': business_type_code,
            'business_type_description': business_types_description,

            # prime award recipient
            'prime_recipient': prime_recipient,
            'prime_recipient_name': prime_recipient_name,
            'business_categories': business_categories,

            # recipient location
            'recipient_location_country_code': recipient_location.location_country_code,
            'recipient_location_country_name': recipient_location.country_name,
            'recipient_location_state_code': recipient_location.state_code,
            'recipient_location_state_name': recipient_location.state_name,
            'recipient_location_county_code': recipient_location.county_code,
            'recipient_location_county_name': recipient_location.county_name,
            'recipient_location_city_code': recipient_location.city_code,
            'recipient_location_city_name': recipient_location.city_name,
            'recipient_location_zip4': recipient_location.zip4,
            'recipient_location_zip5': recipient_location.zip5,
            'recipient_location_street_address': recipient_location.address_line1,
            'recipient_location_congressional_code': recipient_location.congressional_code,
            'recipient_location_foreign_postal_code': recipient_location.foreign_postal_code,

            # top paid officers
            'officer_1_name': row['top_paid_fullname_1'],
            'officer_1_amount': row['top_paid_amount_1'],
            'officer_2_name': row['top_paid_fullname_2'],
            'officer_2_amount': row['top_paid_amount_2'],
            'officer_3_name': row['top_paid_fullname_3'],
            'officer_3_amount': row['top_paid_amount_3'],
            'officer_4_name': row['top_paid_fullname_4'],
            'officer_4_amount': row['top_paid_amount_4'],
            'officer_5_name': row['top_paid_fullname_5'],
            'officer_5_amount': row['top_paid_amount_5'],

            # subaward details
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': prime_award.awarding_agency if prime_award else None,
            'funding_agency': prime_award.funding_agency if prime_award else None,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['q1_flag'],
            'recovery_model_question2': row['q2_flag'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'broker_award_id': row['id'],
            'internal_id': row['internal_id'],
            'award_type': award_type,

            # place of performance
            'pop_country_code': row['principle_place_country'],
            'pop_country_name': place_of_performance.country_name,
            'pop_state_code': row['principle_place_state'],
            'pop_state_name': row['principle_place_state_name'],
            'pop_county_code': place_of_performance.county_code,
            'pop_county_name': place_of_performance.county_name,
            'pop_city_code': place_of_performance.city_code,
            'pop_city_name': row['principle_place_city'],
            'pop_zip4': row['principle_place_zip'],
            'pop_street_address': row['principle_place_street'],
            'pop_congressional_code': row['principle_place_district'],
            'updated_at': datetime.utcnow()
        }

        # Either we're starting with an empty table in regards to this award type or we've deleted all
        # subawards related to the internal_id, either way we just create the subaward
        Subaward.objects.create(**subaward_dict)
        if prime_award:
            award_update_id_list.append(prime_award.id)
Example #6
0
    def load_locations(self, fabs_broker_data, total_rows, pop_flag=False):
        """
        Build a Location for every FABS broker row and bulk-insert them.

        Input parameters:
            - fabs_broker_data: iterable of rows to turn into locations
            - total_rows: row count, only used in the progress log message
            - pop_flag: when True the rows are mapped with pop_field_map, flagged as place of performance and
              collected in pop_bulk; otherwise le_field_map, the recipient flag and lel_bulk are used

        The instances are appended to the pop_bulk / lel_bulk lists defined outside this method, and that list
        is passed to Location.objects.bulk_create once all rows have been processed.
        """
        started_at = datetime.now()

        for row_number, row in enumerate(fabs_broker_data, 1):
            # Progress message every 10000 rows
            if row_number % 10000 == 0:
                logger.info('Locations: Loading row {} of {} ({})'.format(
                    str(row_number), str(total_rows), datetime.now() - started_at))

            if pop_flag:
                field_map = pop_field_map
                location_value_map = {"place_of_performance_flag": True}
            else:
                field_map = le_field_map
                location_value_map = {'recipient_flag': True}

            row = canonicalize_location_dict(row)

            country_key = field_map.get('location_country_code')
            country_code = row[country_key]

            if pop_flag:
                # We can assume that if the country code is blank and the place of performance code is
                # NOT '00FORGN', then the country code is USA
                pop_code = row[field_map.get('performance_code')]
                if not country_code and pop_code != '00FORGN':
                    row[country_key] = 'USA'

            # Look up the country object for the (possibly defaulted) country code
            location_country_code_obj = self.country_code_map.get(row[country_key])

            # Strip periods from the state code
            raw_state_code = row.get(field_map.get('state_code'))
            if raw_state_code is not None:
                location_value_map['state_code'] = raw_state_code.replace('.', '')

            if location_country_code_obj:
                location_value_map.update(
                    location_country_code=location_country_code_obj,
                    country_name=location_country_code_obj.country_name)

                # State values are blanked out for anything outside the USA
                if location_country_code_obj.country_code != 'USA':
                    location_value_map.update(state_code=None, state_name=None)
            else:
                # no country found for this code
                location_value_map.update(location_country_code=None, country_name=None)

            location_instance_data = load_data_into_model(
                Location(),
                row,
                value_map=location_value_map,
                field_map=field_map,
                as_dict=True)

            loc_instance = Location(**location_instance_data)
            loc_instance.load_city_county_data()
            loc_instance.fill_missing_state_data()
            loc_instance.fill_missing_zip5()

            (pop_bulk if pop_flag else lel_bulk).append(loc_instance)

        if pop_flag:
            announcement = 'Bulk creating POP Locations (batch_size: {})...'
        else:
            announcement = 'Bulk creating LE Locations (batch_size: {})...'
        logger.info(announcement.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk if pop_flag else lel_bulk, batch_size=BATCH_SIZE)
Example #7
0
def get_or_create_location(location_map,
                           row,
                           location_value_map=None,
                           empty_location=None,
                           d_file=False,
                           save=True):
    """
    Build (and optionally save) a location object from a row of data

    Input parameters:
        - location_map: a dictionary with key = field name on the location model
            and value = corresponding field name on the current row of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary handed to load_data_into_model as its value_map;
            it is updated in place with state/country entries. A new dictionary is used when omitted.
        - empty_location, d_file: accepted but not used in this function body
        - save: when truthy the object is loaded with save=True

    Returns a (location_object, created) tuple:
        - (None, None) when the row produced no location data at all
        - (None, False) when the only data is a truthy place_of_performance_flag
        - (object, False) when save is truthy, (object, True) otherwise
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        country_value = row[location_map.get('location_country_code')]
        is_recipient = location_value_map.get('recipient_flag')
        is_pop = location_value_map.get('place_of_performance_flag')

        # We can assume that the location country code is 'USA' when any of these hold:
        #   - it's a recipient location and the country code is empty or 'UNITED STATES'
        #   - it's a place of performance with an empty country code and either there is no performance code
        #     mapping or the performance code isn't 00FORGN
        #   - the country code is a US territory
        assume_usa = False
        if is_recipient and (country_value is None or country_value == 'UNITED STATES'):
            assume_usa = True
        elif is_pop and country_value is None and (
                "performance_code" not in location_map or
                row[location_map["performance_code"]] != '00FORGN'):
            assume_usa = True
        elif country_value in territory_country_codes:
            assume_usa = True

        if assume_usa:
            row[location_map["location_country_code"]] = 'USA'

    raw_state_code = row.get(location_map.get('state_code'))
    if raw_state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map['state_code'] = raw_state_code.replace('.', '')

    # NOTE: the state entries are reset to None here, which replaces any state_code stored just above
    location_value_map.update(
        location_country_code=location_map.get('location_country_code'),
        country_name=location_map.get('location_country_name'),
        state_code=None,  # expired
        state_name=None,
    )

    location_data = load_data_into_model(Location(),
                                         row,
                                         value_map=location_value_map,
                                         field_map=location_map,
                                         as_dict=True)

    # hacky way to ensure we don't create a series of empty location records
    del location_data['data_source']

    if len(location_data) == 0:
        # record had no location information at all
        return None, None

    # Nothing but the place of performance flag: there is no real location to build
    if len(location_data) == 1 and location_data.get("place_of_performance_flag"):
        return None, False

    if save:
        saved_location = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=location_map,
            as_dict=False,
            save=True)
        return saved_location, False

    unsaved_location = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=False)
    return unsaved_location, True
    def load_locations(self, fpds_broker_data, total_rows, pop_flag=False):
        """
        Build a Location for every FPDS broker row and bulk-insert them.

        Input parameters:
            - fpds_broker_data: iterable of rows to turn into locations
            - total_rows: row count, only used in the progress log message
            - pop_flag: when True the rows are mapped with pop_field_map, flagged as place of performance and
              collected in pop_bulk; otherwise le_field_map, the recipient flag and lel_bulk are used

        The instances are appended to the pop_bulk / lel_bulk lists defined outside this method, and that list
        is passed to Location.objects.bulk_create once all rows have been processed.
        """
        started_at = datetime.now()

        for row_number, row in enumerate(fpds_broker_data, 1):
            # Progress message every 10000 rows
            if row_number % 10000 == 0:
                logger.info('Locations: Loading row {} of {} ({})'.format(
                    str(row_number), str(total_rows), datetime.now() - started_at))

            if pop_flag:
                field_map = pop_field_map
                location_value_map = {"place_of_performance_flag": True}
            else:
                field_map = le_field_map
                location_value_map = {'recipient_flag': True}

            row = canonicalize_location_dict(row)

            # A blank country code is NOT defaulted to 'USA' here: that assumption relies on the place of
            # performance code not being '00FORGN', and FPDS does not have a place of performance code.

            # Look up the country object for the row's country code
            location_country_code_obj = self.country_code_map.get(row[field_map.get('location_country_code')])

            # Strip periods from the state code
            raw_state_code = row.get(field_map.get('state_code'))
            if raw_state_code is not None:
                location_value_map['state_code'] = raw_state_code.replace('.', '')

            if location_country_code_obj:
                location_value_map.update(
                    location_country_code=location_country_code_obj,
                    country_name=location_country_code_obj.country_name)

                # State values are blanked out for anything outside the USA
                if location_country_code_obj.country_code != 'USA':
                    location_value_map.update(state_code=None, state_name=None)
            else:
                # no country found for this code
                location_value_map.update(location_country_code=None, country_name=None)

            location_instance_data = load_data_into_model(
                Location(),
                row,
                value_map=location_value_map,
                field_map=field_map,
                as_dict=True)

            loc_instance = Location(**location_instance_data)
            loc_instance.load_city_county_data()
            loc_instance.fill_missing_state_data()
            loc_instance.fill_missing_zip5()

            (pop_bulk if pop_flag else lel_bulk).append(loc_instance)

        if pop_flag:
            announcement = 'Bulk creating POP Locations (batch_size: {})...'
        else:
            announcement = 'Bulk creating LE Locations (batch_size: {})...'
        logger.info(announcement.format(BATCH_SIZE))
        Location.objects.bulk_create(pop_bulk if pop_flag else lel_bulk, batch_size=BATCH_SIZE)