def test_get_or_create_location_creates_new_locations():
    """With no stored Location, the helper inserts exactly one for the row."""
    # Country reference row that the new location's country field is compared with.
    usa = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)

    raw_row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }

    # load_submission runs this canonicalization step as well
    canonical_row = canonicalize_location_dict(raw_row)

    # precondition: the table starts out empty
    assert Location.objects.count() == 0

    helpers.get_or_create_location(
        canonical_row, load_usaspending_contracts.location_mapper_vendor)
    assert Location.objects.count() == 1

    stored = Location.objects.all().first()
    assert stored.location_country_code == usa
    assert stored.zip5 == '12345'
    assert stored.zip_last4 == '6789'
    assert stored.address_line1 == 'ADDY1'
    assert stored.address_line2 == 'ADDY2'
    assert stored.address_line3 is None
    assert stored.state_code == 'ST'
    assert stored.city_name == 'MY TOWN'
def get_or_create_location(row, mapper):
    """
    Return the Location described by ``row``, creating it when none matches.

    Input parameters:
        - row: the row of data currently being loaded
        - mapper: callable that receives ``row`` and returns a dictionary of
          Location field values; it must contain "location_country_code"

    Lookup order: ``location_cache`` first, then the database, and only then
    a new Location is created. The result of either database path is stored
    in ``location_cache`` under the tuple of the final field values.
    """
    location_dict = mapper(row)

    country_code = fetch_country_code(location_dict["location_country_code"])
    location_dict["location_country_code"] = country_code

    # Country-specific adjustments
    if country_code and country_code.country_code == "USA":
        # US rows: the first five characters go to zip5, the rest to zip_last4
        location_zip = location_dict.pop("location_zip")
        location_dict.update(zip5=location_zip[:5],
                             zip_last4=location_zip[5:])
    else:
        # Everything else: move zip/state/city into the foreign_* fields
        location_dict.update(
            foreign_postal_code=location_dict.pop("location_zip", None),
            foreign_province=location_dict.pop("state_code", None))
        if "city_name" in location_dict:
            location_dict['foreign_city_name'] = location_dict.pop("city_name")

    location_dict = canonicalize_location_dict(location_dict)

    location_tup = tuple(location_dict.items())
    location = location_cache.get(location_tup)
    if location:
        return location

    location = Location.objects.filter(**location_dict).first()
    if not location:
        location = Location.objects.create(**location_dict)

    # Cache rows that already existed in the database as well as new ones;
    # otherwise every repeat of an existing location queries the database again.
    location_cache.set(location_tup, location)
    return location
def test_get_or_create_location_creates_new_locations():
    """An unmatched row makes get_or_create_location insert a new Location."""
    source_row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }

    # same canonicalization that load_submission performs
    source_row = canonicalize_location_dict(source_row)

    # nothing to find yet: no Location rows exist
    assert Location.objects.count() == 0

    vendor_mapper = load_usaspending_contracts.location_mapper_vendor
    helpers.get_or_create_location(source_row, vendor_mapper)
    assert Location.objects.count() == 1

    new_location = Location.objects.all().first()
    assert new_location.location_country_code == 'USA'
    assert new_location.zip5 == '12345'
    assert new_location.zip_last4 == '6789'
    assert new_location.address_line1 == 'ADDY1'
    assert new_location.address_line2 == 'ADDY2'
    assert new_location.address_line3 is None
    assert new_location.state_code == 'ST'
    assert new_location.city_name == 'MY TOWN'
def create_location(location_map, row, location_value_map=None):
    """
    Build a Location from one row of data and insert it

    Input parameters:
        - location_map: a dictionary with key = field name on the location model and value = corresponding field name
          on the current row of data
        - row: the row of data currently being loaded (canonicalized before use)
        - location_value_map: optional dictionary handed to load_data_into_model as value_map; an empty
          dictionary is used when it is None

    Returns whatever Location.objects.create returns.
    """
    value_map = {} if location_value_map is None else location_value_map
    canonical_row = canonicalize_location_dict(row)

    # The loader result is unpacked as keyword arguments for the insert below.
    field_values = load_data_into_model(
        Location(),
        canonical_row,
        value_map=value_map,
        field_map=location_map,
        as_dict=True,
        save=False,
    )
    return Location.objects.create(**field_values)
def create_location(location_map, row, location_value_map=None):
    """
    Insert a new Location built from the given row

    Input parameters:
        - location_map: a dictionary with key = field name on the location model and value = corresponding field name
          on the current row of data
        - row: the row of data currently being loaded; canonicalize_location_dict is applied to it first
        - location_value_map: optional value_map for load_data_into_model (None means an empty dictionary)

    Returns the result of Location.objects.create.
    """
    if location_value_map is None:
        location_value_map = {}

    canonical = canonicalize_location_dict(row)
    loader_options = dict(
        value_map=location_value_map,
        field_map=location_map,
        as_dict=True,
        save=False,
    )
    return Location.objects.create(
        **load_data_into_model(Location(), canonical, **loader_options))
def get_or_create_location(location_map, row, location_value_map=None):
    """
    Retrieve or create a location object

    Input parameters:
        - location_map: a dictionary with key = field name on the location model
            and value = corresponding field name on the current row of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary of explicit field values; the
            country/state overrides computed here are written into it. A new
            dictionary is used for each call when it is omitted.

    Returns a (location_object, created) tuple, or (None, None) when nothing is
    left in the location data once 'data_source' has been removed.
    """
    # A literal `{}` default is created once and shared by every call, so the
    # overrides written below (e.g. foreign_province) would leak between rows.
    if location_value_map is None:
        location_value_map = {}

    location_country = RefCountryCode.objects.filter(
        country_code=row[location_map.get('location_country_code')]).first()

    # temporary fix until broker is patched: remove later
    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Fix for procurement data foreign provinces stored as state_code
        if location_country and location_country.country_code != "USA":
            location_value_map.update({'foreign_province': state_code})
            location_value_map.update({'state_code': None})
        else:
            location_value_map.update(
                {'state_code': state_code.replace('.', '')})
    # end of temporary fix

    if location_country:
        location_value_map.update({
            'location_country_code': location_country,
            'country_name': location_country.country_name
        })
    else:
        # no country found for this code
        location_value_map.update({
            'location_country_code': None,
            'country_name': None
        })

    row = canonicalize_location_dict(row)

    location_data = load_data_into_model(Location(),
                                         row,
                                         value_map=location_value_map,
                                         field_map=location_map,
                                         as_dict=True)

    # hacky way to ensure we don't create a series of empty location records
    del location_data['data_source']
    if not location_data:
        # record had no location information at all
        return None, None

    try:
        location_object, created = Location.objects.get_or_create(
            **location_data, defaults={'data_source': 'DBR'})
    except MultipleObjectsReturned:
        # incoming location data is so sparse that comparing it to existing locations
        # yielded multiple records. create a new location with this limited info.
        # note: this will need fixed up to prevent duplicate location records with the
        # same sparse data
        location_object = Location.objects.create(**location_data)
        created = True
    return location_object, created
def test_canonicalize_location_dict():
    """Canonicalizing `raw` must yield exactly `desired`."""
    canonical = h.canonicalize_location_dict(raw)
    assert canonical == desired
Beispiel #8
0
    def load_locations(self, fabs_broker_data, total_rows, pop_flag=False):
        """
        Build one Location per FABS broker row and bulk-insert them.

        Input parameters:
            - fabs_broker_data: iterable of rows, read with row[...] and row.get(...)
            - total_rows: row count, used only in the progress log message
            - pop_flag: True selects pop_field_map and the pop_bulk list,
              False selects le_field_map and the lel_bulk list

        Instances are appended to pop_bulk / lel_bulk, which are defined outside
        this method, and that list is then passed to bulk_create.
        """
        started_at = datetime.now()
        for row_number, row in enumerate(fabs_broker_data, 1):
            if row_number % 10000 == 0:
                elapsed = datetime.now() - started_at
                logger.info('Locations: Loading row {} of {} ({})'.format(
                    str(row_number), str(total_rows), elapsed))

            # A new value map per row: it collects this row's overrides only.
            if pop_flag:
                location_value_map = {"place_of_performance_flag": True}
                field_map = pop_field_map
            else:
                location_value_map = {'recipient_flag': True}
                field_map = le_field_map

            row = canonicalize_location_dict(row)

            country_key = field_map.get('location_country_code')
            country_code = row[country_key]
            pop_code = None
            if pop_flag:
                pop_code = row[field_map.get('performance_code')]

            # We can assume that if the country code is blank and the place of performance code is NOT '00FORGN', then
            # the country code is USA
            if pop_flag and not country_code and pop_code != '00FORGN':
                row[country_key] = 'USA'

            # Get country code obj
            country_obj = self.country_code_map.get(row[country_key])

            # Fix state code periods
            state_code = row.get(field_map.get('state_code'))
            if state_code is not None:
                location_value_map['state_code'] = state_code.replace('.', '')

            if not country_obj:
                # no country found for this code
                location_value_map['location_country_code'] = None
                location_value_map['country_name'] = None
            else:
                location_value_map['location_country_code'] = country_obj
                location_value_map['country_name'] = country_obj.country_name
                if country_obj.country_code != 'USA':
                    # state fields are blanked for non-USA countries
                    location_value_map['state_code'] = None
                    location_value_map['state_name'] = None

            location_instance_data = load_data_into_model(
                Location(),
                row,
                value_map=location_value_map,
                field_map=field_map,
                as_dict=True)

            loc_instance = Location(**location_instance_data)
            loc_instance.load_city_county_data()
            loc_instance.fill_missing_state_data()
            loc_instance.fill_missing_zip5()

            target_bulk = pop_bulk if pop_flag else lel_bulk
            target_bulk.append(loc_instance)

        if pop_flag:
            logger.info(
                'Bulk creating POP Locations (batch_size: {})...'.format(
                    BATCH_SIZE))
            Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
        else:
            logger.info(
                'Bulk creating LE Locations (batch_size: {})...'.format(
                    BATCH_SIZE))
            Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
Beispiel #9
0
def get_or_create_location(location_map,
                           row,
                           location_value_map=None,
                           empty_location=None,
                           d_file=False,
                           save=True):
    """
    Build a location object from a row of data

    Input parameters:
        - location_map: a dictionary with key = field name on the location model
            and value = corresponding field name on the current row of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary of explicit field values; it is
            updated in place (a new dictionary is used when it is None)
        - empty_location, d_file: not used in this function body
        - save: when true, load_data_into_model is called with save=True

    Returns a (location_object, created) tuple, or (None, None) when the row
    produced no location data.
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        country_value = row[location_map.get('location_country_code')]
        country_missing = country_value is None

        # The recipient's location country code is empty or it's 'UNITED STATES'
        recipient_in_usa = (
            location_value_map.get('recipient_flag')
            and (country_missing or country_value == 'UNITED STATES'))

        # The place of performance country code is empty and either there is no
        # performance code mapping or the performance code isn't 00FORGN
        pop_in_usa = (
            location_value_map.get('place_of_performance_flag')
            and country_missing
            and ("performance_code" not in location_map
                 or row[location_map["performance_code"]] != '00FORGN'))

        # Any of the above, or a US territory code, means the country is 'USA'
        if recipient_in_usa or pop_in_usa or country_value in territory_country_codes:
            row[location_map["location_country_code"]] = 'USA'

    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map['state_code'] = state_code.replace('.', '')

    # NOTE(review): 'state_code' is reset to None here, which discards the value
    # written just above (the original marks it "expired") - confirm before removing.
    location_value_map.update({
        'location_country_code': location_map.get('location_country_code'),
        'country_name': location_map.get('location_country_name'),
        'state_code': None,  # expired
        'state_name': None,
    })

    location_data = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=True)

    # hacky way to ensure we don't create a series of empty location records
    del location_data['data_source']

    if not location_data:
        # record had no location information at all
        return None, None

    # The place_of_performance_flag on its own is not location information.
    if len(location_data) == 1 and "place_of_performance_flag" in location_data and\
            location_data["place_of_performance_flag"]:
        return None, False

    # NOTE(review): `created` is False on the save=True path and True otherwise -
    # confirm with callers that this is the intended meaning.
    if save:
        location_object = load_data_into_model(
            Location(),
            row,
            value_map=location_value_map,
            field_map=location_map,
            as_dict=False,
            save=True)
        return location_object, False

    location_object = load_data_into_model(
        Location(),
        row,
        value_map=location_value_map,
        field_map=location_map,
        as_dict=False)
    return location_object, True
    def load_locations(self, fpds_broker_data, total_rows, pop_flag=False):
        """
        Build one Location per FPDS broker row and bulk-insert them.

        Input parameters:
            - fpds_broker_data: iterable of rows, read with row[...] and row.get(...)
            - total_rows: row count, used only in the progress log message
            - pop_flag: True selects pop_field_map and the pop_bulk list,
              False selects le_field_map and the lel_bulk list

        Instances are appended to pop_bulk / lel_bulk, which are defined outside
        this method, and that list is then passed to bulk_create.
        """
        began = datetime.now()
        for position, row in enumerate(fpds_broker_data, 1):
            if position % 10000 == 0:
                logger.info('Locations: Loading row {} of {} ({})'.format(
                    str(position), str(total_rows), datetime.now() - began))

            # A new value map per row: it collects this row's overrides only.
            if pop_flag:
                location_value_map = {"place_of_performance_flag": True}
                field_map = pop_field_map
            else:
                location_value_map = {'recipient_flag': True}
                field_map = le_field_map

            row = canonicalize_location_dict(row)

            # THIS ASSUMPTION DOES NOT HOLD FOR FPDS SINCE IT DOES NOT HAVE A PLACE OF PERFORMANCE CODE
            # We can assume that if the country code is blank and the place of performance code is NOT '00FORGN', then
            # the country code is USA
            # if pop_flag and not country_code and pop_code != '00FORGN':
            #     row[field_map.get('location_country_code')] = 'USA'

            # Get country code obj
            country_key = field_map.get('location_country_code')
            country_obj = self.country_code_map.get(row[country_key])

            # Fix state code periods
            state_code = row.get(field_map.get('state_code'))
            if state_code is not None:
                location_value_map['state_code'] = state_code.replace('.', '')

            if country_obj:
                location_value_map['location_country_code'] = country_obj
                location_value_map['country_name'] = country_obj.country_name
                if country_obj.country_code != 'USA':
                    # state fields are blanked for non-USA countries
                    location_value_map['state_code'] = None
                    location_value_map['state_name'] = None
            else:
                # no country found for this code
                location_value_map['location_country_code'] = None
                location_value_map['country_name'] = None

            location_instance_data = load_data_into_model(
                Location(),
                row,
                value_map=location_value_map,
                field_map=field_map,
                as_dict=True)

            loc_instance = Location(**location_instance_data)
            loc_instance.load_city_county_data()
            loc_instance.fill_missing_state_data()
            loc_instance.fill_missing_zip5()

            destination = pop_bulk if pop_flag else lel_bulk
            destination.append(loc_instance)

        if pop_flag:
            logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE))
            Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE)
        else:
            logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE))
            Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
def get_or_create_location(location_map, row, location_value_map=None, empty_location=None, d_file=False, save=True):
    """
    Build a location object from a row of data

    Input parameters:
        - location_map: a dictionary with key = field name on the location model
            and value = corresponding field name on the current row of data
        - row: the row of data currently being loaded
        - location_value_map: optional dictionary of explicit field values, updated in place
            (None means a new dictionary)
        - empty_location, d_file: accepted but not read by this function body
        - save: passed through as save=True to load_data_into_model when true

    Returns (location_object, created); (None, None) when the row yields no location data.
    """
    if location_value_map is None:
        location_value_map = {}

    row = canonicalize_location_dict(row)

    # For only FABS
    if "place_of_performance_code" in row:
        current_country = row[location_map.get('location_country_code')]

        # Recipient rows: an empty country code or 'UNITED STATES' means USA
        assume_usa = False
        if location_value_map.get('recipient_flag'):
            assume_usa = current_country is None or current_country == 'UNITED STATES'

        # Place of performance rows with an empty country code: USA unless the
        # mapped performance code is 00FORGN (no performance code mapping also means USA)
        if not assume_usa and location_value_map.get('place_of_performance_flag') and current_country is None:
            if "performance_code" in location_map:
                assume_usa = row[location_map["performance_code"]] != '00FORGN'
            else:
                assume_usa = True

        # US territory codes are treated as USA too
        if not assume_usa:
            assume_usa = current_country in territory_country_codes

        if assume_usa:
            row[location_map["location_country_code"]] = 'USA'

    state_code = row.get(location_map.get('state_code'))
    if state_code is not None:
        # Remove . in state names (i.e. D.C.)
        location_value_map['state_code'] = state_code.replace('.', '')

    # NOTE(review): the 'state_code' written above is replaced by None right here
    # (marked "expired" in the original) - confirm whether that write is still needed.
    location_value_map.update(
        location_country_code=location_map.get('location_country_code'),
        country_name=location_map.get('location_country_name'),
        state_code=None,  # expired
        state_name=None,
    )

    loader_kwargs = dict(value_map=location_value_map, field_map=location_map)
    location_data = load_data_into_model(Location(), row, as_dict=True, **loader_kwargs)

    del location_data['data_source']  # hacky way to ensure we don't create a series of empty location records
    if len(location_data) == 0:
        # record had no location information at all
        return None, None

    flag_only = len(location_data) == 1 and "place_of_performance_flag" in location_data and\
        location_data["place_of_performance_flag"]
    if flag_only:
        # nothing but the place of performance flag: no object is built
        location_object, created = None, False
    elif save:
        # NOTE(review): created is False on this saving path and True on the
        # non-saving one below - confirm that is what callers expect.
        location_object = load_data_into_model(Location(), row, as_dict=False, save=True, **loader_kwargs)
        created = False
    else:
        location_object = load_data_into_model(Location(), row, as_dict=False, **loader_kwargs)
        created = True

    return location_object, created