def insert_new_fpds(self, to_insert, total_rows):
        """Insert or update award and transaction records for a batch of FPDS rows.

        For each row this creates a recipient location, a LegalEntity and a
        place-of-performance location, gets or creates the summary Award for
        the row's generated unique award id, and then either updates the
        TransactionNormalized/TransactionFPDS pair already stored for the row's
        ``detached_award_proc_unique`` or creates a new pair. The id of every
        award touched is appended to ``award_update_id_list``, which is defined
        outside this method.

        Args:
            to_insert: iterable of dict-like FPDS rows (read by key and with ``.get``).
            total_rows: number of rows expected; only used in the progress log.

        Raises:
            KeyError: if a row lacks one of the columns read by key.
            ValueError: if ``last_modified`` matches neither supported timestamp format.
        """
        logger.info('Starting insertion of new FPDS data')

        # Both maps are handed to create_location together with the row;
        # presumably location field name -> FPDS row column (confirm against create_location).
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            # Progress line every 1000 rows; logging formats the arguments itself.
            if not (index % 1000):
                logger.info(
                    'Inserting Stale FPDS: Inserting row %s of %s (%s)',
                    index, total_rows, datetime.now() - start_time)

            # Return value is ignored, so this presumably upper-cases the row in place.
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {
                    "recipient_flag": True,
                    "is_fpds": True
                })
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                # A missing recipient name is stored as an empty string, not None.
                recipient_name=recipient_name if recipient_name is not None else "")
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type='fpds'),
                "is_fpds": True
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            # Empty or missing components are replaced by the "-NONE-" placeholder.
            generated_unique_id = 'CONT_AW_' + '_'.join([
                row['agency_id'] or '-NONE-',
                row['referenced_idv_agency_iden'] or '-NONE-',
                row['piid'] or '-NONE-',
                row['parent_award_id'] or '-NONE-',
            ])

            # Create the summary Award (the "created" flag is not needed here)
            _, award = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                piid=row['piid'])
            award.parent_award_piid = row.get('parent_award_id')
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            # last_modified may or may not carry fractional seconds; only the date is kept.
            last_modified = str(row['last_modified'])
            try:
                last_mod_date = datetime.strptime(
                    last_modified, "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(
                    last_modified, "%Y-%m-%d %H:%M:%S").date()

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
                "last_modified_date": last_mod_date,
                "transaction_unique_id": row['detached_award_proc_unique'],
                "generated_unique_award_id": generated_unique_id,
                "is_fpds": True
            }

            contract_field_map = {
                "type": "contract_award_type",
                "type_description": "contract_award_type_desc",
                "description": "award_description"
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            detached_award_proc_unique = contract_instance[
                'detached_award_proc_unique']
            unique_fpds = TransactionFPDS.objects.filter(
                detached_award_proc_unique=detached_award_proc_unique)

            # Evaluate .first() once: every call issues its own query, and a
            # second call could observe a different row than the first.
            existing_fpds = unique_fpds.first()

            if existing_fpds is not None:
                # NOTE(review): naive UTC timestamp; confirm whether the field expects an aware datetime.
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(
                    id=existing_fpds.transaction.id).update(
                        **transaction_normalized_dict)

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction,
                                                   **contract_instance)
                transaction_fpds.save()
    def create_subaward(self, row, shared_award_mappings, award_type):
        """ Builds one Subaward record from a broker row and inserts it (kept as a separate method to satisfy
            codeclimate complexity limits).

            Rows whose internal_id has no entry in shared_award_mappings are only logged and skipped. When the
            mapping holds a prime award, its id is added to award_update_id_list after the insert.
        """

        # A missing mapping means part of the shared data could not be found for this row,
        # so the subaward must not be inserted.
        if row['internal_id'] not in shared_award_mappings:
            logger.info("[Internal ID {}] not in 'shared_award_mappings'".format(row["internal_id"]))
            return

        shared_mappings = shared_award_mappings[row['internal_id']]
        prime_award = shared_mappings['award']

        # Recipient details inherited from the prime award, when it has any
        prime_award_dict = {}
        if prime_award:
            prime_recipient = prime_award.recipient
            prime_award_dict['prime_recipient'] = prime_recipient
            if prime_recipient:
                prime_award_dict['prime_recipient_name'] = prime_recipient.recipient_name
                prime_award_dict['business_categories'] = prime_recipient.business_categories or []

        # Runs before any row value is copied below (the mapping lookup above used the untouched row)
        upper_case_dict_values(row)

        subaward_dict = {
            'recipient_unique_id': row['duns'],
            'recipient_name': row['recipient_name'],
            'dba_name': row['dba_name'],
            'parent_recipient_unique_id': row['parent_duns'],
            'parent_recipient_name': row.get('parent_recipient_name', None),
            'business_type_description': row.get('bus_types', None),
            'prime_recipient': prime_award_dict.get('prime_recipient', None),
            'prime_recipient_name': prime_award_dict.get('prime_recipient_name', None),
            'business_categories': prime_award_dict.get('business_categories', []),
            'recipient_location_country_code': row['recipient_location_country_code'],
            'recipient_location_state_code': row['recipient_location_state_code'],
            'recipient_location_state_name': row['recipient_location_state_name'],
            'recipient_location_city_name': row['recipient_location_city_name'],
            'recipient_location_zip4': row['recipient_location_zip4'],
            'recipient_location_zip5': row['recipient_location_zip5'],
            'recipient_location_street_address': row['recipient_location_street_address'],
            'recipient_location_congressional_code': row['recipient_location_congressional_code'],
            'officer_1_name': row['top_paid_fullname_1'],
            'officer_1_amount': row['top_paid_amount_1'],
            'officer_2_name': row['top_paid_fullname_2'],
            'officer_2_amount': row['top_paid_amount_2'],
            'officer_3_name': row['top_paid_fullname_3'],
            'officer_3_amount': row['top_paid_amount_3'],
            'officer_4_name': row['top_paid_fullname_4'],
            'officer_4_amount': row['top_paid_amount_4'],
            'officer_5_name': row['top_paid_fullname_5'],
            'officer_5_amount': row['top_paid_amount_5'],
            'data_source': "DBR",
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['description'],
            'recovery_model_question1': row['q1_flag'],
            'recovery_model_question2': row['q2_flag'],
            'action_date': row['action_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'broker_award_id': row['id'],
            'internal_id': row['internal_id'],
            'award_type': award_type,
            'pop_country_code': row['principle_place_country'],
            'pop_state_code': row['principle_place_state'],
            'pop_state_name': row['principle_place_state_name'],
            'pop_city_name': row['principle_place_city'],
            'pop_zip4': row['principle_place_zip'],
            'pop_street_address': row['principle_place_street'],
            'pop_congressional_code': row['principle_place_district'],
            'piid': row.get('piid', None),
            'fain': row.get('fain', None),
            'updated_at': datetime.now(timezone.utc),

            # Intentionally not set here:
            #   keyword_ts_vector, award_ts_vector, recipient_name_ts_vector,
            #   product_or_service_description, total_obl_bin  -> left NULL, matview SQL will populate
            #   business_type_code, extent_competed            -> always NULL
        }

        funding_agency = None
        awarding_agency = None

        if prime_award:
            # Values taken straight from the prime award; business_categories replaces the value set above
            subaward_dict["award_id"] = prime_award.id
            subaward_dict["prime_award_type"] = prime_award.type
            subaward_dict["last_modified_date"] = prime_award.last_modified_date
            subaward_dict["latest_transaction_id"] = prime_award.latest_transaction_id
            subaward_dict["business_categories"] = get_le_business_categories(prime_award.recipient_id)

            funding_agency = get_agency_values(prime_award.funding_agency)
            awarding_agency = get_agency_values(prime_award.awarding_agency)

            contract_data = get_contract_fields(prime_award.latest_transaction_id)
            if contract_data:
                subaward_dict['pulled_from'] = contract_data['pulled_from']
                subaward_dict['product_or_service_code'] = contract_data['product_or_service_code']
                subaward_dict['type_of_contract_pricing'] = contract_data['type_of_contract_pricing']
                subaward_dict['type_set_aside'] = contract_data['type_set_aside']

        if funding_agency:
            subaward_dict["funding_agency_id"] = funding_agency["agency_id"]
            subaward_dict["funding_toptier_agency_abbreviation"] = funding_agency["toptier_agency_abbreviation"]
            subaward_dict["funding_toptier_agency_name"] = funding_agency["toptier_agency_name"]
            subaward_dict["funding_subtier_agency_abbreviation"] = funding_agency["subtier_agency_abbreviation"]
            subaward_dict["funding_subtier_agency_name"] = funding_agency["subtier_agency_name"]

        if awarding_agency:
            subaward_dict["awarding_agency_id"] = awarding_agency["agency_id"]
            subaward_dict["awarding_toptier_agency_abbreviation"] = awarding_agency["toptier_agency_abbreviation"]
            subaward_dict["awarding_toptier_agency_name"] = awarding_agency["toptier_agency_name"]
            subaward_dict["awarding_subtier_agency_abbreviation"] = awarding_agency["subtier_agency_abbreviation"]
            subaward_dict["awarding_subtier_agency_name"] = awarding_agency["subtier_agency_name"]

        # Only the text before the first space of cfda_numbers is used for the CFDA lookup
        cfda = None
        if 'cfda_numbers' in row and row['cfda_numbers']:
            program_number = row['cfda_numbers'].split(' ')[0]
            cfda = Cfda.objects.filter(program_number=program_number).first()

        if cfda:
            subaward_dict["cfda_number"] = cfda.program_number
            subaward_dict["cfda_title"] = cfda.program_title
            subaward_dict["cfda_id"] = cfda.pk

        # Country names are looked up from the country codes on the row
        subaward_dict['pop_country_name'] = get_country_name_from_code(row['principle_place_country'])
        subaward_dict['recipient_location_country_name'] = get_country_name_from_code(
            row['recipient_location_country_code'])

        # County and city codes are looked up from the state and city on the row
        pop_lookup = get_city_and_county_from_state(row['principle_place_state'], row['principle_place_city'])
        subaward_dict['pop_county_code'] = pop_lookup.get("county_code")
        subaward_dict['pop_county_name'] = pop_lookup.get("county_name")
        subaward_dict['pop_city_code'] = pop_lookup.get("city_code")

        recipient_lookup = get_city_and_county_from_state(
            row['recipient_location_state_code'], row['recipient_location_city_name'])
        subaward_dict['recipient_location_county_code'] = recipient_lookup.get("county_code")
        subaward_dict['recipient_location_county_name'] = recipient_lookup.get("county_name")
        subaward_dict['recipient_location_city_code'] = recipient_lookup.get("city_code")

        # The table is either empty for this award type or every subaward tied to this internal_id
        # has already been deleted, so a plain create is always correct here.
        Subaward.objects.create(**subaward_dict)
        if prime_award:
            award_update_id_list.add(prime_award.id)
Exemple #3
0
    def insert_new_fpds(self, to_insert, total_rows):
        """Insert or update award and transaction records for a batch of FPDS rows.

        For each row this creates a recipient location, a LegalEntity and a
        place-of-performance location, gets or creates the summary Award for
        the row's generated unique award id, and then either updates the
        TransactionNormalized/TransactionFPDS pair already stored for the row's
        ``detached_award_proc_unique`` or creates a new pair. Finally the
        LegalEntity is stamped with that transaction id. The id of every award
        touched is appended to ``AWARD_UPDATE_ID_LIST``, which is defined
        outside this method.

        Args:
            to_insert: iterable of dict-like FPDS rows (read by key and with ``.get``).
            total_rows: not used by this method; kept so the signature stays unchanged.

        Raises:
            KeyError: if a row lacks one of the columns read by key.
            ValueError: if ``last_modified`` matches neither supported timestamp format.
        """
        # Both maps are handed to create_location together with the row;
        # presumably location field name -> FPDS row column (confirm against create_location).
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5",
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
        }

        for row in to_insert:
            # Return value is ignored, so this presumably upper-cases the row in place.
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {
                    "recipient_flag": True,
                    "is_fpds": True
                })
            recipient_name = row["awardee_or_recipient_legal"]
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row["awardee_or_recipient_uniqu"],
                # A missing recipient name is stored as an empty string, not None.
                recipient_name=recipient_name if recipient_name is not None else "",
            )
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type="fpds"),
                "is_fpds": True,
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            # Empty or missing components are replaced by the "-NONE-" placeholder.
            generated_unique_id = "CONT_AW_" + "_".join([
                row["agency_id"] or "-NONE-",
                row["referenced_idv_agency_iden"] or "-NONE-",
                row["piid"] or "-NONE-",
                row["parent_award_id"] or "-NONE-",
            ])

            # Create the summary Award (the "created" flag is not needed here)
            _, award = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                piid=row["piid"])
            award.parent_award_piid = row.get("parent_award_id")
            award.save()

            # Append row to list of Awards updated
            AWARD_UPDATE_ID_LIST.append(award.id)

            # last_modified may or may not carry fractional seconds; only the date is kept.
            last_modified = str(row["last_modified"])
            try:
                last_mod_date = datetime.strptime(
                    last_modified, "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(
                    last_modified, "%Y-%m-%d %H:%M:%S").date()

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
                "action_date": format_date(row["action_date"]),
                "last_modified_date": last_mod_date,
                "transaction_unique_id": row["detached_award_proc_unique"],
                "generated_unique_award_id": generated_unique_id,
                "is_fpds": True,
            }

            contract_field_map = {
                "type": "contract_award_type",
                "type_description": "contract_award_type_desc",
                "description": "award_description",
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            contract_instance = load_data_into_model(
                TransactionFPDS(), row, as_dict=True)  # thrown away

            detached_award_proc_unique = contract_instance[
                "detached_award_proc_unique"]
            unique_fpds = TransactionFPDS.objects.filter(
                detached_award_proc_unique=detached_award_proc_unique)

            # Evaluate .first() once: every call issues its own query, and a
            # second call could observe a different row than the first.
            existing_fpds = unique_fpds.first()

            if existing_fpds is not None:
                transaction_normalized_dict["update_date"] = datetime.now(
                    timezone.utc)
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(
                    id=existing_fpds.transaction.id).update(
                        **transaction_normalized_dict)

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction,
                                                   **contract_instance)
                transaction_fpds.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = detached_award_proc_unique
            legal_entity.save()
Exemple #4
0
    def create_subaward(self, row, shared_award_mappings, award_type):
        """ Creates a subaward if the internal ID of the current row is in the shared award mappings (this was made
            to satisfy codeclimate complexity issues)
        """

        # only insert the subaward if the internal_id is in our mappings, otherwise there was a problem
        # finding one or more parts of the shared data for it and we don't want to insert it.
        if row['internal_id'] in shared_award_mappings:
            shared_mappings = shared_award_mappings[row['internal_id']]

            prime_award_dict = {}
            if shared_mappings['award']:
                prime_award_dict['prime_recipient'] = shared_mappings['award'].recipient
                if prime_award_dict['prime_recipient']:
                    prime_award_dict['prime_recipient_name'] = shared_mappings['award'].recipient.recipient_name
                    prime_award_dict['business_categories'] = (shared_mappings['award'].recipient.business_categories
                                                               or [])

            upper_case_dict_values(row)

            cfda = None
            # check if the key exists and if it isn't empty (only here for grants)
            if 'cfda_numbers' in row and row['cfda_numbers']:
                only_num = row['cfda_numbers'].split(' ')
                cfda = Cfda.objects.filter(program_number=only_num[0]).first()

            if award_type == 'procurement':
                le_location_map = location_d1_recipient_mapper(row)
                recipient_name = row['company_name']
                parent_recipient_name = row['parent_company_name']
                business_type_code = None
                business_types_description = row['bus_types']
            else:
                le_location_map = location_d2_recipient_mapper(row)
                recipient_name = row['awardee_name']
                parent_recipient_name = None
                business_type_code = None
                business_types_description = None

            if le_location_map["location_zip"]:
                le_location_map.update(
                    zip4=le_location_map["location_zip"],
                    zip5=le_location_map["location_zip"][:5],
                    zip_last4=le_location_map["location_zip"][5:]
                )

            le_location_map.pop("location_zip")
            recipient_location = Location(**le_location_map)
            recipient_location.pre_save()

            pop_value_map = pop_mapper(row)
            pop_value_map['place_of_performance_flag'] = True

            if pop_value_map["location_zip"]:
                pop_value_map.update(
                    zip4=pop_value_map["location_zip"],
                    zip5=pop_value_map["location_zip"][:5],
                    zip_last4=pop_value_map["location_zip"][5:]
                )

            pop_value_map.pop("location_zip")
            place_of_performance = Location(**pop_value_map)
            place_of_performance.pre_save()

            if not parent_recipient_name and row.get('parent_duns'):
                duns_obj = RecipientLookup.objects.filter(duns=row['parent_duns'], legal_business_name__isnull=False) \
                    .values('legal_business_name').first()
                if duns_obj:
                    parent_recipient_name = duns_obj['legal_business_name']

            subaward_dict = {
                'award': shared_mappings['award'],
                'recipient_unique_id': row['duns'],
                'recipient_name': recipient_name,
                'dba_name': row['dba_name'],
                'parent_recipient_unique_id': row['parent_duns'],
                'parent_recipient_name': parent_recipient_name,
                'business_type_code': business_type_code,
                'business_type_description': business_types_description,

                'prime_recipient': prime_award_dict.get('prime_recipient', None),
                'prime_recipient_name': prime_award_dict.get('prime_recipient_name', None),
                'business_categories': prime_award_dict.get('business_categories', []),

                'recipient_location_country_code': recipient_location.location_country_code,
                'recipient_location_country_name': recipient_location.country_name,
                'recipient_location_state_code': recipient_location.state_code,
                'recipient_location_state_name': recipient_location.state_name,
                'recipient_location_county_code': recipient_location.county_code,
                'recipient_location_county_name': recipient_location.county_name,
                'recipient_location_city_code': recipient_location.city_code,
                'recipient_location_city_name': recipient_location.city_name,
                'recipient_location_zip4': recipient_location.zip4,
                'recipient_location_zip5': recipient_location.zip5,
                'recipient_location_street_address': recipient_location.address_line1,
                'recipient_location_congressional_code': recipient_location.congressional_code,
                'recipient_location_foreign_postal_code': recipient_location.foreign_postal_code,

                'officer_1_name': row['top_paid_fullname_1'],
                'officer_1_amount': row['top_paid_amount_1'],
                'officer_2_name': row['top_paid_fullname_2'],
                'officer_2_amount': row['top_paid_amount_2'],
                'officer_3_name': row['top_paid_fullname_3'],
                'officer_3_amount': row['top_paid_amount_3'],
                'officer_4_name': row['top_paid_fullname_4'],
                'officer_4_amount': row['top_paid_amount_4'],
                'officer_5_name': row['top_paid_fullname_5'],
                'officer_5_amount': row['top_paid_amount_5'],

                'data_source': "DBR",
                'cfda': cfda,
                'awarding_agency': shared_mappings['award'].awarding_agency if shared_mappings['award'] else None,
                'funding_agency': shared_mappings['award'].funding_agency if shared_mappings['award'] else None,
                'subaward_number': row['subaward_num'],
                'amount': row['subaward_amount'],
                'description': row['overall_description'],
                'recovery_model_question1': row['q1_flag'],
                'recovery_model_question2': row['q2_flag'],
                'action_date': row['subaward_date'],
                'award_report_fy_month': row['report_period_mon'],
                'award_report_fy_year': row['report_period_year'],
                'broker_award_id': row['id'],
                'internal_id': row['internal_id'],
                'award_type': award_type,

                'pop_country_code': row['principle_place_country'],
                'pop_country_name': place_of_performance.country_name,
                'pop_state_code': row['principle_place_state'],
                'pop_state_name': row['principle_place_state_name'],
                'pop_county_code': place_of_performance.county_code,
                'pop_county_name': place_of_performance.county_name,
                'pop_city_code': place_of_performance.city_code,
                'pop_city_name': row['principle_place_city'],
                'pop_zip4': row['principle_place_zip'],
                'pop_street_address': row['principle_place_street'],
                'pop_congressional_code': row['principle_place_district'],
                'updated_at': datetime.utcnow()
            }

            # Either we're starting with an empty table in regards to this award type or we've deleted all
            # subawards related to the internal_id, either way we just create the subaward
            Subaward.objects.create(**subaward_dict)
            if shared_mappings['award']:
                award_update_id_list.append(shared_mappings['award'].id)
Exemple #5
0
    def insert_new_fabs(self, to_insert, total_rows):
        """Insert FABS (financial assistance) rows, updating transactions that already exist.

        For every row this method, in order:
          1. passes the row through ``upper_case_dict_values``;
          2. creates the recipient location and a ``LegalEntity``;
          3. creates the place of performance location;
          4. resolves the awarding and funding agencies from their sub-tier codes;
          5. builds the generated unique award id and finds or creates the summary ``Award``;
          6. updates the ``TransactionNormalized``/``TransactionFABS`` pair that already carries the row's
             ``afa_generated_unique``, or creates a new pair when none exists;
          7. stamps the legal entity with the transaction's ``afa_generated_unique``.

        The id of every award touched is appended to ``award_update_id_list`` (a name resolved outside
        this method).

        Args:
            to_insert: iterable of dict-like broker rows keyed by broker column name.
            total_rows: total number of rows, used only in the progress log messages.

        Raises:
            ValueError, TypeError: if ``row['record_type']`` cannot be cast to ``int``.
            ValueError: if ``row['modified_at']`` matches neither supported timestamp format.
            Exception: if the record type is an integer other than 1, 2 or 3.
        """
        logger.info('Starting insertion of new FABS data')

        # Field maps handed to create_location() together with each row.
        # NOTE(review): keys look like location field names and values like broker column names - confirm
        # against create_location.
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perform_country_n",
            "state_code": "place_of_perfor_state_code",
            "state_name": "place_of_perform_state_nam",
            "city_name": "place_of_performance_city",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "foreign_location_description": "place_of_performance_forei",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "performance_code": "place_of_performance_code",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "city_name": "legal_entity_city_name",
            "city_code": "legal_entity_city_code",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "foreign_location_description": "legal_entity_foreign_descr",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "foreign_city_name": "legal_entity_foreign_city"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            # Progress message every 1000 rows
            if not (index % 1000):
                logger.info(
                    'Inserting Stale FABS: Inserting row {} of {} ({})'.format(
                        str(index), str(total_rows),
                        datetime.now() - start_time))

            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True})
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                # a missing recipient name is stored as an empty string rather than None
                recipient_name=recipient_name if recipient_name is not None else "",
                parent_recipient_unique_id=row['ultimate_parent_unique_ide'])
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type='fabs'),
                "business_types_description": row['business_types_desc']
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(
                place_of_performance_field_map, row, {"place_of_performance_flag": True})

            # Look up the awarding and funding agencies by their sub-tier codes
            awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID:
            # "ASST_AW_" + awarding_sub_tier_agency_c + "_" + fain + "_" + uri, with '-NONE-' for missing parts

            # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
            # non-numeric record type values
            record_type_int = int(row['record_type'])
            if record_type_int == 1:
                # record type 1 is identified by its URI only
                uri = row['uri'] or '-NONE-'
                fain = '-NONE-'
            elif record_type_int in (2, 3):
                # record types 2 and 3 are identified by their FAIN only
                uri = '-NONE-'
                fain = row['fain'] or '-NONE-'
            else:
                raise Exception(
                    'Invalid record type encountered for the following afa_generated_unique record: %s'
                    % row['afa_generated_unique'])

            generated_unique_id = (
                'ASST_AW_' + (row['awarding_sub_tier_agency_c'] or '-NONE-') + '_' + fain + '_' + uri)

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                fain=row['fain'],
                uri=row['uri'],
                record_type=row['record_type'])
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            # modified_at may or may not carry fractional seconds, so try both formats
            try:
                last_mod_date = datetime.strptime(
                    str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(
                    str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()

            # Values supplied directly to the normalized transaction
            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
                "last_modified_date": last_mod_date,
                "type_description": row['assistance_type_desc'],
                "transaction_unique_id": row['afa_generated_unique'],
                "generated_unique_award_id": generated_unique_id
            }

            fad_field_map = {
                "type": "assistance_type",
                "description": "award_description",
            }

            # The model instances below are only used to build dicts of field values (as_dict=True)
            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            afa_generated_unique = financial_assistance_data['afa_generated_unique']
            unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

            # Fetch the existing record once; calling first() again would repeat the query
            existing_fabs = unique_fabs.first()

            if existing_fabs:
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # Update TransactionNormalized
                TransactionNormalized.objects.filter(id=existing_fabs.transaction.id).\
                    update(**transaction_normalized_dict)

                # Update TransactionFABS
                unique_fabs.update(**financial_assistance_data)
            else:
                # Create TransactionNormalized
                transaction = TransactionNormalized(**transaction_normalized_dict)
                transaction.save()

                # Create TransactionFABS
                transaction_fabs = TransactionFABS(
                    transaction=transaction, **financial_assistance_data)
                transaction_fabs.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = afa_generated_unique
            legal_entity.save()
Exemple #6
0
    def create_subaward(self, row, shared_award_mappings, award_type):
        """ Persist one Subaward for the row, provided its internal ID has shared award data.

            Rows whose internal_id is absent from shared_award_mappings are ignored: one or more parts of the
            shared data could not be found for them, so nothing is inserted. (Kept as a separate method to
            satisfy codeclimate complexity limits.)
        """
        lookup_id = row['internal_id']
        if lookup_id not in shared_award_mappings:
            return

        shared_mappings = shared_award_mappings[lookup_id]

        # The mapping lookup above deliberately happens before this call; from here on values are re-read
        # from the row
        upper_case_dict_values(row)

        # cfda_numbers is only present (and non-empty) for grants; the program number is the text before the
        # first space
        cfda = None
        if 'cfda_numbers' in row and row['cfda_numbers']:
            program_number = row['cfda_numbers'].split(' ')[0]
            cfda = Cfda.objects.filter(program_number=program_number).first()

        recipient, place_of_performance = self.get_subaward_references(row, award_type)

        # The parent award may be missing, in which case no agencies are available
        parent_award = shared_mappings['award']
        if parent_award:
            awarding_agency = parent_award.awarding_agency
            funding_agency = parent_award.funding_agency
        else:
            awarding_agency = None
            funding_agency = None

        subaward_dict = {
            'award': parent_award,
            'recipient': recipient,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': awarding_agency,
            'funding_agency': funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['q1_flag'],
            'recovery_model_question2': row['q2_flag'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'broker_award_id': row['id'],
            'internal_id': row['internal_id'],
            'award_type': award_type
        }

        # Either the table holds nothing for this award type yet or every subaward tied to the internal_id
        # has already been deleted, so a plain create is always correct here
        Subaward.objects.create(**subaward_dict)
        if parent_award:
            award_update_id_list.append(parent_award.id)
def load_file_c(submission_attributes, db_cursor, award_financial_frame):
    """
    Process and load file C broker data.

    Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some
    additional information about the awarding sub-tier agency.

    Args:
        submission_attributes: submission object; its reporting_period_start / reporting_period_end are copied
            onto every FinancialAccountsByAwards record, and it is passed to the program activity lookup.
        db_cursor: cursor handed to the treasury appropriation account (TAS) lookup.
        award_financial_frame: pandas DataFrame of File C rows; the columns 'txn', 'awarding_agency',
            'object_class' and 'program_activity' are added to it in place.

    Returns:
        None when the frame is empty. Otherwise a list holding, for each loaded row, the id of the award that
        was matched or created for it (rows skipped for a missing TAS contribute nothing, and the same id can
        appear more than once).
    """
    if not award_financial_frame.size:
        logger.warning('No File C (award financial) data found, skipping...')
        return

    # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we
    # change up the order of file loading
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}

    # Resolve the related objects for every row up front and store them as extra columns
    award_financial_frame['txn'] = award_financial_frame.apply(
        get_award_financial_transaction, axis=1)
    award_financial_frame['awarding_agency'] = award_financial_frame.apply(
        get_awarding_agency, axis=1)
    award_financial_frame['object_class'] = award_financial_frame.apply(
        get_or_create_object_class_rw, axis=1, logger=logger)
    award_financial_frame['program_activity'] = award_financial_frame.apply(
        get_or_create_program_activity,
        axis=1,
        submission_attributes=submission_attributes)

    total_rows = award_financial_frame.shape[0]
    start_time = datetime.now()
    awards_touched = []

    # NaN cells become None so that missing values reach the models as nulls
    records = award_financial_frame.replace({np.nan: None}).to_dict(orient='records')

    for index, row in enumerate(records, 1):
        # Progress message every 100 rows
        if not (index % 100):
            logger.info('C File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows),
                datetime.now() - start_time))

        upper_case_dict_values(row)

        # Check and see if there is an entry for this TAS
        treasury_account = get_treasury_appropriation_account_tas_lookup(
            row.get('tas_id'), db_cursor)
        if treasury_account is None:
            update_skipped_tas(row, skipped_tas)
            continue

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        created, award = get_or_create_summary_award(
            awarding_agency=row['awarding_agency'],
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))

        awards_touched.append(award)

        award_financial_data = FinancialAccountsByAwards()

        value_map_faba = {
            'award': award,
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            'treasury_account': treasury_account,
            'object_class': row.get('object_class'),
            'program_activity': row.get('program_activity'),
        }

        # Still using the cpe|fyb regex compiled above for reverse
        load_data_into_model(award_financial_data,
                             row,
                             value_map=value_map_faba,
                             save=True,
                             reverse=reverse)

    awards_cache.clear()

    # Report the skipped rows per TAS, then the overall total
    total_tas_skipped = 0
    for key, skipped in skipped_tas.items():
        logger.info('Skipped %d rows due to missing TAS: %s',
                    skipped['count'], key)
        total_tas_skipped += skipped['count']

    logger.info(
        'Skipped a total of {} TAS rows for File C'.format(total_tas_skipped))

    # Collect the id of each touched award. The loop variable must be the award itself: using `award.id`
    # as the loop target would overwrite an award's id and yield the builtin `id` instead.
    return [award.id for award in awards_touched]