def get_or_create_award(self, row, awarding_agency):
     fain = row.get("federal_award_id", None)
     uri = row.get("unique_transaction_id",
                   None)  # ask: why unique_transaction_id instead of uri?
     created, award = Award.get_or_create_summary_award(
         fain=fain, uri=uri, awarding_agency=awarding_agency)
     return award
Exemple #2
0
    def get_or_create_award(self, row, awarding_agency_id):
        piid = row.get("piid", None)
        parent_award_id = row.get("idvpiid", None)

        awarding_agency = Agency.objects.get(id=awarding_agency_id)
        award = Award.get_or_create_summary_award(
            piid=piid, fain=None, uri=None, awarding_agency=awarding_agency,
            parent_award_id=parent_award_id)
        return award
    def load_parent_awards(self, fpds_broker_data, total_rows):
        start_time = datetime.now()
        for index, row in enumerate(fpds_broker_data, 1):
            if not (index % 10000):
                logger.info('Parent Awards: Loading row {} of {} ({})'.format(
                    str(index), str(total_rows),
                    datetime.now() - start_time))

            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(
                    row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = self.subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = self.subtier_to_agency_map[
                    awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = self.toptier_agency_map[
                    awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = self.agency_no_sub_map.get(
                (row['awarding_agency_code'],
                 row["awarding_sub_tier_agency_c"]))

            if awarding_agency is None:
                awarding_agency = self.agency_sub_only_map.get(
                    row['awarding_agency_code'])

            # parent_award_id from the row = parent piid
            parent_award_piid = row.get('parent_award_id')
            parent_award = None
            if parent_award_piid:
                parent_award = self.award_map.get(parent_award_piid)
                if not parent_award:
                    create_kwargs = {
                        'awarding_agency': awarding_agency,
                        'piid': parent_award_piid
                    }
                    parent_award = Award(**create_kwargs)
                    self.award_map[parent_award_piid] = parent_award
                    parent_award_bulk.append(parent_award)

            parent_award_lookup.append(parent_award)

        logger.info('Bulk creating Parent Awards (batch_size: {})...'.format(
            BATCH_SIZE))
        Award.objects.bulk_create(parent_award_bulk, batch_size=BATCH_SIZE)
    def insert_new_fpds(self, to_insert, total_rows):
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5",
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
        }

        fpds_normalized_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        fpds_field_map = {
            "officer_1_name": "high_comp_officer1_full_na",
            "officer_1_amount": "high_comp_officer1_amount",
            "officer_2_name": "high_comp_officer2_full_na",
            "officer_2_amount": "high_comp_officer2_amount",
            "officer_3_name": "high_comp_officer3_full_na",
            "officer_3_amount": "high_comp_officer3_amount",
            "officer_4_name": "high_comp_officer4_full_na",
            "officer_4_amount": "high_comp_officer4_amount",
            "officer_5_name": "high_comp_officer5_full_na",
            "officer_5_amount": "high_comp_officer5_amount"
        }

        for index, row in enumerate(to_insert, 1):
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
            )
            recipient_name = row["awardee_or_recipient_legal"]
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row["awardee_or_recipient_uniqu"],
                recipient_name=recipient_name if recipient_name is not None else "",
            )
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type="fpds"),
                "is_fpds": True,
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=row["unique_award_key"],
                piid=row["piid"],
            )
            award.parent_award_piid = row.get("parent_award_id")
            award.save()

            # Append row to list of Awards updated
            AWARD_UPDATE_ID_LIST.append(award.id)

            if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):  # 19 characters
                dt_fmt = "%Y-%m-%d %H:%M:%S"
            else:
                dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try using this even if last_modified isn't a valid string

            try:
                last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
            except ValueError:  # handle odd-string formats and NULLs from the upstream FPDS-NG system
                info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt)
                logger.info(info_message)
                last_mod_date = None

            award_type, award_type_desc = award_types(row)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
                "action_date": format_date(row["action_date"]),
                "last_modified_date": last_mod_date,
                "transaction_unique_id": row["detached_award_proc_unique"],
                "is_fpds": True,
                "type": award_type,
                "type_description": award_type_desc,
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fpds_normalized_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                field_map=fpds_field_map,
                as_dict=True
            )

            detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
            unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

            if unique_fpds.first():
                transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
                transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update(
                    **transaction_normalized_dict
                )

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(**transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
                transaction_fpds.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = detached_award_proc_unique
            legal_entity.save()
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False):
    """
    Process and load file D1 broker data (contract award txns).
    """

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }

    legal_entity_location_value_map = {
        "recipient_flag": True
    }

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    d_start_time = time.time()

    if quick:
        parameters = {'broker_submission_id': submission_attributes.broker_submission_id}
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters)
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
        return

    total_rows = len(procurement_data)

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        if not (index % 100):
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                         str(total_rows),
                                                                         datetime.now() - start_time))

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row, copy(legal_entity_location_value_map)
        )

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        )

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row, copy(place_of_performance_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code if funding_agency is not None
                else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get('parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                            row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
        transaction_contract.save()
    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
    def insert_new_fpds(self, to_insert, total_rows):
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5",
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
        }

        for index, row in enumerate(to_insert, 1):
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
            )
            recipient_name = row["awardee_or_recipient_legal"]
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row["awardee_or_recipient_uniqu"],
                recipient_name=recipient_name if recipient_name is not None else "",
            )
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type="fpds"),
                "is_fpds": True,
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            generated_unique_id = (
                "CONT_AW_"
                + (row["agency_id"] if row["agency_id"] else "-NONE-")
                + "_"
                + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-")
                + "_"
                + (row["piid"] if row["piid"] else "-NONE-")
                + "_"
                + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-")
            )

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id, piid=row["piid"]
            )
            award.parent_award_piid = row.get("parent_award_id")
            award.save()

            # Append row to list of Awards updated
            AWARD_UPDATE_ID_LIST.append(award.id)

            if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):  # 19 characters
                dt_fmt = "%Y-%m-%d %H:%M:%S"
            else:
                dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try using this even if last_modified isn't a valid string

            try:
                last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
            except ValueError:  # handle odd-string formats and NULLs from the upstream FPDS-NG system
                info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt)
                logger.info(info_message)
                last_mod_date = None

            award_type, award_type_desc = award_types(row)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
                "action_date": format_date(row["action_date"]),
                "last_modified_date": last_mod_date,
                "transaction_unique_id": row["detached_award_proc_unique"],
                "generated_unique_award_id": generated_unique_id,
                "is_fpds": True,
                "type": award_type,
                "type_description": award_type_desc,
            }

            contract_field_map = {"description": "award_description"}

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            contract_instance = load_data_into_model(TransactionFPDS(), row, as_dict=True)  # thrown away

            detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
            unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

            if unique_fpds.first():
                transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
                transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update(
                    **transaction_normalized_dict
                )

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(**transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
                transaction_fpds.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = detached_award_proc_unique
            legal_entity.save()
def load_file_d2(submission_attributes, award_financial_assistance_data,
                 db_cursor):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}

    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    for row in award_financial_assistance_data:

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu']
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up
        if row['awarding_agency_code'] is None:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        if row['funding_agency_code'] is None:
            row['funding_agency_code'] = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        AWARD_UPDATE_ID_LIST.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_instance = load_data_into_model(
            Transaction(),
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, created = Transaction.objects.get_or_create(
            **transaction_instance)

        fad_value_map = {
            "transaction":
            transaction_instance,
            "submission":
            submission_attributes,
            "cfda":
            CFDAProgram.objects.filter(
                program_number=row['cfda_number']).first(),
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionAssistance(),
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            save=True)
def load_file_c(submission_attributes, award_financial_data, db_cursor):
    """
    Process and load file C broker data.
    Note: this should run AFTER the D1 and D2 files are loaded because we try
    to join to those records to retrieve some additional information
    about the awarding sub-tier agency.
    """
    # this matches the file b reverse directive, but am repeating it here
    # to ensure that we don't overwrite it as we change up the order of
    # file loading
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    for row in award_financial_data:
        # Check and see if there is an entry for this TAS
        treasury_account = get_treasury_appropriation_account_tas_lookup(
            row.get('tas_id'), db_cursor)
        if treasury_account is None:
            raise Exception('Could not find appropriation account for TAS: ' +
                            row['tas'])

        # Find a matching transaction record, so we can use its
        # subtier agency information to match to (or create) an Award record
        awarding_cgac = row.get('agency_identifier')  # cgac from record's TAS
        txn = get_award_financial_transaction(
            awarding_cgac,
            piid=row.get('piid'),
            parent_award_id=row.get('parent_award_id'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        if txn is not None:
            # We found a matching transaction, so grab its awarding agency
            # info and pass it get_or_create_summary_award
            awarding_agency = txn.awarding_agency
        else:
            # No matching transaction found, so find/create Award by using
            # topiter agency only, since CGAC code is the only piece of
            # awarding agency info that we have.
            awarding_agency = Agency.get_by_toptier(awarding_cgac)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'),
            use_cache=False)

        award_financial_data = FinancialAccountsByAwards()

        value_map = {
            'award':
            award,
            'submission':
            submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            'treasury_account':
            treasury_account,
            'object_class':
            get_or_create_object_class(row['object_class'],
                                       row['by_direct_reimbursable_fun'],
                                       logger),
            'program_activity':
            get_or_create_program_activity(row, submission_attributes)
        }

        # Still using the cpe|fyb regex compiled above for reverse
        afd = load_data_into_model(award_financial_data,
                                   row,
                                   value_map=value_map,
                                   save=True,
                                   reverse=reverse)

    awards_cache.clear()
    def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFPDS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True)

        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
        arguments += [limit, (page-1)*limit]

        logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1],
                                                                  arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "location_country_code": "legal_entity_country_code",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "state_code": "legal_entity_state_code",
            "zip4": "legal_entity_zip4"
        }

        legal_entity_location_value_map = {
            "recipient_flag": True
        }

        place_of_performance_field_map = {
            # not sure place_of_performance_locat maps exactly to city name
            # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping
            "congressional_code": "place_of_performance_congr",
            "state_code": "place_of_performance_state",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        place_of_performance_value_map = {
            "place_of_performance_flag": True
        }

        contract_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(procurement_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) + " rows of procurement data")

        # skip_count = 0

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():
                # if TransactionFPDS.objects.values('detached_award_procurement_id').\
                #         filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first():
                #     skip_count += 1
                #
                #     if not (skip_count % 100):
                #         logger.info('Skipped {} records so far'.format(str(skip_count)))

                if not (index % 100):
                    logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                                 str(total_rows),
                                                                                 datetime.now() - start_time))

                recipient_name = row['awardee_or_recipient_legal']
                if recipient_name is None:
                    recipient_name = ""

                legal_entity_location, created = get_or_create_location(
                    legal_entity_location_field_map, row, copy(legal_entity_location_value_map)
                )

                # Create the legal entity if it doesn't exist
                legal_entity, created = LegalEntity.objects.get_or_create(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name
                )

                if created:
                    legal_entity_value_map = {
                        "location": legal_entity_location,
                    }
                    legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

                # Create the place of performance location
                pop_location, created = get_or_create_location(
                    place_of_performance_field_map, row, copy(place_of_performance_value_map))

                # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                    awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                    row['awarding_agency_code'] = awarding_cgac_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = \
                            subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                        funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                    else:
                        funding_cgac_code = None
                    row['funding_agency_code'] = funding_cgac_code

                # Find the award that this award transaction belongs to. If it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row['awarding_agency_code'],
                    row["awarding_sub_tier_agency_c"]
                )
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get('piid'),
                    fain=row.get('fain'),
                    uri=row.get('uri'),
                    parent_award_piid=row.get('parent_award_id'))
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                parent_txn_value_map = {
                    "award": award,
                    "awarding_agency": awarding_agency,
                    "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                                    row["funding_sub_tier_agency_co"]),
                    "recipient": legal_entity,
                    "place_of_performance": pop_location,
                    "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                    "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                    "action_date": format_date(row['action_date']),
                }

                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=contract_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True)

                transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),  # thrown away
                    row,
                    as_dict=True)

                transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
    def update_transaction_contract(db_cursor,
                                    fiscal_year=None,
                                    page=1,
                                    limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFPDS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True)

        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        logger.info("Executing query on Broker DB => " + query %
                    (arguments[0], arguments[1], arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "location_country_code": "legal_entity_country_code",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "state_code": "legal_entity_state_code",
            "zip4": "legal_entity_zip4"
        }

        legal_entity_location_value_map = {"recipient_flag": True}

        place_of_performance_field_map = {
            # not sure place_of_performance_locat maps exactly to city name
            # "city_name": "place_of_performance_locat", # location id doesn't mean it's a city. Can't use this mapping
            "congressional_code": "place_of_performance_congr",
            "state_code": "place_of_performance_state",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        place_of_performance_value_map = {"place_of_performance_flag": True}

        contract_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(procurement_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) +
                    " rows of procurement data")

        # skip_count = 0

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():
                # if TransactionFPDS.objects.values('detached_award_procurement_id').\
                #         filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first():
                #     skip_count += 1
                #
                #     if not (skip_count % 100):
                #         logger.info('Skipped {} records so far'.format(str(skip_count)))

                if not (index % 100):
                    logger.info(
                        'D1 File Load: Loading row {} of {} ({})'.format(
                            str(index), str(total_rows),
                            datetime.now() - start_time))

                recipient_name = row['awardee_or_recipient_legal']
                if recipient_name is None:
                    recipient_name = ""

                legal_entity_location, created = get_or_create_location(
                    legal_entity_location_field_map, row,
                    copy(legal_entity_location_value_map))

                # Create the legal entity if it doesn't exist
                legal_entity, created = LegalEntity.objects.get_or_create(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name)

                if created:
                    legal_entity_value_map = {
                        "location": legal_entity_location,
                    }
                    legal_entity = load_data_into_model(
                        legal_entity,
                        row,
                        value_map=legal_entity_value_map,
                        save=True)

                # Create the place of performance location
                pop_location, created = get_or_create_location(
                    place_of_performance_field_map, row,
                    copy(place_of_performance_value_map))

                # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row['awarding_agency_code'] is None or len(
                        row['awarding_agency_code'].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[
                        row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[
                        awarding_subtier_agency_id]['toptier_agency_id']
                    awarding_cgac_code = toptier_agency_map[
                        awarding_toptier_agency_id]
                    row['awarding_agency_code'] = awarding_cgac_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row['funding_agency_code'] is None or len(
                        row['funding_agency_code'].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(
                        row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = \
                            subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                        funding_cgac_code = toptier_agency_map[
                            funding_toptier_agency_id]
                    else:
                        funding_cgac_code = None
                    row['funding_agency_code'] = funding_cgac_code

                # Find the award that this award transaction belongs to. If it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row['awarding_agency_code'],
                    row["awarding_sub_tier_agency_c"])
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get('piid'),
                    fain=row.get('fain'),
                    uri=row.get('uri'),
                    parent_award_piid=row.get('parent_award_id'))
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                parent_txn_value_map = {
                    "award":
                    award,
                    "awarding_agency":
                    awarding_agency,
                    "funding_agency":
                    Agency.get_by_toptier_subtier(
                        row['funding_agency_code'],
                        row["funding_sub_tier_agency_co"]),
                    "recipient":
                    legal_entity,
                    "place_of_performance":
                    pop_location,
                    "period_of_performance_start_date":
                    format_date(row['period_of_performance_star']),
                    "period_of_performance_current_end_date":
                    format_date(row['period_of_performance_curr']),
                    "action_date":
                    format_date(row['action_date']),
                }

                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=contract_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True)

                transaction = TransactionNormalized.get_or_create_transaction(
                    **transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),  # thrown away
                    row,
                    as_dict=True)

                transaction_contract = TransactionFPDS(transaction=transaction,
                                                       **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
Exemple #11
0
    def insert_new_fpds(self, to_insert, total_rows):
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5",
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
        }

        for index, row in enumerate(to_insert, 1):
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {
                    "recipient_flag": True,
                    "is_fpds": True
                })
            recipient_name = row["awardee_or_recipient_legal"]
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row["awardee_or_recipient_uniqu"],
                recipient_name=recipient_name
                if recipient_name is not None else "",
            )
            legal_entity_value_map = {
                "location":
                legal_entity_location,
                "business_categories":
                get_business_categories(row=row, data_type="fpds"),
                "is_fpds":
                True,
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            generated_unique_id = (
                "CONT_AW_" +
                (row["agency_id"] if row["agency_id"] else "-NONE-") + "_" +
                (row["referenced_idv_agency_iden"]
                 if row["referenced_idv_agency_iden"] else "-NONE-") + "_" +
                (row["piid"] if row["piid"] else "-NONE-") + "_" +
                (row["parent_award_id"]
                 if row["parent_award_id"] else "-NONE-"))

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                piid=row["piid"])
            award.parent_award_piid = row.get("parent_award_id")
            award.save()

            # Append row to list of Awards updated
            AWARD_UPDATE_ID_LIST.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(
                    row["last_modified"]), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row["last_modified"]),
                                                  "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award":
                award,
                "awarding_agency":
                awarding_agency,
                "funding_agency":
                funding_agency,
                "recipient":
                legal_entity,
                "place_of_performance":
                pop_location,
                "period_of_performance_start_date":
                format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date":
                format_date(row["period_of_performance_curr"]),
                "action_date":
                format_date(row["action_date"]),
                "last_modified_date":
                last_mod_date,
                "transaction_unique_id":
                row["detached_award_proc_unique"],
                "generated_unique_award_id":
                generated_unique_id,
                "is_fpds":
                True,
            }

            contract_field_map = {
                "type": "contract_award_type",
                "type_description": "contract_award_type_desc",
                "description": "award_description",
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            contract_instance = load_data_into_model(
                TransactionFPDS(), row, as_dict=True)  # thrown away

            detached_award_proc_unique = contract_instance[
                "detached_award_proc_unique"]
            unique_fpds = TransactionFPDS.objects.filter(
                detached_award_proc_unique=detached_award_proc_unique)

            if unique_fpds.first():
                transaction_normalized_dict["update_date"] = datetime.now(
                    timezone.utc)
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(
                    id=unique_fpds.first().transaction.id).update(
                        **transaction_normalized_dict)

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction,
                                                   **contract_instance)
                transaction_fpds.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = detached_award_proc_unique
            legal_entity.save()
def insert_new_fabs(to_insert):
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city",
    }

    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }

    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    update_award_ids = []
    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
            if recipient_name is not None else "",
            parent_recipient_unique_id=row['ultimate_parent_unique_ide'],
        )
        legal_entity_value_map = {
            "location":
            legal_entity_location,
            "business_categories":
            get_business_categories(row=row, data_type='fabs'),
            "business_types_description":
            row['business_types_desc'],
        }
        legal_entity = load_data_into_model(legal_entity,
                                            row,
                                            value_map=legal_entity_value_map,
                                            save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row,
                                       {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=row['unique_award_key'],
            fain=row['fain'],
            uri=row['uri'],
            record_type=row['record_type'],
        )
        award.save()

        # Append row to list of Awards updated
        update_award_ids.append(award.id)

        try:
            last_mod_date = datetime.strptime(str(row['modified_at']),
                                              "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['modified_at']),
                                              "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            funding_agency,
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
            "last_modified_date":
            last_mod_date,
            "type_description":
            row['assistance_type_desc'],
            "transaction_unique_id":
            row['afa_generated_unique'],
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            field_map=fabs_field_map,
            as_dict=True)

        # Hack to cut back on the number of warnings dumped to the log.
        financial_assistance_data['updated_at'] = cast_datetime_to_utc(
            financial_assistance_data['updated_at'])
        financial_assistance_data['created_at'] = cast_datetime_to_utc(
            financial_assistance_data['created_at'])
        financial_assistance_data['modified_at'] = cast_datetime_to_utc(
            financial_assistance_data['modified_at'])

        afa_generated_unique = financial_assistance_data[
            'afa_generated_unique']
        unique_fabs = TransactionFABS.objects.filter(
            afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            transaction_normalized_dict["update_date"] = datetime.now(
                timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(
                id=unique_fabs.first().transaction.id).update(
                    **transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction_normalized = TransactionNormalized(
                **transaction_normalized_dict)
            transaction_normalized.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(
                transaction=transaction_normalized,
                **financial_assistance_data)
            transaction_fabs.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = afa_generated_unique
        legal_entity.save()

    return update_award_ids
def insert_new_fabs(to_insert):
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city",
    }

    update_award_ids = []
    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "",
            parent_recipient_unique_id=row['ultimate_parent_unique_ide'],
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fabs'),
            "business_types_description": row['business_types_desc'],
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

        # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
        # non-numeric record type values
        record_type_int = int(row['record_type'])
        if record_type_int == 1:
            uri = row['uri'] if row['uri'] else '-NONE-'
            fain = '-NONE-'
        elif record_type_int in (2, 3):
            uri = '-NONE-'
            fain = row['fain'] if row['fain'] else '-NONE-'
        else:
            msg = "Invalid record type encountered for the following afa_generated_unique record: {}"
            raise Exception(msg.format(row['afa_generated_unique']))

        astac = row["awarding_sub_tier_agency_c"] if row["awarding_sub_tier_agency_c"] else "-NONE-"
        generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri)

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id,
            fain=row['fain'],
            uri=row['uri'],
            record_type=row['record_type'],
        )
        award.save()

        # Append row to list of Awards updated
        update_award_ids.append(award.id)

        try:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()
        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "type_description": row['assistance_type_desc'],
            "transaction_unique_id": row['afa_generated_unique'],
            "generated_unique_award_id": generated_unique_id,
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
            "funding_amount": "total_funding_amount",
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(TransactionFABS(), row, as_dict=True)  # thrown away

        # Hack to cut back on the number of warnings dumped to the log.
        financial_assistance_data['updated_at'] = cast_datetime_to_utc(financial_assistance_data['updated_at'])
        financial_assistance_data['created_at'] = cast_datetime_to_utc(financial_assistance_data['created_at'])
        financial_assistance_data['modified_at'] = cast_datetime_to_utc(financial_assistance_data['modified_at'])

        afa_generated_unique = financial_assistance_data['afa_generated_unique']
        unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).update(
                **transaction_normalized_dict
            )

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction_normalized = TransactionNormalized(**transaction_normalized_dict)
            transaction_normalized.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(transaction=transaction_normalized, **financial_assistance_data)
            transaction_fabs.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = afa_generated_unique
        legal_entity.save()

    return update_award_ids
Exemple #14
0
    def update_transaction_assistance(db_cursor,
                                      fiscal_year=None,
                                      page=1,
                                      limit=500000):

        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        fy_begin = "10/01/" + str(fiscal_year - 1)
        fy_end = "09/30/" + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += " action_date::Date BETWEEN %s AND %s"
            arguments += [fy_begin]
            arguments += [fy_end]
        query += " ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s"
        arguments += [limit, (page - 1) * limit]

        logger.info("Executing query on Broker DB => " + query %
                    (arguments[0], arguments[1], arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        fabs_normalized_field_map = {
            "type": "assistance_type",
            "description": "award_description",
            "funding_amount": "total_funding_amount",
        }

        fabs_field_map = {
            "officer_1_name": "high_comp_officer1_full_na",
            "officer_1_amount": "high_comp_officer1_amount",
            "officer_2_name": "high_comp_officer2_full_na",
            "officer_2_amount": "high_comp_officer2_amount",
            "officer_3_name": "high_comp_officer3_full_na",
            "officer_3_amount": "high_comp_officer3_amount",
            "officer_4_name": "high_comp_officer4_full_na",
            "officer_4_amount": "high_comp_officer4_amount",
            "officer_5_name": "high_comp_officer5_full_na",
            "officer_5_amount": "high_comp_officer5_amount",
        }

        logger.info("Getting total rows")

        total_rows = len(award_financial_assistance_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) +
                    " rows of assistance data")

        # ROW ITERATION STARTS HERE

        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        awarding_agency_list = []
        funding_agency_list = []

        logger.info("Getting award objects for {} rows...".format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row["awarding_agency_code"] is None or len(
                    row["awarding_agency_code"].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[
                    awarding_subtier_agency_id]["toptier_agency_id"]
                awarding_toptier_code = toptier_agency_map[
                    awarding_toptier_agency_id]
                row["awarding_agency_code"] = awarding_toptier_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row["funding_agency_code"] is None or len(
                    row["funding_agency_code"].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = subtier_to_agency_map[
                        funding_subtier_agency_id]["toptier_agency_id"]
                    funding_toptier_code = toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_toptier_code = None
                row["funding_agency_code"] = funding_toptier_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row["awarding_agency_code"], row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_toptier_subtier(
                row["funding_agency_code"], row["funding_sub_tier_agency_co"])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # award.save() is called in Award.get_or_create_summary_award by default
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get("fain"),
                uri=row.get("uri"),
                generated_unique_award_id=row.get("unique_award_key"),
                save=False,
            )

            award_bulk.append(award)
            award_update_id_list.append(award.id)
            award_assistance_update_id_list.append(award.id)

        logger.info("Bulk creating {} award rows...".format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info("!!! DUPLICATES FOUND. Continuing... ")

        logger.info("Getting transaction_normalized for {} rows...".format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            parent_txn_value_map = {
                "award":
                award_bulk[index - 1],
                "awarding_agency":
                awarding_agency_list[index - 1],
                "funding_agency":
                funding_agency_list[index - 1],
                "period_of_performance_start_date":
                format_date(row["period_of_performance_star"]),
                "period_of_performance_current_end_date":
                format_date(row["period_of_performance_curr"]),
                "action_date":
                format_date(row["action_date"]),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fabs_normalized_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            transaction_normalized = TransactionNormalized.get_or_create_transaction(
                **transaction_dict)
            transaction_normalized.fiscal_year = fy(
                transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info("Bulk creating {} TransactionNormalized rows...".format(
            len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(
                transaction_normalized_bulk)
        except IntegrityError:
            logger.info(
                "Tried and failed to insert duplicate transaction_normalized row. Continuing... "
            )

        for index, row in enumerate(award_financial_assistance_data, 1):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),
                row,
                field_map=fabs_field_map,
                as_dict=True  # thrown away
            )

            transaction_assistance = TransactionFABS(
                transaction=transaction_normalized_bulk[index - 1],
                **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info("Bulk creating TransactionFABS rows...")
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info("!!! DUPLICATES FOUND. Continuing... ")
Exemple #15
0
    def update_transaction_contract(db_cursor,
                                    fiscal_year=None,
                                    page=1,
                                    limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFPDS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True)

        query = "SELECT * FROM detached_award_procurement"
        arguments = []

        fy_begin = "10/01/" + str(fiscal_year - 1)
        fy_end = "09/30/" + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += " action_date::Date BETWEEN %s AND %s"
            arguments += [fy_begin]
            arguments += [fy_end]
        query += " ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s"
        arguments += [limit, (page - 1) * limit]

        logger.info("Executing query on Broker DB => " + query %
                    (arguments[0], arguments[1], arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        procurement_data = dictfetchall(db_cursor)

        fpds_normalized_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        fpds_field_map = {
            "officer_1_name": "high_comp_officer1_full_na",
            "officer_1_amount": "high_comp_officer1_amount",
            "officer_2_name": "high_comp_officer2_full_na",
            "officer_2_amount": "high_comp_officer2_amount",
            "officer_3_name": "high_comp_officer3_full_na",
            "officer_3_amount": "high_comp_officer3_amount",
            "officer_4_name": "high_comp_officer4_full_na",
            "officer_4_amount": "high_comp_officer4_amount",
            "officer_5_name": "high_comp_officer5_full_na",
            "officer_5_amount": "high_comp_officer5_amount",
        }

        logger.info("Getting total rows")

        total_rows = len(procurement_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) +
                    " rows of procurement data")

        start_time = datetime.now()
        for index, row in enumerate(procurement_data, 1):
            with db_transaction.atomic():

                if not (index % 100):
                    logger.info(
                        "D1 File Load: Loading row {} of {} ({})".format(
                            str(index), str(total_rows),
                            datetime.now() - start_time))

                # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
                # use the sub tier code to look it up. This code assumes that all incoming
                # records will supply an awarding subtier agency code
                if row["awarding_agency_code"] is None or len(
                        row["awarding_agency_code"].strip()) < 1:
                    awarding_subtier_agency_id = subtier_agency_map[
                        row["awarding_sub_tier_agency_c"]]
                    awarding_toptier_agency_id = subtier_to_agency_map[
                        awarding_subtier_agency_id]["toptier_agency_id"]
                    awarding_toptier_code = toptier_agency_map[
                        awarding_toptier_agency_id]
                    row["awarding_agency_code"] = awarding_toptier_code

                # If funding toptier agency code (aka CGAC) is empty, try using the sub
                # tier funding code to look it up. Unlike the awarding agency, we can't
                # assume that the funding agency subtier code will always be present.
                if row["funding_agency_code"] is None or len(
                        row["funding_agency_code"].strip()) < 1:
                    funding_subtier_agency_id = subtier_agency_map.get(
                        row["funding_sub_tier_agency_co"])
                    if funding_subtier_agency_id is not None:
                        funding_toptier_agency_id = subtier_to_agency_map[
                            funding_subtier_agency_id]["toptier_agency_id"]
                        funding_toptier_code = toptier_agency_map[
                            funding_toptier_agency_id]
                    else:
                        funding_toptier_code = None
                    row["funding_agency_code"] = funding_toptier_code

                # Find the award that this award transaction belongs to. If it doesn't exist, create it.
                awarding_agency = Agency.get_by_toptier_subtier(
                    row["awarding_agency_code"],
                    row["awarding_sub_tier_agency_c"])
                created, award = Award.get_or_create_summary_award(
                    awarding_agency=awarding_agency,
                    piid=row.get("piid"),
                    fain=row.get("fain"),
                    uri=row.get("uri"),
                    parent_award_piid=row.get("parent_award_id"),
                    generated_unique_award_id=row.get("unique_award_key"),
                )
                award.save()

                award_update_id_list.append(award.id)
                award_contract_update_id_list.append(award.id)

                parent_txn_value_map = {
                    "award":
                    award,
                    "awarding_agency":
                    awarding_agency,
                    "funding_agency":
                    Agency.get_by_toptier_subtier(
                        row["funding_agency_code"],
                        row["funding_sub_tier_agency_co"]),
                    "period_of_performance_start_date":
                    format_date(row["period_of_performance_star"]),
                    "period_of_performance_current_end_date":
                    format_date(row["period_of_performance_curr"]),
                    "action_date":
                    format_date(row["action_date"]),
                }

                transaction_dict = load_data_into_model(
                    TransactionNormalized(),  # thrown away
                    row,
                    field_map=fpds_normalized_field_map,
                    value_map=parent_txn_value_map,
                    as_dict=True,
                )

                transaction = TransactionNormalized.get_or_create_transaction(
                    **transaction_dict)
                transaction.save()

                contract_instance = load_data_into_model(
                    TransactionFPDS(),
                    row,
                    field_map=fpds_field_map,
                    as_dict=True  # thrown away
                )

                transaction_contract = TransactionFPDS(transaction=transaction,
                                                       **contract_instance)
                # catch exception and do nothing if we see
                # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
                try:
                    transaction_contract.save()
                except IntegrityError:
                    pass
def get_or_create_summary_award(awarding_agency=None,
                                piid=None,
                                fain=None,
                                uri=None,
                                parent_award_id=None,
                                save=True):
    """
    Given a set of award identifiers and awarding agency information,
    find a corresponding Award record. If we can't find one, create it.

    Returns:
        created: a list of new awards created, used to enable bulk insert
        summary_award: the summary award that the calling process can map to
    """
    # If an award transaction's ID is a piid, it's contract data
    # If the ID is fain or a uri, it's financial assistance. If the award transaction
    # has both a fain and a uri, include both.
    try:
        # Look for an existing award record

        # Check individual FILE C D linkage first
        lookup_kwargs = {'recipient_id__isnull': False}
        if piid is not None:
            lookup_kwargs['piid'] = piid
        if parent_award_id is not None:
            lookup_kwargs['parent_award__piid'] = parent_award_id
        if fain is not None:
            lookup_kwargs['fain'] = fain
        if uri is not None:
            lookup_kwargs['uri'] = uri

        award_queryset = Award.objects.filter(**lookup_kwargs)[:2]

        award_count = len(award_queryset)

        if award_count == 1:
            summary_award = award_queryset[0]
            return [], summary_award

        # if nothing found revert to looking for an award that this record could've already created
        lookup_kwargs = {"awarding_agency": awarding_agency}
        for i in [(piid, "piid"), (fain, "fain"), (uri, "uri")]:
            lookup_kwargs[i[1]] = i[0]
            if parent_award_id:
                # parent_award__piid
                lookup_kwargs["parent_award_piid"] = parent_award_id

        # Look for an existing award record
        summary_award = Award.objects \
            .filter(Q(**lookup_kwargs)) \
            .filter(awarding_agency=awarding_agency) \
            .first()
        if (summary_award is None and awarding_agency is not None
                and awarding_agency.toptier_agency.name !=
                awarding_agency.subtier_agency.name):
            # No award match found when searching by award id info +
            # awarding subtier agency. Relax the awarding agency
            # critera to just the toptier agency instead of the subtier
            # agency and try the search again.
            awarding_agency_toptier = Agency.get_by_toptier(
                awarding_agency.toptier_agency.cgac_code)

            summary_award = Award.objects \
                .filter(Q(**lookup_kwargs)) \
                .filter(awarding_agency=awarding_agency_toptier) \
                .first()

        if summary_award:
            return [], summary_award

        parent_created, parent_award = [], None

        # Now create the award record for this award transaction
        create_kwargs = {
            "awarding_agency": awarding_agency,
            "parent_award": parent_award,
            "parent_award_piid": parent_award_id
        }
        for i in [(piid, "piid"), (fain, "fain"), (uri, "uri")]:
            create_kwargs[i[1]] = i[0]
        summary_award = Award(**create_kwargs)
        created = [
            summary_award,
        ]
        created.extend(parent_created)

        if save:
            summary_award.save()

        return created, summary_award

    # Do not use bare except
    except ValueError:
        raise ValueError(
            'Unable to find or create an award with the provided information: '
            'piid={}, fain={}, uri={}, parent_id={}, awarding_agency={}'.
            format(piid, fain, uri, parent_award_id, awarding_agency))
    def load_awards(self, fpds_broker_data, total_rows):
        start_time = datetime.now()
        for index, row in enumerate(fpds_broker_data, 1):
            if not (index % 10000):
                logger.info('Awards: Loading row {} of {} ({})'.format(str(index),
                                                                       str(total_rows),
                                                                       datetime.now() - start_time))

            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = self.subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = self.subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = self.toptier_agency_map[awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = self.subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = self.subtier_to_agency_map[funding_subtier_agency_id][
                        'toptier_agency_id']
                    funding_cgac_code = self.toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = self.agency_no_sub_map.get((
                row['awarding_agency_code'],
                row["awarding_sub_tier_agency_c"]
            ))

            if awarding_agency is None:
                awarding_agency = self.agency_sub_only_map.get(row['awarding_agency_code'])

            # If we still haven't found the agency, try surmising it from subtier
            if awarding_agency is None:
                awarding_agency = self.agency_subtier_map.get(row['awarding_sub_tier_agency_c'])

            funding_agency = self.agency_no_sub_map.get((
                row['funding_agency_code'],
                row["funding_sub_tier_agency_co"]
            ))

            if funding_agency is None:
                funding_agency = self.agency_sub_only_map.get(row['funding_agency_code'])

            if funding_agency is None:
                funding_agency = self.agency_subtier_map.get(row['funding_sub_tier_agency_co'])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            piid = row.get('piid')
            award = self.award_map.get(piid)
            if award and awarding_agency is not None and award.awarding_agency_id is not None:
                if award.awarding_agency_id != awarding_agency.id:
                    award = None

            if not award:
                # create the award since it wasn't found
                create_kwargs = {'awarding_agency': awarding_agency, 'piid': piid}
                award = Award(**create_kwargs)
                award.parent_award = parent_award_lookup[index - 1]
                self.award_map[piid] = award
                award_bulk.append(award)

            award_lookup.append(award)

        logger.info('Bulk creating Awards (batch_size: {})...'.format(BATCH_SIZE))
        Award.objects.bulk_create(award_bulk, batch_size=BATCH_SIZE)
def load_file_d2(
        submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing
):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """

    d_start_time = time.time()

    if quick:
        setup_broker_fdw()

        parameters = {'broker_submission_id': submission_attributes.broker_submission_id}
        run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters)
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
        return

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"

    }

    legal_entity_location_value_map = {
        "recipient_flag": True
    }

    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    total_rows = len(award_financial_assistance_data)

    start_time = datetime.now()
    for index, row in enumerate(award_financial_assistance_data, 1):
        if not (index % 100):
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                         str(total_rows),
                                                                         datetime.now() - start_time))

        row = row_preprocessor(row)

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row, legal_entity_location_value_map
        )

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        )

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row, place_of_performance_value_map
        )

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code if funding_agency is not None
                else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        award.save()

        award_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                            row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        fad_value_map = {
            "submission": submission_attributes,
            "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(),
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)

        transaction_assistance = TransactionFABS.get_or_create_2(transaction=transaction, **financial_assistance_data)
        transaction_assistance.save()

    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
Exemple #19
0
def insert_new_fabs(to_insert):
    fabs_normalized_field_map = {
        "type": "assistance_type",
        "description": "award_description",
        "funding_amount": "total_funding_amount",
    }

    fabs_field_map = {
        "officer_1_name": "high_comp_officer1_full_na",
        "officer_1_amount": "high_comp_officer1_amount",
        "officer_2_name": "high_comp_officer2_full_na",
        "officer_2_amount": "high_comp_officer2_amount",
        "officer_3_name": "high_comp_officer3_full_na",
        "officer_3_amount": "high_comp_officer3_amount",
        "officer_4_name": "high_comp_officer4_full_na",
        "officer_4_amount": "high_comp_officer4_amount",
        "officer_5_name": "high_comp_officer5_full_na",
        "officer_5_amount": "high_comp_officer5_amount",
    }

    update_award_ids = []
    for row in to_insert:
        upper_case_dict_values(row)

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(
            row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(
            row["funding_sub_tier_agency_co"])

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=row["unique_award_key"],
            fain=row["fain"],
            uri=row["uri"],
            record_type=row["record_type"],
        )
        award.save()

        # Append row to list of Awards updated
        update_award_ids.append(award.id)

        try:
            last_mod_date = datetime.strptime(str(row["modified_at"]),
                                              "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row["modified_at"]),
                                              "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            funding_agency,
            "period_of_performance_start_date":
            format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date":
            format_date(row["period_of_performance_curr"]),
            "action_date":
            format_date(row["action_date"]),
            "last_modified_date":
            last_mod_date,
            "type_description":
            row["assistance_type_desc"],
            "transaction_unique_id":
            row["afa_generated_unique"],
            "business_categories":
            get_business_categories(row=row, data_type="fabs"),
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fabs_normalized_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),
            row,
            field_map=fabs_field_map,
            as_dict=True  # thrown away
        )

        # Hack to cut back on the number of warnings dumped to the log.
        financial_assistance_data["updated_at"] = cast_datetime_to_utc(
            financial_assistance_data["updated_at"])
        financial_assistance_data["created_at"] = cast_datetime_to_utc(
            financial_assistance_data["created_at"])
        financial_assistance_data["modified_at"] = cast_datetime_to_utc(
            financial_assistance_data["modified_at"])

        afa_generated_unique = financial_assistance_data[
            "afa_generated_unique"]
        unique_fabs = TransactionFABS.objects.filter(
            afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            transaction_normalized_dict["update_date"] = datetime.now(
                timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(
                transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(
                id=unique_fabs.first().transaction.id).update(
                    **transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction_normalized = TransactionNormalized(
                **transaction_normalized_dict)
            transaction_normalized.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(
                transaction=transaction_normalized,
                **financial_assistance_data)
            transaction_fabs.save()

    return update_award_ids
Exemple #20
0
    def insert_new_fabs(self, to_insert, total_rows):
        logger.info('Starting insertion of new FABS data')

        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perform_country_n",
            "state_code": "place_of_perfor_state_code",
            "state_name": "place_of_perform_state_nam",
            "city_name": "place_of_performance_city",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "foreign_location_description": "place_of_performance_forei",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "performance_code": "place_of_performance_code",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "city_name": "legal_entity_city_name",
            "city_code": "legal_entity_city_code",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "foreign_location_description": "legal_entity_foreign_descr",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "foreign_city_name": "legal_entity_foreign_city"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            if not (index % 1000):
                logger.info('Inserting Stale FABS: Inserting row {} of {} ({})'.format(str(index), str(total_rows),
                                                                                       datetime.now() - start_time))

            for key in row:
                if isinstance(row[key], str):
                    row[key] = row[key].upper()

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name if recipient_name is not None else ""
            )
            legal_entity_value_map = {
                "location": legal_entity_location,
                "business_categories": get_business_categories(row=row, data_type='fabs'),
                "business_types_description": get_business_type_description(row['business_types'])
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

            # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
            # non-numeric record type values
            record_type_int = int(row['record_type'])
            if record_type_int == 1:
                uri = row['uri'] if row['uri'] else '-NONE-'
                fain = '-NONE-'
            elif record_type_int == 2:
                uri = '-NONE-'
                fain = row['fain'] if row['fain'] else '-NONE-'
            else:
                raise Exception('Invalid record type encountered for the following afa_generated_unique record: %s' %
                                row['afa_generated_unique'])

            generated_unique_id = 'ASST_AW_' +\
                (row['awarding_sub_tier_agency_c'] if row['awarding_sub_tier_agency_c'] else '-NONE-') + '_' + \
                fain + '_' + uri

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(generated_unique_award_id=generated_unique_id,
                                                                 fain=row['fain'],
                                                                 uri=row['uri'],
                                                                 record_type=row['record_type'])
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": funding_agency,
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
                "last_modified_date": last_mod_date,
                "type_description": get_assistance_type_description(row['assistance_type']),
                "transaction_unique_id": row['afa_generated_unique'],
                "generated_unique_award_id": generated_unique_id
            }

            fad_field_map = {
                "type": "assistance_type",
                "description": "award_description",
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            afa_generated_unique = financial_assistance_data['afa_generated_unique']
            unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

            if unique_fabs.first():
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

                # Update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).\
                    update(**transaction_normalized_dict)

                # Update TransactionFABS
                unique_fabs.update(**financial_assistance_data)
            else:
                # Create TransactionNormalized
                transaction = TransactionNormalized(**transaction_normalized_dict)
                transaction.save()

                # Create TransactionFABS
                transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data)
                transaction_fabs.save()
Exemple #21
0
def load_file_c(submission_attributes, db_cursor, award_financial_frame):
    """
    Process and load file C broker data.
    Note: this should run AFTER the D1 and D2 files are loaded because we try
    to join to those records to retrieve some additional information
    about the awarding sub-tier agency.
    """
    # this matches the file b reverse directive, but am repeating it here
    # to ensure that we don't overwrite it as we change up the order of
    # file loading
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    award_financial_frame['txn'] = award_financial_frame.apply(
        get_award_financial_transaction, axis=1)
    award_financial_frame['awarding_agency'] = award_financial_frame.apply(
        get_awarding_agency, axis=1)
    award_financial_frame['object_class'] = award_financial_frame.apply(
        get_or_create_object_class_rw, axis=1, logger=logger)
    award_financial_frame['program_activity'] = award_financial_frame.apply(
        get_or_create_program_activity,
        axis=1,
        submission_attributes=submission_attributes)

    # for row in award_financial_data:
    for row in award_financial_frame.replace({
            np.nan: None
    }).to_dict(orient='records'):
        # Check and see if there is an entry for this TAS
        treasury_account = get_treasury_appropriation_account_tas_lookup(
            row.get('tas_id'), db_cursor)
        if treasury_account is None:
            raise Exception('Could not find appropriation account for TAS: ' +
                            row['tas'])

        # Find a matching transaction record, so we can use its
        # subtier agency information to match to (or create) an Award record

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        created, award = Award.get_or_create_summary_award(
            awarding_agency=row['awarding_agency'],
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'),
            use_cache=False)

        award_financial_data = FinancialAccountsByAwards()

        value_map = {
            'award': award,
            'submission': submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            'treasury_account': treasury_account,
            'object_class': row.get('object_class'),
            'program_activity': row.get('program_activity'),
        }

        # Still using the cpe|fyb regex compiled above for reverse
        afd = load_data_into_model(award_financial_data,
                                   row,
                                   value_map=value_map,
                                   save=True,
                                   reverse=reverse)

    awards_cache.clear()
Exemple #22
0
def load_file_d1(submission_attributes,
                 procurement_data,
                 db_cursor,
                 quick=False):
    """
    Process and load file D1 broker data (contract award txns).
    """

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}

    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    d_start_time = time.time()

    if quick:
        parameters = {
            'broker_submission_id': submission_attributes.broker_submission_id
        }
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql',
                     parameters)
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() -
                                                              d_start_time))
        return

    total_rows = len(procurement_data)

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        if not (index % 100):
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows),
                datetime.now() - start_time))

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            copy(legal_entity_location_value_map))

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            copy(place_of_performance_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(
                row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(
                row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code
                if funding_agency is not None else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get(
                'parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(
            **transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission':
            submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
            format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction,
                                               **contract_instance)
        transaction_contract.save()
    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() -
                                                          d_start_time))
    def update_transaction_assistance(db_cursor,
                                      fiscal_year=None,
                                      page=1,
                                      limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFABS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
        arguments += [limit, (page - 1) * limit]

        logger.info("Executing query on Broker DB => " + query %
                    (arguments[0], arguments[1], arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "county_code": "legal_entity_county_code",
            "county_name": "legal_entity_county_name",
            "foreign_city_name": "legal_entity_foreign_city",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "zip5": "legal_entity_zip5",
            "zip_last4": "legal_entity_zip_last4",
            "location_country_code": "legal_entity_country_code"
        }

        place_of_performance_field_map = {
            "city_name": "place_of_performance_city",
            "performance_code": "place_of_performance_code",
            "congressional_code": "place_of_performance_congr",
            "county_name": "place_of_perform_county_na",
            "foreign_location_description": "place_of_performance_forei",
            "state_name": "place_of_perform_state_nam",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(award_financial_assistance_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) +
                    " rows of assistance data")

        # skip_count = 0

        # ROW ITERATION STARTS HERE

        lel_bulk = []
        pop_bulk = []
        legal_entity_bulk = []
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        logger.info(
            'Getting legal entity location objects for {} rows...'.format(
                len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Recipient flag is true for LeL
            legal_entity_location = get_or_create_location(
                legal_entity_location_field_map,
                row, {"recipient_flag": True},
                save=False)

            lel_bulk.append(legal_entity_location)

        logger.info('Bulk creating {} legal entity location rows...'.format(
            len(lel_bulk)))
        try:
            Location.objects.bulk_create(lel_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info(
            'Getting place of performance objects for {} rows...'.format(
                len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Place of Performance flag is true for PoP
            pop_location = get_or_create_location(
                place_of_performance_field_map,
                row, {"place_of_performance_flag": True},
                save=False)

            pop_bulk.append(pop_location)

        logger.info('Bulk creating {} place of performance rows...'.format(
            len(pop_bulk)))
        try:
            Location.objects.bulk_create(pop_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting legal entity objects for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            recipient_name = row.get('awardee_or_recipient_legal', '')

            legal_entity = LegalEntity.objects.filter(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name).first()

            if legal_entity is None:
                legal_entity = LegalEntity(
                    recipient_unique_id=row['awardee_or_recipient_uniqu'],
                    recipient_name=recipient_name)
                legal_entity_value_map = {
                    "location": lel_bulk[index - 1],
                }
                legal_entity = load_data_into_model(
                    legal_entity,
                    row,
                    value_map=legal_entity_value_map,
                    save=False)

            legal_entity_bulk.append(legal_entity)

        logger.info('Bulk creating {} legal entity rows...'.format(
            len(legal_entity_bulk)))
        try:
            LegalEntity.objects.bulk_create(legal_entity_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        awarding_agency_list = []
        funding_agency_list = []

        logger.info('Getting award objects for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(
                    row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[
                    awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[
                    awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(
                    row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_toptier_subtier(
                row['funding_agency_code'], row["funding_sub_tier_agency_co"])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # award.save() is called in Award.get_or_create_summary_award by default
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get('fain'),
                uri=row.get('uri'),
                save=False)

            award_bulk.append(award)
            award_update_id_list.append(award.id)

        logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting transaction_normalized for {} rows...'.format(
            len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            parent_txn_value_map = {
                "award":
                award_bulk[index - 1],
                "awarding_agency":
                awarding_agency_list[index - 1],
                "funding_agency":
                funding_agency_list[index - 1],
                "recipient":
                legal_entity_bulk[index - 1],
                "place_of_performance":
                pop_bulk[index - 1],
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction_normalized = TransactionNormalized.get_or_create_transaction(
                **transaction_dict)
            transaction_normalized.fiscal_year = fy(
                transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info('Bulk creating {} TransactionNormalized rows...'.format(
            len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(
                transaction_normalized_bulk)
        except IntegrityError:
            logger.info(
                'Tried and failed to insert duplicate transaction_normalized row. Continuing... '
            )

        for index, row in enumerate(award_financial_assistance_data, 1):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            transaction_assistance = TransactionFABS(
                transaction=transaction_normalized_bulk[index - 1],
                **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info('Bulk creating TransactionFABS rows...')
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')
Exemple #24
0
def load_file_d2(submission_attributes,
                 award_financial_assistance_data,
                 db_cursor,
                 quick,
                 row_preprocessor=no_preprocessing):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """

    d_start_time = time.time()

    if quick:
        setup_broker_fdw()

        parameters = {
            'broker_submission_id': submission_attributes.broker_submission_id
        }
        run_sql_file('usaspending_api/etl/management/load_file_d2.sql',
                     parameters)
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() -
                                                              d_start_time))
        return

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}

    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    total_rows = len(award_financial_assistance_data)

    start_time = datetime.now()
    for index, row in enumerate(award_financial_assistance_data, 1):
        if not (index % 100):
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows),
                datetime.now() - start_time))

        row = row_preprocessor(row)

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(
                row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(
                row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code
                if funding_agency is not None else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        award.save()

        award_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(
            **transaction_dict)
        transaction.save()

        fad_value_map = {
            "submission":
            submission_attributes,
            "cfda":
            Cfda.objects.filter(program_number=row['cfda_number']).first(),
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)

        transaction_assistance = TransactionFABS.get_or_create_2(
            transaction=transaction, **financial_assistance_data)
        transaction_assistance.save()

    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() -
                                                          d_start_time))
    def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):

        # logger.info("Getting IDs for what's currently in the DB...")
        # current_ids = TransactionFABS.objects
        #
        # if fiscal_year:
        #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
        #
        # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

        query = "SELECT * FROM published_award_financial_assistance"
        arguments = []

        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)

        if fiscal_year:
            if arguments:
                query += " AND"
            else:
                query += " WHERE"
            query += ' action_date::Date BETWEEN %s AND %s'
            arguments += [fy_begin]
            arguments += [fy_end]
        query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
        arguments += [limit, (page-1)*limit]

        logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1],
                                                                  arguments[2], arguments[3]))

        db_cursor.execute(query, arguments)

        logger.info("Running dictfetchall on db_cursor")
        award_financial_assistance_data = dictfetchall(db_cursor)

        legal_entity_location_field_map = {
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "city_name": "legal_entity_city_name",
            "congressional_code": "legal_entity_congressional",
            "county_code": "legal_entity_county_code",
            "county_name": "legal_entity_county_name",
            "foreign_city_name": "legal_entity_foreign_city",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "zip5": "legal_entity_zip5",
            "zip_last4": "legal_entity_zip_last4",
            "location_country_code": "legal_entity_country_code"
        }

        place_of_performance_field_map = {
            "city_name": "place_of_performance_city",
            "performance_code": "place_of_performance_code",
            "congressional_code": "place_of_performance_congr",
            "county_name": "place_of_perform_county_na",
            "foreign_location_description": "place_of_performance_forei",
            "state_name": "place_of_perform_state_nam",
            "zip4": "place_of_performance_zip4a",
            "location_country_code": "place_of_perform_country_c"

        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        logger.info("Getting total rows")
        # rows_loaded = len(current_ids)
        total_rows = len(award_financial_assistance_data)  # - rows_loaded

        logger.info("Processing " + str(total_rows) + " rows of assistance data")

        # skip_count = 0


# ROW ITERATION STARTS HERE

        lel_bulk = []
        pop_bulk = []
        legal_entity_bulk = []
        award_bulk = []

        transaction_assistance_bulk = []
        transaction_normalized_bulk = []

        logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Recipient flag is true for LeL
            legal_entity_location = get_or_create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
            )

            lel_bulk.append(legal_entity_location)

        logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
        try:
            Location.objects.bulk_create(lel_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            # Place of Performance flag is true for PoP
            pop_location = get_or_create_location(
                place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
            )

            pop_bulk.append(pop_location)

        logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
        try:
            Location.objects.bulk_create(pop_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            recipient_name = row.get('awardee_or_recipient_legal', '')

            legal_entity = LegalEntity.objects.filter(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                                      recipient_name=recipient_name).first()

            if legal_entity is None:
                legal_entity = LegalEntity(recipient_unique_id=row['awardee_or_recipient_uniqu'],
                                           recipient_name=recipient_name)
                legal_entity_value_map = {
                    "location": lel_bulk[index - 1],
                }
                legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

            legal_entity_bulk.append(legal_entity)

        logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
        try:
            LegalEntity.objects.bulk_create(legal_entity_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        awarding_agency_list = []
        funding_agency_list = []

        logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):
            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = \
                        subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'],
                row["awarding_sub_tier_agency_c"]
            )
            funding_agency = Agency.get_by_toptier_subtier(
                row['funding_agency_code'],
                row["funding_sub_tier_agency_co"]
            )

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            # award.save() is called in Award.get_or_create_summary_award by default
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                fain=row.get('fain'),
                uri=row.get('uri'),
                save=False
            )

            award_bulk.append(award)
            award_update_id_list.append(award.id)

        logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
        try:
            Award.objects.bulk_create(award_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')

        logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
        for index, row in enumerate(award_financial_assistance_data, 1):

            parent_txn_value_map = {
                "award": award_bulk[index - 1],
                "awarding_agency": awarding_agency_list[index - 1],
                "funding_agency": funding_agency_list[index - 1],
                "recipient": legal_entity_bulk[index - 1],
                "place_of_performance": pop_bulk[index - 1],
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
            transaction_normalized_bulk.append(transaction_normalized)

        logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
        try:
            TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
        except IntegrityError:
            logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing... ')

        for index, row in enumerate(award_financial_assistance_data, 1):
            financial_assistance_data = load_data_into_model(
                TransactionFABS(),  # thrown away
                row,
                as_dict=True)

            transaction_assistance = TransactionFABS(transaction=transaction_normalized_bulk[index - 1],
                                                     **financial_assistance_data)
            transaction_assistance_bulk.append(transaction_assistance)

        logger.info('Bulk creating TransactionFABS rows...')
        try:
            TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
        except IntegrityError:
            logger.info('!!! DUPLICATES FOUND. Continuing... ')
def test_get_or_create_summary_award():
    """Test award record lookup."""
    sta1 = mommy.make(SubtierAgency,
                      subtier_code='1234',
                      name='Bureau of Effective Unit Tests')
    sta2 = mommy.make(SubtierAgency,
                      subtier_code='5678',
                      name='Bureau of Breaky Unit Tests')
    sta3 = mommy.make(SubtierAgency,
                      subtier_code='0509',
                      name='Bureau of Testing Bureaucracy')
    tta1 = mommy.make(ToptierAgency,
                      cgac_code='020',
                      name='Department of Unit Tests')
    tta2 = mommy.make(ToptierAgency,
                      cgac_code='089',
                      name='Department of Integrated Tests')

    a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1)
    a2 = mommy.make(Agency, id=2, toptier_agency=tta1)
    a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2)
    a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3)

    # match on awarding agency and piid
    m1 = mommy.make('awards.award', piid='DUT123', awarding_agency=a1)
    t1 = Award.get_or_create_summary_award(piid='DUT123',
                                           awarding_agency=a1)[1]
    assert t1 == m1

    # match on awarding agency and piid + parent award
    pa1 = mommy.make('awards.award', piid='IDVDUT456')
    m2 = mommy.make('awards.award',
                    piid='DUT456',
                    parent_award=pa1,
                    awarding_agency=a1)
    t2 = Award.get_or_create_summary_award(piid='DUT456',
                                           parent_award_id='IDVDUT456',
                                           awarding_agency=a1)[1]
    assert t2 == m2

    # match on awarding agency and fain
    m3 = mommy.make('awards.award', fain='DUT789', awarding_agency=a1)
    t3 = Award.get_or_create_summary_award(fain='DUT789',
                                           awarding_agency=a1)[1]
    assert t3 == m3

    # match on awarding agency and fain + uri (fain takes precedence, same uri)
    m4 = mommy.make('awards.award',
                    fain='DUT987',
                    uri='123-abc-456',
                    awarding_agency=a1)
    t4 = Award.get_or_create_summary_award(fain='DUT987',
                                           uri='123-abc-456',
                                           awarding_agency=a1)[1]
    assert t4 == m4

    # match on awarding agency and fain + uri (fain takes precedence, different uri)
    m5 = mommy.make('awards.award',
                    fain='DUT123456',
                    uri='123-abc-456',
                    awarding_agency=a1)
    t5 = Award.get_or_create_summary_award(fain='DUT123456',
                                           uri='123-abc-456-a-different-uri',
                                           awarding_agency=a1)[1]
    assert t5 == m5

    # match on awarding agency + uri
    m6 = mommy.make('awards.award', uri='abc-123-def', awarding_agency=a1)
    t6 = Award.get_or_create_summary_award(uri='abc-123-def',
                                           awarding_agency=a1)[1]
    assert t6 == m6

    # match on awarding toptier agency only
    m7 = mommy.make('awards.award', uri='kkk-bbb-jjj', awarding_agency=a2)
    t7 = Award.get_or_create_summary_award(uri='kkk-bbb-jjj',
                                           awarding_agency=a2)[1]
    assert m7 == t7

    # match on no awarding agency, uri
    m8 = mommy.make('awards.award', uri='mmm-fff-ddd')
    t8 = Award.get_or_create_summary_award(uri='mmm-fff-ddd')[1]
    assert m8 == t8

    # match on no awarding agency, fain
    m9 = mommy.make('awards.award', fain='DUTDUTDUT')
    t9 = Award.get_or_create_summary_award(fain='DUTDUTDUT')[1]
    assert m9 == t9

    # non-match with piid creates new award record
    m10 = mommy.make('awards.award', piid='imapiid')
    t10 = Award.get_or_create_summary_award(piid='imadifferentpiid')
    assert len(t10[0]) == 1
    assert m10 != t10[1]

    # non-match with piid + non-matching parent award creates two new awards
    pa11 = mommy.make('awards.award', piid='momofpiidsarefun')
    m11 = mommy.make('awards.award', piid='piidsarefun', parent_award=pa11)
    t11 = Award.get_or_create_summary_award(piid='piidsarefun',
                                            parent_award_id='dadofpiidsarefun')
    assert len(t11[0]) == 2
    assert m11 != t11[1]

    # non-match with piid + matching parent award creates one new award
    pa12 = mommy.make('awards.award', piid='thingmom')
    m12 = mommy.make('awards.award', piid='thing1', parent_award=pa12)
    t12 = Award.get_or_create_summary_award(piid='thing2',
                                            parent_award_id='thingmom')
    assert len(t12[0]) == 1
    assert m12 != t12[1]

    # matching piid + non-matching parent
    pa13 = mommy.make('awards.award', piid='piidthing')
    m13 = mommy.make('awards.award', piid='0005', parent_award=pa13)
    t13 = Award.get_or_create_summary_award(
        piid='0005', parent_award_id='anotherpiidthing')[1]
    assert t13 != m13

    # matching piid and parent award id but mismatched subtier agency, same top tier agency
    pa14 = mommy.make('awards.award', piid='imalittlepiid', awarding_agency=a1)
    m14 = mommy.make('awards.award',
                     piid='shortandstout',
                     parent_award=pa14,
                     awarding_agency=a1)
    t14 = Award.get_or_create_summary_award(piid='shortandstout',
                                            parent_award_id='imalittlepiid',
                                            awarding_agency=a3)[1]
    assert t14 != m14

    # matching fain but mismatched subtier agency, same top tier agency
    m15 = mommy.make('awards.award',
                     fain='imalittlefain',
                     parent_award=pa14,
                     awarding_agency=a1)
    t15 = Award.get_or_create_summary_award(fain='imalittlefain',
                                            awarding_agency=a3)[1]
    assert t15 != m15

    # matching piid and parent award but mismatched subtier and toptier agency
    pa16 = mommy.make('awards.Award',
                      piid='imjustapiidparent',
                      awarding_agency=a3)
    m16 = mommy.make('awards.Award',
                     piid='imjustapiidchild',
                     awarding_agency=a3)
    t16 = Award.get_or_create_summary_award(
        piid='imjustapiidchild',
        parent_award_id='imjustapiidparent',
        awarding_agency=a4)[1]
    assert t16 != m16
def load_file_d1(submission_attributes, procurement_data, db_cursor):
    """
    Process and load file D1 broker data (contract award txns).
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}

    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    for row in procurement_data:
        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu'],
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up
        if row['awarding_agency_code'] is None:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        if row['funding_agency_code'] is None:
            row['funding_agency_code'] = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        AWARD_UPDATE_ID_LIST.append(award.id)
        AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_instance = load_data_into_model(
            Transaction(),
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, created = Transaction.objects.get_or_create(
            **transaction_instance)

        contract_value_map = {
            'transaction':
            transaction_instance,
            'submission':
            submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
            format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(TransactionContract(),
                                                 row,
                                                 field_map=contract_field_map,
                                                 value_map=contract_value_map,
                                                 save=True)
Exemple #28
0
    def load_awards(self, fabs_broker_data, total_rows):
        start_time = datetime.now()
        for index, row in enumerate(fabs_broker_data, 1):
            if not (index % 10000):
                logger.info('Awards: Loading row {} of {} ({})'.format(
                    str(index), str(total_rows),
                    datetime.now() - start_time))

            # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(
                    row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = self.subtier_agency_map[
                    row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = self.subtier_to_agency_map[
                    awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = self.toptier_agency_map[
                    awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(
                    row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = self.subtier_agency_map.get(
                    row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = self.subtier_to_agency_map[
                        funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = self.toptier_agency_map[
                        funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = self.agency_no_sub_map.get(
                (row['awarding_agency_code'],
                 row["awarding_sub_tier_agency_c"]))

            if awarding_agency is None:
                awarding_agency = self.agency_sub_only_map.get(
                    row['awarding_agency_code'])

            # If we still haven't found the agency, try surmising it from subtier
            if awarding_agency is None:
                awarding_agency = self.agency_subtier_map.get(
                    row['awarding_sub_tier_agency_c'])

            funding_agency = self.agency_no_sub_map.get(
                (row['funding_agency_code'],
                 row["funding_sub_tier_agency_co"]))

            if funding_agency is None:
                funding_agency = self.agency_sub_only_map.get(
                    row['funding_agency_code'])

            if funding_agency is None:
                funding_agency = self.agency_subtier_map.get(
                    row['funding_sub_tier_agency_co'])

            awarding_agency_list.append(awarding_agency)
            funding_agency_list.append(funding_agency)

            fain = row.get('fain')
            uri = row.get('uri')

            if awarding_agency:
                lookup_key = (fain, uri, awarding_agency.id)
                award = self.award_map.get(lookup_key)
            else:
                lookup_key = None
                award = None

            if not award:
                # create the award since it wasn't found
                create_kwargs = {
                    'awarding_agency': awarding_agency,
                    'fain': fain,
                    'uri': uri
                }
                award = Award(**create_kwargs)
                if awarding_agency and lookup_key:
                    self.award_map[lookup_key] = award
                award_bulk.append(award)

            award_lookup.append(award)

        logger.info(
            'Bulk creating Awards (batch_size: {})...'.format(BATCH_SIZE))
        Award.objects.bulk_create(award_bulk, batch_size=BATCH_SIZE)
Exemple #29
0
    def insert_new_fabs(self, to_insert):
        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perform_country_n",
            "state_code": "place_of_perfor_state_code",
            "state_name": "place_of_perform_state_nam",
            "city_name": "place_of_performance_city",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "foreign_location_description": "place_of_performance_forei",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "performance_code": "place_of_performance_code",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5",
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_name",
            "city_name": "legal_entity_city_name",
            "city_code": "legal_entity_city_code",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "foreign_location_description": "legal_entity_foreign_descr",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5",
            "foreign_postal_code": "legal_entity_foreign_posta",
            "foreign_province": "legal_entity_foreign_provi",
            "foreign_city_name": "legal_entity_foreign_city",
        }

        for row in to_insert:
            upper_case_dict_values(row)

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {"recipient_flag": True})
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
                if recipient_name is not None else "",
                parent_recipient_unique_id=row['ultimate_parent_unique_ide'],
            )
            legal_entity_value_map = {
                "location":
                legal_entity_location,
                "business_categories":
                get_business_categories(row=row, data_type='fabs'),
                "business_types_description":
                row['business_types_desc'],
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

            # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
            # non-numeric record type values
            record_type_int = int(row['record_type'])
            if record_type_int == 1:
                uri = row['uri'] if row['uri'] else '-NONE-'
                fain = '-NONE-'
            elif record_type_int in (2, 3):
                uri = '-NONE-'
                fain = row['fain'] if row['fain'] else '-NONE-'
            else:
                msg = "Invalid record type encountered for the following afa_generated_unique record: {}"
                raise Exception(msg.format(row['afa_generated_unique']))

            astac = row["awarding_sub_tier_agency_c"] if row[
                "awarding_sub_tier_agency_c"] else "-NONE-"
            generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri)

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                fain=row['fain'],
                uri=row['uri'],
                record_type=row['record_type'],
            )
            award.save()

            # Append row to list of Awards updated
            AWARD_UPDATE_ID_LIST.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(
                    row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row['modified_at']),
                                                  "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award":
                award,
                "awarding_agency":
                awarding_agency,
                "funding_agency":
                funding_agency,
                "recipient":
                legal_entity,
                "place_of_performance":
                pop_location,
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
                "last_modified_date":
                last_mod_date,
                "type_description":
                row['assistance_type_desc'],
                "transaction_unique_id":
                row['afa_generated_unique'],
                "generated_unique_award_id":
                generated_unique_id,
            }

            fad_field_map = {
                "type": "assistance_type",
                "description": "award_description",
                "funding_amount": "total_funding_amount",
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=fad_field_map,
                value_map=parent_txn_value_map,
                as_dict=True,
            )

            financial_assistance_data = load_data_into_model(
                TransactionFABS(), row, as_dict=True)  # thrown away

            afa_generated_unique = financial_assistance_data[
                'afa_generated_unique']
            unique_fabs = TransactionFABS.objects.filter(
                afa_generated_unique=afa_generated_unique)

            if unique_fabs.first():
                transaction_normalized_dict["update_date"] = datetime.now(
                    timezone.utc)
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # Update TransactionNormalized
                TransactionNormalized.objects.filter(
                    id=unique_fabs.first().transaction.id).update(
                        **transaction_normalized_dict)

                # Update TransactionFABS
                unique_fabs.update(**financial_assistance_data)
            else:
                # Create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # Create TransactionFABS
                transaction_fabs = TransactionFABS(transaction=transaction,
                                                   **financial_assistance_data)
                transaction_fabs.save()

            # Update legal entity to map back to transaction
            legal_entity.transaction_unique_id = afa_generated_unique
            legal_entity.save()
def test_get_or_create_summary_award():
    """Test award record lookup."""
    sta1 = mommy.make(SubtierAgency,
                      subtier_code="1234",
                      name="Bureau of Effective Unit Tests")
    sta2 = mommy.make(SubtierAgency,
                      subtier_code="5678",
                      name="Bureau of Breaky Unit Tests")
    sta3 = mommy.make(SubtierAgency,
                      subtier_code="0509",
                      name="Bureau of Testing Bureaucracy")
    tta1 = mommy.make(ToptierAgency,
                      toptier_code="020",
                      name="Department of Unit Tests")
    tta2 = mommy.make(ToptierAgency,
                      toptier_code="089",
                      name="Department of Integrated Tests")

    a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1)
    a2 = mommy.make(Agency, id=2, toptier_agency=tta1)
    a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2)
    a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3)

    # match on awarding agency and piid
    m1 = mommy.make("awards.award", piid="DUT123", awarding_agency=a1)
    t1 = Award.get_or_create_summary_award(piid="DUT123",
                                           awarding_agency=a1)[1]
    assert t1 == m1

    # match on awarding agency and piid + parent award piid
    m2 = mommy.make("awards.award",
                    piid="DUT456",
                    parent_award_piid="IDVDUT456",
                    awarding_agency=a1)
    t2 = Award.get_or_create_summary_award(piid="DUT456",
                                           parent_award_piid="IDVDUT456",
                                           awarding_agency=a1)[1]
    assert t2 == m2

    # match on awarding agency and fain
    m3 = mommy.make("awards.award", fain="DUT789", awarding_agency=a1)
    t3 = Award.get_or_create_summary_award(fain="DUT789",
                                           awarding_agency=a1)[1]
    assert t3 == m3

    # match on awarding agency and fain + uri (fain takes precedence, same uri)
    m4 = mommy.make("awards.award", fain="DUT987", awarding_agency=a1)
    t4 = Award.get_or_create_summary_award(fain="DUT987",
                                           uri="123-abc-456",
                                           record_type="2",
                                           awarding_agency=a1)[1]
    assert t4 == m4

    # match on awarding agency + uri
    m5 = mommy.make("awards.award", uri="abc-123-def", awarding_agency=a1)
    t5 = Award.get_or_create_summary_award(uri="abc-123-def",
                                           record_type="1",
                                           awarding_agency=a1)[1]
    assert t5 == m5

    # match on awarding toptier agency only
    m6 = mommy.make("awards.award", uri="kkk-bbb-jjj", awarding_agency=a2)
    t6 = Award.get_or_create_summary_award(uri="kkk-bbb-jjj",
                                           record_type="1",
                                           awarding_agency=a2)[1]
    assert m6 == t6

    # match on no awarding agency, uri
    m7 = mommy.make("awards.award", uri="mmm-fff-ddd")
    t7 = Award.get_or_create_summary_award(uri="mmm-fff-ddd",
                                           record_type="1")[1]
    assert m7 == t7

    # match on no awarding agency, fain
    m8 = mommy.make("awards.award", fain="DUTDUTDUT")
    t8 = Award.get_or_create_summary_award(fain="DUTDUTDUT",
                                           record_type="2")[1]
    assert m8 == t8

    # non-match with piid creates new award record
    m9 = mommy.make("awards.award", piid="imapiid")
    t9 = Award.get_or_create_summary_award(piid="imadifferentpiid")
    assert len(t9[0]) == 1
    assert m9 != t9[1]

    # non-match with piid + matching parent award piid creates one new award
    m10 = mommy.make("awards.award",
                     piid="thing1",
                     parent_award_piid="thingmom")
    t10 = Award.get_or_create_summary_award(piid="thing2",
                                            parent_award_piid="thingmom")
    assert len(t10[0]) == 1
    assert m10 != t10[1]

    # matching piid + non-matching parent
    m11 = mommy.make("awards.award",
                     piid="0005",
                     parent_award_piid="piidthing")
    t11 = Award.get_or_create_summary_award(
        piid="0005", parent_award_piid="anotherpiidthing")
    assert m11 != t11[0]

    # matching piid and parent award id but mismatched subtier agency, same top tier agency
    m12 = mommy.make("awards.award",
                     piid="shortandstout",
                     parent_award_piid="imalittlepiid",
                     awarding_agency=a1)
    t12 = Award.get_or_create_summary_award(piid="shortandstout",
                                            parent_award_piid="imalittlepiid",
                                            awarding_agency=a3)[1]
    assert t12 != m12

    # matching fain but mismatched subtier agency, same top tier agency
    m13 = mommy.make("awards.award", fain="imalittlefain", awarding_agency=a1)
    t13 = Award.get_or_create_summary_award(fain="imalittlefain",
                                            record_type="2",
                                            awarding_agency=a3)[1]
    assert t13 != m13

    # matching piid and parent award but mismatched subtier and toptier agency
    mommy.make("awards.Award", piid="imjustapiidparent", awarding_agency=a3)
    m14 = mommy.make("awards.Award",
                     piid="imjustapiidchild",
                     awarding_agency=a3)
    t14 = Award.get_or_create_summary_award(
        piid="imjustapiidchild",
        parent_award_piid="imjustapiidparent",
        awarding_agency=a4)[1]
    assert t14 != m14

    # matching piid and parent award but mismatched subtier and toptier agency
    m15 = mommy.make("awards.Award",
                     generated_unique_award_id="this_is_generated_and_unique")
    t15 = Award.get_or_create_summary_award(
        generated_unique_award_id="this_is_generated_and_unique")
    assert m15 == t15[1]

    # matching piid and parent award but mismatched subtier and toptier agency
    m16 = mommy.make("awards.Award",
                     generated_unique_award_id="this_is_generated_and_unique")
    t16 = Award.get_or_create_summary_award(
        generated_unique_award_id="this_is_generated_unique_and_different")
    assert len(t16[0]) == 1
    assert m16 != t16[1]
Exemple #31
0
    def insert_new_fpds(self, to_insert, total_rows):
        logger.info('Starting insertion of new FPDS data')

        place_of_performance_field_map = {
            "location_country_code": "place_of_perform_country_c",
            "country_name": "place_of_perf_country_desc",
            "state_code": "place_of_performance_state",
            "state_name": "place_of_perfor_state_desc",
            "city_name": "place_of_perform_city_name",
            "county_name": "place_of_perform_county_na",
            "county_code": "place_of_perform_county_co",
            "zip_4a": "place_of_performance_zip4a",
            "congressional_code": "place_of_performance_congr",
            "zip_last4": "place_of_perform_zip_last4",
            "zip5": "place_of_performance_zip5"
        }

        legal_entity_location_field_map = {
            "location_country_code": "legal_entity_country_code",
            "country_name": "legal_entity_country_name",
            "state_code": "legal_entity_state_code",
            "state_name": "legal_entity_state_descrip",
            "city_name": "legal_entity_city_name",
            "county_name": "legal_entity_county_name",
            "county_code": "legal_entity_county_code",
            "address_line1": "legal_entity_address_line1",
            "address_line2": "legal_entity_address_line2",
            "address_line3": "legal_entity_address_line3",
            "zip4": "legal_entity_zip4",
            "congressional_code": "legal_entity_congressional",
            "zip_last4": "legal_entity_zip_last4",
            "zip5": "legal_entity_zip5"
        }

        start_time = datetime.now()

        for index, row in enumerate(to_insert, 1):
            if not (index % 1000):
                logger.info(
                    'Inserting Stale FPDS: Inserting row {} of {} ({})'.format(
                        str(index), str(total_rows),
                        datetime.now() - start_time))

            for key in row:
                if isinstance(row[key], str):
                    row[key] = row[key].upper()

            # Create new LegalEntityLocation and LegalEntity from the row data
            legal_entity_location = create_location(
                legal_entity_location_field_map, row, {
                    "recipient_flag": True,
                    "is_fpds": True
                })
            recipient_name = row['awardee_or_recipient_legal']
            legal_entity = LegalEntity.objects.create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
                if recipient_name is not None else "")
            legal_entity_value_map = {
                "location":
                legal_entity_location,
                "business_categories":
                get_business_categories(row=row, data_type='fpds'),
                "is_fpds":
                True
            }
            set_legal_entity_boolean_fields(row)
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location = create_location(place_of_performance_field_map, row,
                                           {"place_of_performance_flag": True})

            # Find the toptier awards from the subtier awards
            awarding_agency = Agency.get_by_subtier_only(
                row["awarding_sub_tier_agency_c"])
            funding_agency = Agency.get_by_subtier_only(
                row["funding_sub_tier_agency_co"])

            # Generate the unique Award ID
            # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
            generated_unique_id = 'CONT_AW_' + (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \
                (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \
                (row['piid'] if row['piid'] else '-NONE-') + '_' + \
                (row['parent_award_id'] if row['parent_award_id'] else '-NONE-')

            # Create the summary Award
            (created, award) = Award.get_or_create_summary_award(
                generated_unique_award_id=generated_unique_id,
                piid=row['piid'])
            award.parent_award_piid = row.get('parent_award_id')
            award.save()

            # Append row to list of Awards updated
            award_update_id_list.append(award.id)

            try:
                last_mod_date = datetime.strptime(str(
                    row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date()
            except ValueError:
                last_mod_date = datetime.strptime(str(row['last_modified']),
                                                  "%Y-%m-%d %H:%M:%S").date()
            parent_txn_value_map = {
                "award":
                award,
                "awarding_agency":
                awarding_agency,
                "funding_agency":
                funding_agency,
                "recipient":
                legal_entity,
                "place_of_performance":
                pop_location,
                "period_of_performance_start_date":
                format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date":
                format_date(row['period_of_performance_curr']),
                "action_date":
                format_date(row['action_date']),
                "last_modified_date":
                last_mod_date,
                "transaction_unique_id":
                row['detached_award_proc_unique'],
                "generated_unique_award_id":
                generated_unique_id,
                "is_fpds":
                True
            }

            contract_field_map = {
                "type": "contract_award_type",
                "type_description": "contract_award_type_desc",
                "description": "award_description"
            }

            transaction_normalized_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            detached_award_proc_unique = contract_instance[
                'detached_award_proc_unique']
            unique_fpds = TransactionFPDS.objects.filter(
                detached_award_proc_unique=detached_award_proc_unique)

            if unique_fpds.first():
                transaction_normalized_dict["update_date"] = datetime.utcnow()
                transaction_normalized_dict["fiscal_year"] = fy(
                    transaction_normalized_dict["action_date"])

                # update TransactionNormalized
                TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).\
                    update(**transaction_normalized_dict)

                # update TransactionFPDS
                unique_fpds.update(**contract_instance)
            else:
                # create TransactionNormalized
                transaction = TransactionNormalized(
                    **transaction_normalized_dict)
                transaction.save()

                # create TransactionFPDS
                transaction_fpds = TransactionFPDS(transaction=transaction,
                                                   **contract_instance)
                transaction_fpds.save()
def test_get_or_create_summary_award():
    """Test award record lookup."""
    sta1 = mommy.make(SubtierAgency, subtier_code='1234', name='Bureau of Effective Unit Tests')
    sta2 = mommy.make(SubtierAgency, subtier_code='5678', name='Bureau of Breaky Unit Tests')
    sta3 = mommy.make(SubtierAgency, subtier_code='0509', name='Bureau of Testing Bureaucracy')
    tta1 = mommy.make(ToptierAgency, cgac_code='020', name='Department of Unit Tests')
    tta2 = mommy.make(ToptierAgency, cgac_code='089', name='Department of Integrated Tests')

    a1 = mommy.make(Agency, id=1, toptier_agency=tta1, subtier_agency=sta1)
    a2 = mommy.make(Agency, id=2, toptier_agency=tta1)
    a3 = mommy.make(Agency, id=3, toptier_agency=tta1, subtier_agency=sta2)
    a4 = mommy.make(Agency, id=4, toptier_agency=tta2, subtier_agency=sta3)

    # match on awarding agency and piid
    m1 = mommy.make('awards.award', piid='DUT123', awarding_agency=a1)
    t1 = Award.get_or_create_summary_award(piid='DUT123', awarding_agency=a1)[1]
    assert t1 == m1

    # match on awarding agency and piid + parent award piid
    m2 = mommy.make('awards.award', piid='DUT456', parent_award_piid='IDVDUT456', awarding_agency=a1)
    t2 = Award.get_or_create_summary_award(piid='DUT456', parent_award_piid='IDVDUT456', awarding_agency=a1)[1]
    assert t2 == m2

    # match on awarding agency and fain
    m3 = mommy.make('awards.award', fain='DUT789', awarding_agency=a1)
    t3 = Award.get_or_create_summary_award(fain='DUT789', awarding_agency=a1)[1]
    assert t3 == m3

    # match on awarding agency and fain + uri (fain takes precedence, same uri)
    m4 = mommy.make('awards.award', fain='DUT987', awarding_agency=a1)
    t4 = Award.get_or_create_summary_award(fain='DUT987', uri='123-abc-456', record_type='2', awarding_agency=a1)[1]
    assert t4 == m4

    # match on awarding agency + uri
    m5 = mommy.make('awards.award', uri='abc-123-def', awarding_agency=a1)
    t5 = Award.get_or_create_summary_award(uri='abc-123-def', record_type='1', awarding_agency=a1)[1]
    assert t5 == m5

    # match on awarding toptier agency only
    m6 = mommy.make('awards.award', uri='kkk-bbb-jjj', awarding_agency=a2)
    t6 = Award.get_or_create_summary_award(uri='kkk-bbb-jjj', record_type='1', awarding_agency=a2)[1]
    assert m6 == t6

    # match on no awarding agency, uri
    m7 = mommy.make('awards.award', uri='mmm-fff-ddd')
    t7 = Award.get_or_create_summary_award(uri='mmm-fff-ddd', record_type='1')[1]
    assert m7 == t7

    # match on no awarding agency, fain
    m8 = mommy.make('awards.award', fain='DUTDUTDUT')
    t8 = Award.get_or_create_summary_award(fain='DUTDUTDUT', record_type='2')[1]
    assert m8 == t8

    # non-match with piid creates new award record
    m9 = mommy.make('awards.award', piid='imapiid')
    t9 = Award.get_or_create_summary_award(piid='imadifferentpiid')
    assert len(t9[0]) == 1
    assert m9 != t9[1]

    # non-match with piid + matching parent award piid creates one new award
    m10 = mommy.make('awards.award', piid='thing1', parent_award_piid='thingmom')
    t10 = Award.get_or_create_summary_award(piid='thing2', parent_award_piid='thingmom')
    assert len(t10[0]) == 1
    assert m10 != t10[1]

    # matching piid + non-matching parent
    m11 = mommy.make('awards.award', piid='0005', parent_award_piid='piidthing')
    t11 = Award.get_or_create_summary_award(piid='0005', parent_award_piid='anotherpiidthing')
    assert m11 != t11[0]

    # matching piid and parent award id but mismatched subtier agency, same top tier agency
    m12 = mommy.make('awards.award', piid='shortandstout', parent_award_piid='imalittlepiid', awarding_agency=a1)
    t12 = Award.get_or_create_summary_award(piid='shortandstout', parent_award_piid='imalittlepiid',
                                            awarding_agency=a3)[1]
    assert t12 != m12

    # matching fain but mismatched subtier agency, same top tier agency
    m13 = mommy.make('awards.award', fain='imalittlefain', awarding_agency=a1)
    t13 = Award.get_or_create_summary_award(fain='imalittlefain', record_type='2', awarding_agency=a3)[1]
    assert t13 != m13

    # matching piid and parent award but mismatched subtier and toptier agency
    mommy.make('awards.Award', piid='imjustapiidparent', awarding_agency=a3)
    m14 = mommy.make('awards.Award', piid='imjustapiidchild', awarding_agency=a3)
    t14 = Award.get_or_create_summary_award(piid='imjustapiidchild', parent_award_piid='imjustapiidparent',
                                            awarding_agency=a4)[1]
    assert t14 != m14

    # matching piid and parent award but mismatched subtier and toptier agency
    m15 = mommy.make('awards.Award', generated_unique_award_id='this_is_generated_and_unique')
    t15 = Award.get_or_create_summary_award(generated_unique_award_id='this_is_generated_and_unique')
    assert m15 == t15[1]

    # matching piid and parent award but mismatched subtier and toptier agency
    m16 = mommy.make('awards.Award', generated_unique_award_id='this_is_generated_and_unique')
    t16 = Award.get_or_create_summary_award(generated_unique_award_id='this_is_generated_unique_and_different')
    assert len(t16[0]) == 1
    assert m16 != t16[1]