Esempio n. 1
0
def test_get_by_subtier():
    """Test Agency lookup by subtier."""
    toptier = mommy.make("references.ToptierAgency",
                         toptier_code="xyz",
                         name="yo")
    subtier = mommy.make("references.SubtierAgency",
                         subtier_code="abc",
                         name="hi")

    mommy.make(
        "references.Agency",
        toptier_agency=toptier,
        subtier_agency=mommy.make("references.SubtierAgency",
                                  subtier_code="bbb"),
    )
    agency1 = mommy.make("references.Agency",
                         toptier_agency=toptier,
                         subtier_agency=subtier)

    # lookup should return agency w/ most recent updatea_date that
    # matches the subtier code
    assert Agency.get_by_subtier("abc") == agency1
    # if there's no match, we should get none
    assert Agency.get_by_subtier("nope") is None
    # if called with an empty argument, we should get None
    assert Agency.get_by_subtier("") is None
    assert Agency.get_by_subtier(None) is None
Esempio n. 2
0
def get_valid_awarding_agency(row):
    agency = None

    agency_subtier_code = row['contracting_office_aid']
    valid_subtier_code = agency_subtier_code and len(agency_subtier_code) > 0

    # Get the awarding agency
    if valid_subtier_code:
        agency = Agency.get_by_subtier(agency_subtier_code)

    return agency
def test_get_by_subtier():
    """Test Agency lookup by subtier."""
    toptier = mommy.make('references.ToptierAgency', cgac_code='xyz', name='yo')
    subtier = mommy.make('references.SubtierAgency', subtier_code='abc', name='hi')
    mommy.make(
        'references.Agency', toptier_agency=toptier, subtier_agency=subtier)
    mommy.make(
        'references.Agency',
        toptier_agency=toptier,
        subtier_agency=mommy.make('references.SubtierAgency', subtier_code='bbb'))
    agency1 = mommy.make(
        'references.Agency', toptier_agency=toptier, subtier_agency=subtier)

    # lookup should return agency w/ most recent updatea_date that
    # matches the subtier code
    assert Agency.get_by_subtier('abc') == agency1
    # if there's no match, we should get none
    assert Agency.get_by_subtier('nope') is None
    # if called with an empty argument, we should get None
    assert Agency.get_by_subtier('') is None
    assert Agency.get_by_subtier(None) is None
def get_valid_awarding_agency(row):
    agency_subtier_code = row['awarding_sub_tier_agency_c']
    agency_toptier_code = row['awarding_agency_code']
    valid_subtier_code = (agency_subtier_code and len(agency_subtier_code) > 0)
    valid_toptier_code = (agency_toptier_code and len(agency_toptier_code) > 0)

    if not valid_toptier_code and not valid_subtier_code:
        return None

    agency = None
    # Get the awarding agency
    if valid_subtier_code and valid_toptier_code:
        agency = Agency.get_by_toptier_subtier(row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])

    if not agency and valid_subtier_code:
        agency = Agency.get_by_subtier(row['awarding_sub_tier_agency_c'])

    if not agency and valid_toptier_code:
        agency = Agency.get_by_toptier(row['awarding_agency_code'])

    return agency
def get_valid_awarding_agency(row):
    agency_subtier_code = row['awarding_sub_tier_agency_c']
    agency_toptier_code = row['awarding_agency_code']
    valid_subtier_code = (agency_subtier_code and len(agency_subtier_code) > 0)
    valid_toptier_code = (agency_toptier_code and len(agency_toptier_code) > 0)

    if not valid_toptier_code and not valid_subtier_code:
        return None

    agency = None
    # Get the awarding agency
    if valid_subtier_code and valid_toptier_code:
        agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row['awarding_sub_tier_agency_c'])

    if not agency and valid_subtier_code:
        agency = Agency.get_by_subtier(row['awarding_sub_tier_agency_c'])

    if not agency and valid_toptier_code:
        agency = Agency.get_by_toptier(row['awarding_agency_code'])

    return agency
def load_file_d2(submission_attributes, award_financial_assistance_data,
                 db_cursor):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}

    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    for row in award_financial_assistance_data:

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu']
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up
        if row['awarding_agency_code'] is None:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        if row['funding_agency_code'] is None:
            row['funding_agency_code'] = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        AWARD_UPDATE_ID_LIST.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_instance = load_data_into_model(
            Transaction(),
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, created = Transaction.objects.get_or_create(
            **transaction_instance)

        fad_value_map = {
            "transaction":
            transaction_instance,
            "submission":
            submission_attributes,
            "cfda":
            CFDAProgram.objects.filter(
                program_number=row['cfda_number']).first(),
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionAssistance(),
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            save=True)
def load_file_d1(submission_attributes, procurement_data, db_cursor):
    """
    Process and load file D1 broker data (contract award txns).
    """
    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}

    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    for row in procurement_data:
        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        # Create the legal entity if it doesn't exist
        try:
            legal_entity = LegalEntity.objects.get(
                recipient_unique_id=row['awardee_or_recipient_uniqu'])
        except ObjectDoesNotExist:
            legal_entity_value_map = {
                "location": legal_entity_location,
                "legal_entity_id": row['awardee_or_recipient_uniqu'],
            }
            legal_entity = load_data_into_model(
                LegalEntity(),
                row,
                value_map=legal_entity_value_map,
                save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If awarding/funding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up
        if row['awarding_agency_code'] is None:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        if row['funding_agency_code'] is None:
            row['funding_agency_code'] = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"]).toptier_agency.cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_id=row.get('parent_award_id'))
        award.save()

        AWARD_UPDATE_ID_LIST.append(award.id)
        AWARD_CONTRACT_UPDATE_ID_LIST.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_instance = load_data_into_model(
            Transaction(),
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_instance, created = Transaction.objects.get_or_create(
            **transaction_instance)

        contract_value_map = {
            'transaction':
            transaction_instance,
            'submission':
            submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
            format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(TransactionContract(),
                                                 row,
                                                 field_map=contract_field_map,
                                                 value_map=contract_value_map,
                                                 save=True)
Esempio n. 8
0
def load_file_d2(submission_attributes,
                 award_financial_assistance_data,
                 db_cursor,
                 quick,
                 row_preprocessor=no_preprocessing):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """

    d_start_time = time.time()

    if quick:
        setup_broker_fdw()

        parameters = {
            'broker_submission_id': submission_attributes.broker_submission_id
        }
        run_sql_file('usaspending_api/etl/management/load_file_d2.sql',
                     parameters)
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() -
                                                              d_start_time))
        return

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    legal_entity_location_value_map = {"recipient_flag": True}

    place_of_performance_value_map = {"place_of_performance_flag": True}

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    total_rows = len(award_financial_assistance_data)

    start_time = datetime.now()
    for index, row in enumerate(award_financial_assistance_data, 1):
        if not (index % 100):
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows),
                datetime.now() - start_time))

        row = row_preprocessor(row)

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            legal_entity_location_value_map)

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            place_of_performance_value_map)

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(
                row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(
                row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code
                if funding_agency is not None else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        award.save()

        award_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(
            **transaction_dict)
        transaction.save()

        fad_value_map = {
            "submission":
            submission_attributes,
            "cfda":
            Cfda.objects.filter(program_number=row['cfda_number']).first(),
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)

        transaction_assistance = TransactionFABS.get_or_create_2(
            transaction=transaction, **financial_assistance_data)
        transaction_assistance.save()

    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() -
                                                          d_start_time))
Esempio n. 9
0
def load_file_d1(submission_attributes,
                 procurement_data,
                 db_cursor,
                 quick=False):
    """
    Process and load file D1 broker data (contract award txns).
    """

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {"place_of_performance_flag": True}

    legal_entity_location_value_map = {"recipient_flag": True}

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    d_start_time = time.time()

    if quick:
        parameters = {
            'broker_submission_id': submission_attributes.broker_submission_id
        }
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql',
                     parameters)
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() -
                                                              d_start_time))
        return

    total_rows = len(procurement_data)

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        if not (index % 100):
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                str(index), str(total_rows),
                datetime.now() - start_time))

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row,
            copy(legal_entity_location_value_map))

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name)

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(
                legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row,
            copy(place_of_performance_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(
                row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(
                row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(
                row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code
                if funding_agency is not None else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'], row["awarding_sub_tier_agency_c"])
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get(
                'parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award":
            award,
            "awarding_agency":
            awarding_agency,
            "funding_agency":
            Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                          row["funding_sub_tier_agency_co"]),
            "recipient":
            legal_entity,
            "place_of_performance":
            pop_location,
            'submission':
            submission_attributes,
            "period_of_performance_start_date":
            format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date":
            format_date(row['period_of_performance_curr']),
            "action_date":
            format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(
            **transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission':
            submission_attributes,
            'reporting_period_start':
            submission_attributes.reporting_period_start,
            'reporting_period_end':
            submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date":
            format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction,
                                               **contract_instance)
        transaction_contract.save()
    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() -
                                                          d_start_time))
def load_file_d2(
        submission_attributes, award_financial_assistance_data, db_cursor, quick, row_preprocessor=no_preprocessing
):
    """
    Process and load file D2 broker data (financial assistance award txns).
    """

    d_start_time = time.time()

    if quick:
        setup_broker_fdw()

        parameters = {'broker_submission_id': submission_attributes.broker_submission_id}
        run_sql_file('usaspending_api/etl/management/load_file_d2.sql', parameters)
        logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
        return

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_code": "legal_entity_city_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"

    }

    legal_entity_location_value_map = {
        "recipient_flag": True
    }

    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    total_rows = len(award_financial_assistance_data)

    start_time = datetime.now()
    for index, row in enumerate(award_financial_assistance_data, 1):
        if not (index % 100):
            logger.info('D2 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                         str(total_rows),
                                                                         datetime.now() - start_time))

        row = row_preprocessor(row)

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row, legal_entity_location_value_map
        )

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        )

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row, place_of_performance_value_map
        )

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code if funding_agency is not None
                else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'))
        award.save()

        award_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                            row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        fad_value_map = {
            "submission": submission_attributes,
            "cfda": Cfda.objects.filter(program_number=row['cfda_number']).first(),
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
        }

        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=fad_value_map,
            as_dict=True)

        transaction_assistance = TransactionFABS.get_or_create_2(transaction=transaction, **financial_assistance_data)
        transaction_assistance.save()

    logger.info('\n\n\n\nFile D2 time elapsed: {}'.format(time.time() - d_start_time))
def load_file_d1(submission_attributes, procurement_data, db_cursor, quick=False):
    """
    Process and load file D1 broker data (contract award txns).
    """

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        "city_name": "place_of_performance_locat",
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }

    legal_entity_location_value_map = {
        "recipient_flag": True
    }

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    d_start_time = time.time()

    if quick:
        parameters = {'broker_submission_id': submission_attributes.broker_submission_id}
        run_sql_file('usaspending_api/etl/management/load_file_d1.sql', parameters)
        logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))
        return

    total_rows = len(procurement_data)

    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        if not (index % 100):
            logger.info('D1 File Load: Loading row {} of {} ({})'.format(str(index),
                                                                         str(total_rows),
                                                                         datetime.now() - start_time))

        legal_entity_location, created = get_or_create_location(
            legal_entity_location_field_map, row, copy(legal_entity_location_value_map)
        )

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Create the legal entity if it doesn't exist
        legal_entity, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name
        )

        if created:
            legal_entity_value_map = {
                "location": legal_entity_location,
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location, created = get_or_create_location(
            place_of_performance_field_map, row, copy(place_of_performance_value_map))

        # If awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            row['awarding_agency_code'] = Agency.get_by_subtier(
                row["awarding_sub_tier_agency_c"]).toptier_agency.cgac_code
        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_agency = Agency.get_by_subtier(row["funding_sub_tier_agency_co"])
            row['funding_agency_code'] = (
                funding_agency.toptier_agency.cgac_code if funding_agency is not None
                else None)

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            piid=row.get('piid'),
            fain=row.get('fain'),
            uri=row.get('uri'),
            parent_award_piid=row.get('parent_award_id'))  # It is a FAIN/PIID/URI, not our db's pk
        award.save()

        award_update_id_list.append(award.id)
        award_contract_update_id_list.append(award.id)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                            row["funding_sub_tier_agency_co"]),
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            'submission': submission_attributes,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction.save()

        contract_value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            "period_of_performance_potential_end_date": format_date(row['period_of_perf_potential_e'])
        }

        contract_instance = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=contract_value_map,
            as_dict=True)

        transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
        transaction_contract.save()
    logger.info('\n\n\n\nFile D1 time elapsed: {}'.format(time.time() - d_start_time))