def test_get_or_create_location_creates_new_locations():
    """If no location is found, we create a new one"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)
    row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }
    # this canonicalization step runs during load_submission, also
    row = canonicalize_location_dict(row)

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_vendor)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # country resolves to the reference row created above
    assert loc.location_country_code == ref
    # zip is split into its 5-digit and last-4 components
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    # address/city text is upper-cased by canonicalization
    assert loc.address_line1 == 'ADDY1'
    assert loc.address_line2 == 'ADDY2'
    assert loc.address_line3 is None
    assert loc.state_code == 'ST'
    assert loc.city_name == 'MY TOWN'
def test_get_or_create_fa_recipient_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance recipient locations."""
    ref = mommy.make(
        'references.RefCountryCode', country_code='USA', _fill_optional=True)
    row = {
        'recipient_country_code': 'USA',
        'recipient_zip': '12345-6789',
        'recipient_state_code': 'OH',
        'recipient_county_name': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row,
        load_usaspending_assistance.location_mapper_fin_assistance_recipient)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the created location carries the mapped recipient fields
    assert loc.location_country_code == ref
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_fa_place_of_performance_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance place of performance locations."""
    row = {
        'principal_place_country_code': 'USA',
        'principal_place_zip': '12345-6789',
        'principal_place_state_code': 'OH',
        'principal_place_cc': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    mapper = (load_usaspending_assistance
              .location_mapper_fin_assistance_principal_place)
    helpers.get_or_create_location(row, mapper)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the created location carries the mapped place-of-performance fields
    assert loc.location_country_code == 'USA'
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_location_creates_new_locations():
    """If no location is found, we create a new one"""
    row = {
        'vendorcountrycode': 'USA',
        'zipcode': '12345-6789',
        'streetaddress': 'Addy1',
        'streetaddress2': 'Addy2',
        'streetaddress3': None,
        'vendor_state_code': 'ST',
        'city': 'My Town',
    }
    # this canonicalization step runs during load_submission, also
    row = canonicalize_location_dict(row)

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_vendor)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # here the country code is stored as the plain string
    assert loc.location_country_code == 'USA'
    # zip is split into its 5-digit and last-4 components
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    # address/city text is upper-cased by canonicalization
    assert loc.address_line1 == 'ADDY1'
    assert loc.address_line2 == 'ADDY2'
    assert loc.address_line3 is None
    assert loc.state_code == 'ST'
    assert loc.city_name == 'MY TOWN'
def test_get_or_create_fa_place_of_performance_location_creates_new_locations():
    """If no location is found, we create a new one

    For financial assistance place of performance locations."""
    row = {
        'principal_place_country_code': 'USA',
        'principal_place_zip': '12345-6789',
        'principal_place_state_code': 'OH',
        'principal_place_cc': 'MONTGOMERY',
    }

    # can't find it because we're looking at the US fields
    assert Location.objects.count() == 0
    pop_mapper = (load_usaspending_assistance
                  .location_mapper_fin_assistance_principal_place)
    helpers.get_or_create_location(row, pop_mapper)
    assert Location.objects.count() == 1

    loc = Location.objects.all().first()
    # the new location carries the mapped place-of-performance fields
    assert loc.location_country_code == 'USA'
    assert loc.zip5 == '12345'
    assert loc.zip_last4 == '6789'
    assert loc.state_code == 'OH'
    assert loc.county_name == 'MONTGOMERY'
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='UAE', _fill_optional=True)
    expected = mommy.make(
        'references.Location',
        location_country_code=ref,
        zip5='12345',
        zip_last4='6789',
        # @todo: city_name has a different length than foreign_city_name, so
        # we can't use the random value
        city_name='AAAAAAAA',
        _fill_optional=True)
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    result = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert result != expected
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    ref = mommy.make(
        'references.RefCountryCode', country_code='UAE', _fill_optional=True)
    expected = mommy.make(
        'references.Location',
        location_country_code=ref,
        zip5='12345',
        zip_last4='6789',
        # @todo: can't use _fill_optional on this model because data
        # will exceed allowable index length
        address_line1='line one of address',
        address_line2='line two of address',
        address_line3='line thre of address',
        state_code='GG',
        city_name='AAAAAAAA')
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    found = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert found != expected
def test_get_or_create_location_non_usa():
    """We should query different fields if it's a non-US row"""
    expected = mommy.make(
        'references.Location',
        location_country_code='UAE',
        zip5='12345',
        zip_last4='6789',
        # @todo: can't use _fill_optional on this model because data
        # will exceed allowable index length
        address_line1='line one of address',
        address_line2='line two of address',
        address_line3='line thre of address',
        state_code='GG',
        city_name='AAAAAAAA')
    row = {
        'vendorcountrycode': 'UAE',
        'zipcode': '12345-6789',
        'streetaddress': expected.address_line1,
        'streetaddress2': expected.address_line2,
        'streetaddress3': expected.address_line3,
        'state': expected.state_code,
        'city': expected.city_name,
    }

    # can't find it because we're looking at the POP fields
    found = helpers.get_or_create_location(
        row, load_usaspending_contracts.location_mapper_place_of_performance)
    assert found != expected
def get_or_create_recipient(self, row):
    """Return the LegalEntity for this row's DUNS, creating it if absent.

    New entities get a location, name, DUNS, and the flag fields derived
    from the row's recipient_type; an existing entity (matched on DUNS)
    is returned untouched.
    """
    location = h.get_or_create_location(
        row, mapper=location_mapper_fin_assistance_recipient)
    attrs = {
        "location_id": location.location_id,
        "recipient_name": row['recipient_name'],
        "recipient_unique_id": row['duns_no'],
    }
    # assumes recipient_type looks like "<code>: <label>" — TODO confirm;
    # the part after the first colon feeds the flag lookup
    type_label = row.get("recipient_type", ":").split(":")[1].strip()
    attrs.update(self.recipient_flags_by_type(type_label))

    existing = LegalEntity.objects.filter(
        recipient_unique_id=row['duns_no']).first()
    if existing:
        return existing
    return LegalEntity.objects.create(**attrs)
def get_or_create_recipient(self, row):
    """Return the LegalEntity for this row's DUNS, creating it if absent.

    Only a freshly-created entity is populated from the row (location,
    name, and recipient_type-derived flags); an existing one is returned
    as-is.
    """
    location = h.get_or_create_location(
        row, mapper=location_mapper_fin_assistance_recipient)
    attrs = {
        "location_id": location.location_id,
        "recipient_name": row['recipient_name'],
    }
    # assumes recipient_type looks like "<code>: <label>" — TODO confirm
    type_label = row.get("recipient_type", ":").split(":")[1].strip()
    attrs.update(self.recipient_flags_by_type(type_label))

    le, created = LegalEntity.get_or_create_by_duns(duns=row['duns_no'])
    if created:
        # Update from our recipient dictionary
        for field_name, field_value in attrs.items():
            setattr(le, field_name, field_value)
        le.save()
    return le
def load_subawards(submission_attributes, db_cursor):
    """ Loads File F from the broker. db_cursor should be the db_cursor for Broker

    Pulls D1 (subcontract) and D2 (subaward) File F rows for the given
    submission, attaches each to a matching Award, upserts Subaward records,
    and finally refreshes subaward aggregates on every touched Award.
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()
    # Get a list of PIIDs from this submission
    awards_for_sub = Award.objects.filter(transaction__submission=submission_attributes).distinct()
    piids = list(awards_for_sub.values_list("piid", flat=True))
    fains = list(awards_for_sub.values_list("fain", flat=True))
    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        # A row without a subcontract number can't be keyed; warn only if it
        # carries an amount (data anomaly), otherwise count it as empty.
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching parent award id, piid, and submission attributes
        award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__contract_data__piid=row['piid'], transaction__contract_data__isnull=False, transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format(row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns; only a newly-created
        # LegalEntity gets populated from the row
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.recipient_name = row['company_name']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': row['naics'],
            'naics_description': row['naics_description'],
        }

        # Create the subaward (idempotent: keyed on subaward_number + award)
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award, defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(fains)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        # Same anomaly/empty handling as the D1 loop above
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(row['subaward_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with
        # a matching fain and submission. If this fails, try submission and uri
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__fain=row['fain']).distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency, transaction__submission=submission_attributes, transaction__assistance_data__isnull=False, transaction__assistance_data__uri=row['uri']).distinct().first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; skipping...".format(row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Find the recipient by looking up by duns
        recipient, created = LegalEntity.get_or_create_by_duns(duns=row['duns'])

        if created:
            # Prefer awardee_name, falling back through the legal name to ""
            recipient_name = row['awardee_name']
            if recipient_name is None:
                recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            recipient.recipient_name = recipient_name
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'submission': submission_attributes,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year'],
            'naics': None,
            'naics_description': None,
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subaward_num'], award=award, defaults=d2_f_dict)
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def handle(self, *args, **options):
    """Load a USAspending financial-assistance CSV into Transaction and
    TransactionAssistance records via bulk insert, then refresh award data."""
    h.clear_caches()
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_assistance_list = []

    for idx, row in enumerate(reader):
        # NOTE(review): this progress check uses len(reader), not idx —
        # if len(reader) is the (fixed) total row count this either always
        # or never fires; idx % 1000 was probably intended. Confirm against
        # CsvDataReader's semantics before changing.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency = self.get_awarding_agency(
            row)  # todo: use agency dict?

        # Create the transaction object for this row
        txn_dict = {
            "submission": subattr,
            "action_date": h.convert_date(row['obligation_action_date']),
            "action_type": h.up2colon(row['action_type']),
            "award": self.get_or_create_award(row, awarding_agency=awarding_agency),
            "awarding_agency": awarding_agency,
            "description": row["project_description"],  # ?? account_title is anther contender?
            "data_source": "USA",
            "federal_action_obligation": row["fed_funding_amount"],
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["federal_award_mod"],  # ??
            "period_of_performance_start_date": h.convert_date(row['starting_date']),
            "period_of_performance_current_end_date": h.convert_date(row['ending_date']),
            "place_of_performance": h.get_or_create_location(
                row, location_mapper_fin_assistance_principal_place),
            "recipient": self.get_or_create_recipient(row),
            "type": h.up2colon(row['assistance_type']),
            "usaspending_unique_transaction_id": row["unique_transaction_id"],
            # ??"funding_agency_id":
            # ?? "certified date":
        }
        txn = Transaction(**txn_dict)
        # fiscal year is derived, not stored in the CSV
        txn.fiscal_year = fy(txn.action_date)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_assistance_dict = {
            "submission": subattr,
            "fain": row["federal_award_id"],
            "uri": row["uri"],
            "cfda": Cfda.objects.filter(
                program_number=row["cfda_program_num"]).first(),
            "correction_late_delete_indicator": h.up2colon(row['correction_late_ind']),
            "face_value_loan_guarantee": row["face_loan_guran"],
            "fiscal_year_and_quarter_correction": row["fyq_correction"],
            "non_federal_funding_amount": row["non_fed_funding_amount"],
            "original_loan_subsidy_cost": row["orig_sub_guran"],  # ??
            "record_type": int(h.up2colon(row['record_type'])),
            "sai_number": row["sai_number"],
            "submitted_type": "C",  # ?? For CSV?
        }
        # ?? business_funds_indicator
        # ?? reporting period start/end??
        txn_assistance = TransactionAssistance(**txn_assistance_dict)
        txn_assistance_list.append(txn_assistance)

    # Bulk insert transaction rows
    self.logger.info(
        "Starting Transaction bulk insert ({} records)".format(
            len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")
    # Update txn assistance list with newly-inserted transactions
    # (the two lists are index-aligned: entry i of each came from CSV row i)
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_assistance_list):
        t.transaction = txn_list[idx]
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction assistance rows
    self.logger.info(
        "Starting TransactionAssistance bulk insert ({} records)".format(
            len(txn_assistance_list)))
    TransactionAssistance.objects.bulk_create(txn_assistance_list)
    self.logger.info("Completed TransactionAssistance bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    update_model_description_fields()
    self.logger.info("Completed Awards update ({} records)".format(count))
def load_subawards(submission_attributes, awards_touched, db_cursor):
    """ Loads File F from the broker. db_cursor should be the db_cursor for Broker

    awards_touched is an iterable of Award ids; their piid/fain/uri values
    scope the broker queries. Upserts Subaward rows and refreshes subaward
    aggregates on every Award that matched a File F row.
    """
    # A list of award id's to update the subaward accounts and totals on
    award_ids_to_update = set()

    # Get a list of PIIDs from this submission
    # TODO: URIS
    awards_touched = [Award.objects.filter(id=award_id).first() for award_id in awards_touched]
    piids = list([award.piid for award in awards_touched if award.piid])
    fains = list([award.fain for award in awards_touched if award.fain])
    uris = list([award.uri for award in awards_touched if award.uri])

    # This allows us to handle an empty list in the SQL without changing the query
    piids.append(None)
    fains.append(None)

    # D1 File F
    db_cursor.execute(D1_FILE_F_QUERY, [submission_attributes.broker_submission_id, tuple(piids)])
    d1_f_data = dictfetchall(db_cursor)
    logger.info("Creating D1 F File Entries (Subcontracts): {}".format(len(d1_f_data)))
    d1_create_count = 0
    d1_update_count = 0
    d1_empty_count = 0

    for row in d1_f_data:
        # Rows without a subcontract number can't be keyed; warn only when
        # they carry an amount (data anomaly), else count as empty.
        if row['subcontract_num'] is None:
            if row['id'] is not None and row['subcontract_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d1_empty_count += 1
            continue

        # Get the agency
        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn(
                "Subaward number {} cannot find matching agency with toptier code {} and subtier code {}".format(
                    row['subcontract_num'], row['awarding_agency_code'], row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-contract to
        # We perform this lookup by finding the Award containing a transaction with a matching parent award id, piid,
        # and submission attributes
        award = Award.objects.filter(
            awarding_agency=agency,
            latest_transaction__contract_data__piid=row['piid'],
            latest_transaction__contract_data__parent_award_id=row['parent_award_id']).distinct().order_by(
            "-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn(
                "Subcontract number {} cannot find matching award with piid {}, parent_award_id {}; skipping...".format(
                    row['subcontract_num'], row['piid'], row['parent_award_id']))
            continue

        award_ids_to_update.add(award.id)

        # Get or create unique DUNS-recipient pair; only a freshly-created
        # entity is populated from the row
        recipient, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['duns'],
            recipient_name=row['company_name']
        )

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d1_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        d1_f_dict = {
            'award': award,
            'recipient': recipient,
            'data_source': "DBR",
            'cfda': None,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subcontract_num'],
            'amount': row['subcontract_amount'],
            'description': row['overall_description'],
            'recovery_model_question1': row['recovery_model_q1'],
            'recovery_model_question2': row['recovery_model_q2'],
            'action_date': row['subcontract_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year']
        }

        # Create the subaward (idempotent: keyed on subaward_number + award)
        subaward, created = Subaward.objects.update_or_create(subaward_number=row['subcontract_num'], award=award,
                                                              defaults=d1_f_dict)
        if created:
            d1_create_count += 1
        else:
            d1_update_count += 1

    # D2 File F
    db_cursor.execute(D2_FILE_F_QUERY, [tuple(fains), tuple(uris)])
    d2_f_data = dictfetchall(db_cursor)
    logger.info("Creating D2 F File Entries (Subawards): {}".format(len(d2_f_data)))
    d2_create_count = 0
    d2_update_count = 0
    d2_empty_count = 0

    for row in d2_f_data:
        # Same anomaly/empty handling as the D1 loop above
        if row['subaward_num'] is None:
            if row['id'] is not None and row['subaward_amount'] is not None:
                logger.warn("Subcontract of broker id {} has amount, but no number".format(row["id"]))
                logger.warn("Failing row: {}".format(row))
            else:
                d2_empty_count += 1
            continue

        agency = get_valid_awarding_agency(row)

        if not agency:
            logger.warn("Subaward number {} cannot find matching agency with toptier code {} and subtier "
                        "code {}".format(row['subaward_num'], row['awarding_agency_code'],
                                         row['awarding_sub_tier_agency_c']))
            continue

        # Find the award to attach this sub-award to
        # We perform this lookup by finding the Award containing a transaction with a matching fain and submission.
        # If this fails, try submission and uri
        if row['fain'] and len(row['fain']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__fain=row['fain']).\
                distinct().order_by("-date_signed").first()

        # Couldn't find a match on FAIN, try URI if it exists
        if not award and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__uri=row['uri']).distinct().first()

        # Try both
        if not award and row['fain'] and len(row['fain']) > 0 and row['uri'] and len(row['uri']) > 0:
            award = Award.objects.filter(awarding_agency=agency,
                                         latest_transaction__assistance_data__fain=row['fain'],
                                         latest_transaction__assistance_data__uri=row['uri']).\
                distinct().order_by("-date_signed").first()

        # We don't have a matching award for this subcontract, log a warning and continue to the next row
        if not award:
            logger.warn("Subaward number {} cannot find matching award with fain {}, uri {}; "
                        "skipping...".format(row['subaward_num'], row['fain'], row['uri']))
            continue

        award_ids_to_update.add(award.id)

        # Prefer awardee_name, falling back through the legal name to ""
        recipient_name = row['awardee_name']
        if recipient_name is None:
            recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ""

        # Get or create unique DUNS-recipient pair
        recipient, created = LegalEntity.objects.get_or_create(
            recipient_unique_id=row['duns'],
            recipient_name=recipient_name
        )

        if created:
            recipient.parent_recipient_unique_id = row['parent_duns']
            recipient.location = get_or_create_location(row, location_d2_recipient_mapper)
            recipient.save()

        # Get or create POP
        place_of_performance = get_or_create_location(row, pop_mapper)

        # Get CFDA Program
        cfda = Cfda.objects.filter(program_number=row['cfda_number']).first()

        d2_f_dict = {
            'award': award,
            'recipient': recipient,
            'data_source': "DBR",
            'cfda': cfda,
            'awarding_agency': award.awarding_agency,
            'funding_agency': award.funding_agency,
            'place_of_performance': place_of_performance,
            'subaward_number': row['subaward_num'],
            'amount': row['subaward_amount'],
            'description': row['project_description'],
            'recovery_model_question1': row['compensation_q1'],
            'recovery_model_question2': row['compensation_q2'],
            'action_date': row['subaward_date'],
            'award_report_fy_month': row['report_period_mon'],
            'award_report_fy_year': row['report_period_year']
        }

        # Create the subaward
        subaward, created = Subaward.objects.update_or_create(
            subaward_number=row['subaward_num'], award=award,
            defaults=d2_f_dict
        )
        if created:
            d2_create_count += 1
        else:
            d2_update_count += 1

    # Update Award objects with subaward aggregates
    update_award_subawards(tuple(award_ids_to_update))

    logger.info(
        """Submission {}
           Subcontracts created: {}
           Subcontracts updated: {}
           Empty subcontract rows: {}
           Subawards created: {}
           Subawards updated: {}
           Empty subaward rows: {}""".format(submission_attributes.broker_submission_id,
                                             d1_create_count,
                                             d1_update_count,
                                             d1_empty_count,
                                             d2_create_count,
                                             d2_update_count,
                                             d2_empty_count))
def handle(self, *args, **options):
    """Load a USAspending contracts CSV into Transaction and
    TransactionContract records via bulk insert, then refresh award data."""
    csv_file = options['file'][0]
    self.logger.info("Starting load for file {}".format(csv_file))

    # Create the csv reader
    reader = CsvDataReader(csv_file)

    # Create a new submission attributes object for this timestamp
    subattr = SubmissionAttributes()
    subattr.usaspending_update = datetime.now()
    subattr.save()

    # Create lists to hold model instances for bulk insert
    txn_list = []
    txn_contract_list = []

    subtier_agency_dict = h.get_subtier_agency_dict()

    # Store some additional support data needed for the load
    award_type_dict = {a[0]: a[1] for a in AWARD_TYPES}
    contract_pricing_dict = {c[0]: c[1] for c in CONTRACT_PRICING_TYPES}

    for idx, row in enumerate(reader):
        # NOTE(review): progress check uses len(reader), not idx — if
        # len(reader) is a fixed total this either always or never logs;
        # idx % 1000 was probably intended. Confirm before changing.
        if len(reader) % 1000 == 0:
            self.logger.info("Read row {}".format(len(reader)))
        row = h.cleanse_values(row)

        awarding_agency_id = self.get_agency_id(row["contractingofficeagencyid"], subtier_agency_dict)

        # Create the transaction object for this row
        txn_dict = {
            "action_date": h.convert_date(row['signeddate']),
            "award": self.get_or_create_award(row, awarding_agency_id),
            "awarding_agency_id": awarding_agency_id,
            "data_source": "USA",
            "description": row["descriptionofcontractrequirement"],
            "federal_action_obligation": row["dollarsobligated"],
            "funding_agency_id": self.get_agency_id(row["fundingrequestingagencyid"], subtier_agency_dict),
            "last_modified_date": h.convert_date(row['last_modified_date']),
            "modification_number": row["modnumber"],
            "place_of_performance": h.get_or_create_location(
                row, mapper=location_mapper_place_of_performance),
            "period_of_performance_current_end_date": h.convert_date(row['currentcompletiondate']),
            "period_of_performance_start_date": h.convert_date(row['effectivedate']),
            "recipient": self.get_or_create_recipient(row),
            "submission": subattr,
            "type": evaluate_contract_award_type(row),
            "type_description": award_type_dict.get(evaluate_contract_award_type(row)),
            "usaspending_unique_transaction_id": row["unique_transaction_id"]
        }
        txn = Transaction(**txn_dict)
        txn_list.append(txn)

        # Create the transaction contract object for this row
        txn_contract_dict = {
            "submission": subattr,
            "piid": row['piid'],
            "parent_award_id": row['idvpiid'],
            "current_total_value_award": h.parse_numeric_value(row["baseandexercisedoptionsvalue"]),
            "period_of_performance_potential_end_date": h.convert_date(row['ultimatecompletiondate']),
            "potential_total_value_of_award": h.parse_numeric_value(row["baseandalloptionsvalue"]),
            "epa_designated_product": self.parse_first_character(row['useofepadesignatedproducts']),
            "gfe_gfp": h.up2colon(row['gfe_gfp']),
            "cost_or_pricing_data": h.up2colon(row['costorpricingdata']),
            "type_of_contract_pricing": h.up2colon(row['typeofcontractpricing']),
            "type_of_contract_pricing_description": contract_pricing_dict.get(h.up2colon(row['typeofcontractpricing'])),
            "multiple_or_single_award_idv": h.up2colon(row['multipleorsingleawardidc']),
            # NOTE(review): "naics" reads nationalinterestactioncode, same
            # source as "national_interest_action" below — looks like it
            # should read the CSV's NAICS column; verify against the source
            # data dictionary before changing.
            "naics": h.up2colon(row['nationalinterestactioncode']),
            "dod_claimant_program_code": h.up2colon(row['claimantprogramcode']),
            "commercial_item_acquisition_procedures": h.up2colon(
                row['commercialitemacquisitionprocedures']),
            "commercial_item_test_program": h.up2colon(row['commercialitemtestprogram']),
            "consolidated_contract": h.up2colon(row['consolidatedcontract']),
            "contingency_humanitarian_or_peacekeeping_operation": h.up2colon(
                row['contingencyhumanitarianpeacekeepingoperation']),
            "contract_bundling": h.up2colon(row['contractbundling']),
            "contract_financing": h.up2colon(row['contractfinancing']),
            "contracting_officers_determination_of_business_size": h.up2colon(
                row['contractingofficerbusinesssizedetermination']),
            "country_of_product_or_service_origin": h.up2colon(row['countryoforigin']),
            "davis_bacon_act": h.up2colon(row['davisbaconact']),
            "evaluated_preference": h.up2colon(row['evaluatedpreference']),
            "extent_competed": h.up2colon(row['extentcompeted']),
            "information_technology_commercial_item_category": h.up2colon(
                row['informationtechnologycommercialitemcategory']),
            "interagency_contracting_authority": h.up2colon(row['interagencycontractingauthority']),
            "local_area_set_aside": h.up2colon(row['localareasetaside']),
            "purchase_card_as_payment_method": h.up2colon(row['purchasecardaspaymentmethod']),
            "multi_year_contract": h.up2colon(row['multiyearcontract']),
            "national_interest_action": h.up2colon(row['nationalinterestactioncode']),
            "number_of_actions": h.up2colon(row['numberofactions']),
            "number_of_offers_received": h.up2colon(row['numberofoffersreceived']),
            "performance_based_service_acquisition": h.up2colon(row['performancebasedservicecontract']),
            "place_of_manufacture": h.up2colon(row['placeofmanufacture']),
            "product_or_service_code": h.up2colon(row['productorservicecode']),
            "recovered_materials_sustainability": h.up2colon(row['recoveredmaterialclauses']),
            "research": h.up2colon(row['research']),
            "sea_transportation": h.up2colon(row['seatransportation']),
            "service_contract_act": h.up2colon(row['servicecontractact']),
            "small_business_competitiveness_demonstration_program": self.parse_first_character(
                row['smallbusinesscompetitivenessdemonstrationprogram']),
            "solicitation_procedures": h.up2colon(row['solicitationprocedures']),
            "subcontracting_plan": h.up2colon(row['subcontractplan']),
            "type_set_aside": h.up2colon(row['typeofsetaside']),
            "walsh_healey_act": h.up2colon(row['walshhealyact']),
            "rec_flag": self.parse_first_character(h.up2colon(row['rec_flag'])),
            "type_of_idc": self.parse_first_character(row['typeofidc']),
            "a76_fair_act_action": self.parse_first_character(row['a76action']),
            "clinger_cohen_act_planning": self.parse_first_character(row['clingercohenact']),
            "cost_accounting_standards": self.parse_first_character(
                row['costaccountingstandardsclause']),
            "fed_biz_opps": self.parse_first_character(row['fedbizopps']),
            "foreign_funding": self.parse_first_character(row['fundedbyforeignentity']),
            "major_program": self.parse_first_character(row['majorprogramcode']),
            "program_acronym": self.parse_first_character(row['programacronym']),
            "referenced_idv_modification_number": self.parse_first_character(
                row['idvmodificationnumber']),
            "transaction_number": self.parse_first_character(row['transactionnumber']),
            "solicitation_identifier": self.parse_first_character(row['solicitationid'])
        }
        txn_contract = TransactionContract(**txn_contract_dict)
        txn_contract_list.append(txn_contract)

    # Bulk insert transaction rows
    self.logger.info("Starting Transaction bulk insert ({} records)".format(len(txn_list)))
    Transaction.objects.bulk_create(txn_list)
    self.logger.info("Completed Transaction bulk insert")
    # Update txn contract list with newly-inserted transactions
    # (the two lists are index-aligned: entry i of each came from CSV row i)
    award_id_list = []  # we'll need this when updating the awards later on
    for idx, t in enumerate(txn_contract_list):
        # add transaction info to this TransactionContract object
        t.transaction = txn_list[idx]
        # add the corresponding award id to a list we'll use when batch-updating award data
        award_id_list.append(txn_list[idx].award_id)

    # Bulk insert transaction contract rows
    self.logger.info("Starting TransactionContract bulk insert ({} records)".format(len(txn_contract_list)))
    TransactionContract.objects.bulk_create(txn_contract_list)
    self.logger.info("Completed TransactionContract bulk insert")

    # Update awards to reflect latest transaction information
    # (note that this can't be done via signals or a save()
    # override in the model itself, because those aren't
    # triggered by a bulk update
    self.logger.info("Starting Awards update")
    count = update_awards(tuple(award_id_list))
    update_contract_awards(tuple(award_id_list))
    self.logger.info("Completed Awards update ({} records)".format(count))
def get_or_create_recipient(self, row):
    """Return the LegalEntity (recipient) for a raw USAspending contract row.

    Looks up an existing LegalEntity by the row's DUNS number. If none
    exists, a new one is created from the row's vendor fields; if one does
    exist, it is updated in place with the latest row data and the refreshed
    instance is returned.

    Args:
        row: dict of raw USAspending CSV columns (lowercase header names).
            Also used to resolve/create the recipient's Location via
            ``h.get_or_create_location``.

    Returns:
        LegalEntity: the created or updated recipient record.
    """
    recipient_dict = {
        # Resolve (or create) the vendor location first; only its id is stored.
        "location_id": h.get_or_create_location(
            row, mapper=location_mapper_vendor).location_id,
        "recipient_name": row['vendorname'],
        "vendor_phone_number": row['phoneno'],
        "vendor_fax_number": row['faxno'],
        "recipient_unique_id": row['dunsnumber'],
        "parent_recipient_unique_id": row['parentdunsnumber'],
        # Boolean-ish source flags arrive as strings; we keep only the first
        # character (e.g. "true: ..." -> "t").
        "city_local_government":
        self.parse_first_character(row['iscitylocalgovernment']),
        "county_local_government":
        self.parse_first_character(row['iscountylocalgovernment']),
        "inter_municipal_local_government":
        self.parse_first_character(row['isintermunicipallocalgovernment']),
        "local_government_owned":
        self.parse_first_character(row['islocalgovernmentowned']),
        "municipality_local_government":
        self.parse_first_character(row['ismunicipalitylocalgovernment']),
        "school_district_local_government":
        self.parse_first_character(row['isschooldistrictlocalgovernment']),
        "township_local_government":
        self.parse_first_character(row['istownshiplocalgovernment']),
        "federal_agency":
        self.parse_first_character(row['isfederalgovernmentagency']),
        "us_federal_government":
        self.parse_first_character(row['federalgovernmentflag']),
        "federally_funded_research_and_development_corp":
        self.parse_first_character(
            row['isfederallyfundedresearchanddevelopmentcorp']),
        # NOTE(review): 'istriballyownedfirm' also feeds
        # tribally_owned_business below — confirm both mappings are intended.
        "us_tribal_government":
        self.parse_first_character(row['istriballyownedfirm']),
        "c8a_program_participant":
        self.parse_first_character(row['firm8aflag']),
        "foreign_government":
        self.parse_first_character(row['isforeigngovernment']),
        "community_developed_corporation_owned_firm":
        self.parse_first_character(
            row['iscommunitydevelopedcorporationownedfirm']),
        "labor_surplus_area_firm":
        self.parse_first_character(row['islaborsurplusareafirm']),
        "small_agricultural_cooperative":
        self.parse_first_character(row['issmallagriculturalcooperative']),
        "international_organization":
        self.parse_first_character(row['isinternationalorganization']),
        "c1862_land_grant_college":
        self.parse_first_character(row['is1862landgrantcollege']),
        "c1890_land_grant_college":
        self.parse_first_character(row['is1890landgrantcollege']),
        "c1994_land_grant_college":
        self.parse_first_character(row['is1994landgrantcollege']),
        "minority_institution":
        self.parse_first_character(row['minorityinstitutionflag']),
        "private_university_or_college":
        self.parse_first_character(row['isprivateuniversityorcollege']),
        "school_of_forestry":
        self.parse_first_character(row['isschoolofforestry']),
        "state_controlled_institution_of_higher_learning":
        self.parse_first_character(
            row['isstatecontrolledinstitutionofhigherlearning']),
        "tribal_college":
        self.parse_first_character(row['istribalcollege']),
        "veterinary_college":
        self.parse_first_character(row['isveterinarycollege']),
        "educational_institution":
        self.parse_first_character(row['educationalinstitutionflag']),
        # NOTE(review): the next two reuse the "owned corporation/firm"
        # source columns for the "servicing institution" fields — confirm.
        "alaskan_native_servicing_institution":
        self.parse_first_character(
            row['isalaskannativeownedcorporationorfirm']),
        "community_development_corporation":
        self.parse_first_character(row['iscommunitydevelopmentcorporation']),
        "native_hawaiian_servicing_institution":
        self.parse_first_character(
            row['isnativehawaiianownedorganizationorfirm']),
        "domestic_shelter":
        self.parse_first_character(row['isdomesticshelter']),
        "manufacturer_of_goods":
        self.parse_first_character(row['ismanufacturerofgoods']),
        "hospital_flag":
        self.parse_first_character(row['hospitalflag']),
        "veterinary_hospital":
        self.parse_first_character(row['isveterinaryhospital']),
        "hispanic_servicing_institution":
        self.parse_first_character(row['ishispanicservicinginstitution']),
        "woman_owned_business":
        self.parse_first_character(row['womenownedflag']),
        "minority_owned_business":
        self.parse_first_character(row['minorityownedbusinessflag']),
        "women_owned_small_business":
        self.parse_first_character(row['iswomenownedsmallbusiness']),
        "economically_disadvantaged_women_owned_small_business":
        self.parse_first_character(
            row['isecondisadvwomenownedsmallbusiness']),
        "joint_venture_economic_disadvantaged_women_owned_small_bus":
        self.parse_first_character(
            row['isjointventureecondisadvwomenownedsmallbusiness']),
        "veteran_owned_business":
        self.parse_first_character(row['veteranownedflag']),
        "airport_authority":
        self.parse_first_character(row['isairportauthority']),
        "housing_authorities_public_tribal":
        self.parse_first_character(row['ishousingauthoritiespublicortribal']),
        "interstate_entity":
        self.parse_first_character(row['isinterstateentity']),
        "planning_commission":
        self.parse_first_character(row['isplanningcommission']),
        "port_authority":
        self.parse_first_character(row['isportauthority']),
        "transit_authority":
        self.parse_first_character(row['istransitauthority']),
        "foreign_owned_and_located":
        self.parse_first_character(row['isforeignownedandlocated']),
        "american_indian_owned_business":
        self.parse_first_character(row['aiobflag']),
        "alaskan_native_owned_corporation_or_firm":
        self.parse_first_character(
            row['isalaskannativeownedcorporationorfirm']),
        "indian_tribe_federally_recognized":
        self.parse_first_character(row['isindiantribe']),
        "native_hawaiian_owned_business":
        self.parse_first_character(
            row['isnativehawaiianownedorganizationorfirm']),
        "tribally_owned_business":
        self.parse_first_character(row['istriballyownedfirm']),
        "asian_pacific_american_owned_business":
        self.parse_first_character(row['apaobflag']),
        "black_american_owned_business":
        self.parse_first_character(row['baobflag']),
        "hispanic_american_owned_business":
        self.parse_first_character(row['haobflag']),
        "native_american_owned_business":
        self.parse_first_character(row['naobflag']),
        "historically_black_college":
        self.parse_first_character(row['hbcuflag']),
        "subcontinent_asian_asian_indian_american_owned_business":
        self.parse_first_character(row['saaobflag']),
        # NOTE(review): 'islocalgovernmentowned' also feeds
        # local_government_owned above — confirm both mappings are intended.
        "us_local_government":
        self.parse_first_character(row['islocalgovernmentowned']),
        "division_name":
        self.parse_first_character(row['divisionname']),
        "division_number":
        self.parse_first_character(row['divisionnumberorofficecode']),
        "historically_underutilized_business_zone":
        self.parse_first_character(row['hubzoneflag']),
        "corporate_entity_tax_exempt":
        self.parse_first_character(row['iscorporateentitytaxexempt']),
        "corporate_entity_not_tax_exempt":
        self.parse_first_character(row['iscorporateentitynottaxexempt']),
        "council_of_governments":
        self.parse_first_character(row['iscouncilofgovernments']),
        "dot_certified_disadvantage":
        self.parse_first_character(
            row['isdotcertifieddisadvantagedbusinessenterprise']),
        "for_profit_organization":
        self.parse_first_character(row['isforprofitorganization']),
        "foundation":
        self.parse_first_character(row['isfoundation']),
        "joint_venture_women_owned_small_business":
        self.parse_first_character(
            row['isjointventurewomenownedsmallbusiness']),
        "limited_liability_corporation":
        self.parse_first_character(row['islimitedliabilitycorporation']),
        "other_not_for_profit_organization":
        self.parse_first_character(row['isothernotforprofitorganization']),
        "other_minority_owned_business":
        self.parse_first_character(row['isotherminorityowned']),
        "partnership_or_limited_liability_partnership":
        self.parse_first_character(
            row['ispartnershiporlimitedliabilitypartnership']),
        "sole_proprietorship":
        self.parse_first_character(row['issoleproprietorship']),
        "subchapter_scorporation":
        self.parse_first_character(row['issubchapterscorporation']),
        "nonprofit_organization":
        self.parse_first_character(row['nonprofitorganizationflag']),
    }

    le = LegalEntity.objects.filter(
        recipient_unique_id=row['dunsnumber']).first()
    if le is None:  # .first() contract is instance-or-None; be explicit
        le = LegalEntity.objects.create(**recipient_dict)
    else:
        # Queryset-level update() bypasses the in-memory instance (and any
        # save() signals), so refresh `le` before returning it — otherwise
        # callers would see pre-update attribute values.
        LegalEntity.objects.filter(
            legal_entity_id=le.legal_entity_id).update(**recipient_dict)
        le.refresh_from_db()
    return le