def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor):
    """
    Process and load file B broker data (aka TAS balances by program activity and object class).
    """
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    test_counter = 0
    for row in prg_act_obj_cls_data:
        test_counter += 1
        account_balances = None
        try:
            # Check and see if there is an entry for this TAS
            treasury_account = get_treasury_appropriation_account_tas_lookup(row.get('tas_id'), db_cursor)
            if treasury_account is None:
                raise Exception('Could not find appropriation account for TAS: ' + row['tas'])
        except Exception:
            # TAS lookup failed (including the missing-TAS case raised above); skip this row
            continue

        # get the corresponding account balances row (aka "File A" record)
        account_balances = AppropriationAccountBalances.objects.get(
            treasury_account_identifier=treasury_account,
            submission_id=submission_attributes.submission_id)

        financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass()

        value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            'treasury_account': treasury_account,
            'appropriation_account_balances': account_balances,
            'object_class': get_or_create_object_class(row['object_class'], row['by_direct_reimbursable_fun'], logger),
            'program_activity': get_or_create_program_activity(row, submission_attributes)
        }

        load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=True, reverse=reverse)

    # Insert File B quarterly numbers for this submission
    TasProgramActivityObjectClassQuarterly.insert_quarterly_numbers(submission_attributes.submission_id)

    FinancialAccountsByProgramActivityObjectClass.populate_final_of_fy()
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor):
    """
    Process and load file B broker data (aka TAS balances by program activity and object class).
    """
    reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}

    test_counter = 0
    for row in prg_act_obj_cls_data:
        test_counter += 1
        account_balances = None
        try:
            # Check and see if there is an entry for this TAS
            treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(
                row.get('tas_id'), db_cursor)
            if treasury_account is None:
                update_skipped_tas(row, tas_rendering_label, skipped_tas)
                continue
        except Exception:  # TODO: What is this trying to catch, actually?
            continue

        # get the corresponding account balances row (aka "File A" record)
        account_balances = AppropriationAccountBalances.objects.get(
            treasury_account_identifier=treasury_account,
            submission_id=submission_attributes.submission_id
        )

        financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass()

        value_map = {
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end,
            'treasury_account': treasury_account,
            'appropriation_account_balances': account_balances,
            'object_class': get_or_create_object_class(row['object_class'], row['by_direct_reimbursable_fun'], logger),
            'program_activity': get_or_create_program_activity(row, submission_attributes)
        }

        load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=True, reverse=reverse)

    # Insert File B quarterly numbers for this submission
    TasProgramActivityObjectClassQuarterly.insert_quarterly_numbers(submission_attributes.submission_id)

    FinancialAccountsByProgramActivityObjectClass.populate_final_of_fy()

    for key in skipped_tas:
        logger.info('Skipped %d rows due to missing TAS: %s', skipped_tas[key]['count'], key)

    total_tas_skipped = 0
    for key in skipped_tas:
        total_tas_skipped += skipped_tas[key]['count']

    logger.info('Skipped a total of {} TAS rows for File B'.format(total_tas_skipped))
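# `update_skipped_tas` is defined elsewhere and not shown in this section. A
# minimal hypothetical sketch, consistent with the bookkeeping described in the
# comments above (per-TAS skip count plus the original submitted row numbers,
# keyed by the TAS rendering label) -- the real helper may differ:
def update_skipped_tas(row, tas_rendering_label, skipped_tas):
    # Track, per TAS label, how many rows were skipped and which original
    # row numbers from the submitted file they came from.
    entry = skipped_tas.setdefault(tas_rendering_label, {'count': 0, 'rows': []})
    entry['count'] += 1
    entry['rows'].append(row.get('row_number'))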
def load_file_a(submission_attributes, appropriation_data, db_cursor):
    """
    Process and load file A broker data (aka TAS balances, aka appropriation account balances).
    """
    reverse = re.compile("gross_outlay_amount_by_tas_cpe")

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}

    # Create account objects
    for row in appropriation_data:
        # Check and see if there is an entry for this TAS
        treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(
            row.get("tas_id"), db_cursor)
        if treasury_account is None:
            update_skipped_tas(row, tas_rendering_label, skipped_tas)
            continue

        # Now that we have the account, we can load the appropriation balances
        # TODO: Figure out how we want to determine what row is overridden by what row
        # If we want to correlate, the following attributes are available in the data broker data that might be useful:
        # appropriation_id, row_number
        # appropriation_balances = somethingsomething get appropriation balances...
        appropriation_balances = AppropriationAccountBalances()

        value_map = {
            "treasury_account_identifier": treasury_account,
            "submission": submission_attributes,
            "reporting_period_start": submission_attributes.reporting_period_start,
            "reporting_period_end": submission_attributes.reporting_period_end,
        }

        field_map = {}

        load_data_into_model(appropriation_balances, row, field_map=field_map, value_map=value_map, save=True, reverse=reverse)

    AppropriationAccountBalances.populate_final_of_fy()

    for key in skipped_tas:
        logger.info(f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}")

    total_tas_skipped = 0
    for key in skipped_tas:
        total_tas_skipped += skipped_tas[key]["count"]

    logger.info(f"Skipped a total of {total_tas_skipped:,} TAS rows for File A")
def load_file_a(submission_attributes, appropriation_data, db_cursor):
    """
    Process and load file A broker data (aka TAS balances, aka appropriation account balances).
    """
    reverse = re.compile('gross_outlay_amount_by_tas_cpe')

    # dictionary to capture TAS that were skipped and some metadata
    # tas = top-level key
    # count = number of rows skipped
    # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted
    skipped_tas = {}

    # Create account objects
    for row in appropriation_data:
        # Check and see if there is an entry for this TAS
        treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(
            row.get('tas_id'), db_cursor)
        if treasury_account is None:
            update_skipped_tas(row, tas_rendering_label, skipped_tas)
            continue

        # Now that we have the account, we can load the appropriation balances
        # TODO: Figure out how we want to determine what row is overridden by what row
        # If we want to correlate, the following attributes are available in the data broker data that might be useful:
        # appropriation_id, row_number
        # appropriation_balances = somethingsomething get appropriation balances...
        appropriation_balances = AppropriationAccountBalances()

        value_map = {
            'treasury_account_identifier': treasury_account,
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end
        }

        field_map = {}

        load_data_into_model(appropriation_balances, row, field_map=field_map, value_map=value_map, save=True, reverse=reverse)

    AppropriationAccountBalances.populate_final_of_fy()

    # Insert File A quarterly numbers for this submission
    AppropriationAccountBalancesQuarterly.insert_quarterly_numbers(submission_attributes.submission_id)

    for key in skipped_tas:
        logger.info('Skipped %d rows due to missing TAS: %s', skipped_tas[key]['count'], key)

    total_tas_skipped = 0
    for key in skipped_tas:
        total_tas_skipped += skipped_tas[key]['count']

    logger.info('Skipped a total of {} TAS rows for File A'.format(total_tas_skipped))
def handle(self, *args, **options):
    state_data_field_map = {
        'fips': 'FIPS',
        'code': 'Code',
        'name': 'Name',
        'type': 'Type',
        'year': 'Year',
        'population': 'Population',
        'pop_source': 'Population Source',
        'median_household_income': 'Median Household Income',
        'mhi_source': 'Median Household Income Source'
    }

    csv_file = options['file']
    remote = False
    if csv_file:
        if not os.path.exists(csv_file):
            # the provided path points at nothing, so FileNotFoundError is the accurate exception
            raise FileNotFoundError(csv_file)
        elif os.path.splitext(csv_file)[1] != '.csv':
            raise Exception('Wrong filetype provided, expecting csv')
        file_path = csv_file
    elif not settings.IS_LOCAL and os.environ.get('USASPENDING_AWS_REGION') and os.environ.get('STATE_DATA_BUCKET'):
        s3connection = boto.s3.connect_to_region(os.environ.get('USASPENDING_AWS_REGION'))
        s3bucket = s3connection.lookup(os.environ.get('STATE_DATA_BUCKET'))
        key = s3bucket.get_key(LOCAL_STATE_DATA_FILENAME)
        file_path = os.path.join('/', 'tmp', LOCAL_STATE_DATA_FILENAME)
        key.get_contents_to_filename(file_path)
        remote = True
    else:
        file_path = LOCAL_STATE_DATA

    with open(file_path) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Defaulting to None's instead of ''s
            row = {key: (value or None) for key, value in row.items()}
            load_data_into_model(
                StateData(),
                row,
                field_map=state_data_field_map,
                save=True
            )

    if remote:
        os.remove(file_path)

    logger.info('Loading StateData complete')
def load_file_a(submission_attributes, appropriation_data, db_cursor):
    """
    Process and load file A broker data (aka TAS balances, aka appropriation account balances).
    """
    reverse = re.compile('gross_outlay_amount_by_tas_cpe')

    # Create account objects
    for row in appropriation_data:
        # Check and see if there is an entry for this TAS
        treasury_account = get_treasury_appropriation_account_tas_lookup(row.get('tas_id'), db_cursor)
        if treasury_account is None:
            raise Exception('Could not find appropriation account for TAS: ' + row['tas'])

        # Now that we have the account, we can load the appropriation balances
        # TODO: Figure out how we want to determine what row is overridden by what row
        # If we want to correlate, the following attributes are available in the
        # data broker data that might be useful: appropriation_id, row_number
        # appropriation_balances = somethingsomething get appropriation balances...
        appropriation_balances = AppropriationAccountBalances()

        value_map = {
            'treasury_account_identifier': treasury_account,
            'submission': submission_attributes,
            'reporting_period_start': submission_attributes.reporting_period_start,
            'reporting_period_end': submission_attributes.reporting_period_end
        }

        field_map = {}

        load_data_into_model(appropriation_balances, row, field_map=field_map, value_map=value_map, save=True, reverse=reverse)

    AppropriationAccountBalances.populate_final_of_fy()

    # Insert File A quarterly numbers for this submission
    AppropriationAccountBalancesQuarterly.insert_quarterly_numbers(submission_attributes.submission_id)
def handle(self, *args, **options):
    state_data_field_map = {
        "fips": "FIPS",
        "code": "Code",
        "name": "Name",
        "type": "Type",
        "year": "Year",
        "population": "Population",
        "pop_source": "Population Source",
        "median_household_income": "Median Household Income",
        "mhi_source": "Median Household Income Source",
    }

    csv_file = options["file"]
    remote = False
    if csv_file:
        if not os.path.exists(csv_file):
            # the provided path points at nothing, so FileNotFoundError is the accurate exception
            raise FileNotFoundError(csv_file)
        elif os.path.splitext(csv_file)[1] != ".csv":
            raise Exception("Wrong filetype provided, expecting csv")
        file_path = csv_file
    elif not settings.IS_LOCAL and settings.USASPENDING_AWS_REGION and settings.STATE_DATA_BUCKET:
        s3connection = boto3.resource("s3", region_name=settings.USASPENDING_AWS_REGION)
        s3bucket = s3connection.Bucket(settings.STATE_DATA_BUCKET)
        file_path = os.path.join("/", "tmp", LOCAL_STATE_DATA_FILENAME)
        s3bucket.download_file(LOCAL_STATE_DATA_FILENAME, file_path)
        remote = True
    else:
        file_path = LOCAL_STATE_DATA

    with open(file_path) as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Defaulting to None's instead of ''s
            row = {key: (value or None) for key, value in row.items()}
            load_data_into_model(StateData(), row, field_map=state_data_field_map, save=True)

    if remote:
        os.remove(file_path)

    logger.info("Loading StateData complete")
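# For reference, `state_data_field_map` above implies a CSV whose header row
# uses the human-readable column names on the right-hand side of the map. The
# data row below is purely illustrative, not taken from the real file:
#
#   FIPS,Code,Name,Type,Year,Population,Population Source,Median Household Income,Median Household Income Source
#   01,AL,Alabama,State,2018,4887871,U.S. Census Bureau,49861,American Community Survey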
def update_duns(self, update_duns, update_date):
    logger.info("Updating {} duns records".format(len(update_duns)))
    for row in update_duns:
        # raises IndexError if no matching record exists
        equivalent_duns = DUNS.objects.filter(broker_duns_id=row["duns_id"])[0]
        load_data_into_model(
            equivalent_duns,
            row,
            field_map={
                "awardee_or_recipient_uniqu": "awardee_or_recipient_uniqu",
                "legal_business_name": "legal_business_name",
                "ultimate_parent_unique_ide": "ultimate_parent_unique_ide",
                "ultimate_parent_legal_enti": "ultimate_parent_legal_enti",
                "broker_duns_id": "duns_id",
            },
            value_map={"update_date": update_date},
            as_dict=False,
            save=True,
        )
def get_submission_attributes(submission_id, submission_data):
    """
    For a specified broker submission, return the existing corresponding usaspending
    submission record or create and return a new one.
    """
    dabs_window = DABSSubmissionWindowSchedule.objects.filter(
        submission_fiscal_year=submission_data["reporting_fiscal_year"],
        submission_fiscal_month=submission_data["reporting_fiscal_period"],
        is_quarter=submission_data["is_quarter_format"],
    ).first()

    if not dabs_window:
        raise RuntimeError(f"Missing DABS Window record necessary for {submission_id}")

    # check if we already have an entry for this submission id; if not, create one
    submission_attributes, created = SubmissionAttributes.objects.get_or_create(
        submission_id=submission_id, defaults={"submission_window": dabs_window}
    )

    if created:
        # this is the first time we're loading this submission
        logger.info(f"Creating submission {submission_id}")
    else:
        # we've already loaded this submission, so delete it before reloading
        logger.info(f"Submission {submission_id} already exists. It will be deleted.")
        call_command("rm_submission", submission_id)

    submission_data["reporting_agency_name"] = retrive_agency_name_from_code(submission_data["toptier_code"])

    # Update and save submission attributes
    field_map = {
        "reporting_period_start": "reporting_start_date",
        "reporting_period_end": "reporting_end_date",
        "quarter_format_flag": "is_quarter_format",
    }

    # Create our value map - specific data to load
    value_map = {"reporting_fiscal_quarter": get_fiscal_quarter(submission_data["reporting_fiscal_period"])}

    new_submission = load_data_into_model(
        submission_attributes, submission_data, field_map=field_map, value_map=value_map, save=True)

    return new_submission
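# `get_fiscal_quarter` is imported from shared helpers. A minimal sketch of the
# mapping it presumably performs (fiscal periods 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3,
# 10-12 -> Q4); the real helper may validate its input differently:
def get_fiscal_quarter(fiscal_period):
    # ceil(period / 3): maps period 3 -> 1, 6 -> 2, 9 -> 3, 12 -> 4
    if fiscal_period is None:
        return None
    return (int(fiscal_period) + 2) // 3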
def get_submission_attributes(broker_submission_id, submission_data):
    """
    For a specified broker submission, return the existing corresponding usaspending
    submission record or create and return a new one.
    """
    # check if we already have an entry for this broker submission id; if not, create one
    submission_attributes, created = SubmissionAttributes.objects.get_or_create(
        broker_submission_id=broker_submission_id)

    if created:
        # this is the first time we're loading this broker submission
        logger.info('Creating broker submission id {}'.format(broker_submission_id))
    else:
        # we've already loaded this broker submission, so delete it before reloading. If there's another submission
        # that references this one as a "previous submission", do not proceed.
        # TODO: now that we're chaining submissions together, get clarification on what should happen when a
        # submission in the middle of the chain is deleted
        TasProgramActivityObjectClassQuarterly.refresh_downstream_quarterly_numbers(
            submission_attributes.submission_id)
        logger.info('Broker submission id {} already exists. It will be deleted.'.format(broker_submission_id))
        call_command('rm_submission', broker_submission_id)

    logger.info("Merging CGAC and FREC columns")
    submission_data["cgac_code"] = \
        submission_data["cgac_code"] if submission_data["cgac_code"] else submission_data["frec_code"]

    # Find the previous submission for this CGAC and fiscal year (if there is one)
    previous_submission = get_previous_submission(
        submission_data['cgac_code'],
        submission_data['reporting_fiscal_year'],
        submission_data['reporting_fiscal_period'])

    # Update and save submission attributes
    field_map = {
        'reporting_period_start': 'reporting_start_date',
        'reporting_period_end': 'reporting_end_date',
        'quarter_format_flag': 'is_quarter_format',
    }

    # Create our value map - specific data to load
    value_map = {
        'broker_submission_id': broker_submission_id,
        'reporting_fiscal_quarter': get_fiscal_quarter(submission_data['reporting_fiscal_period']),
        'previous_submission': previous_submission,
        # pull in broker's last update date to use as certified date
        'certified_date': submission_data['updated_at'].date() if type(
            submission_data['updated_at']) == datetime else None,
    }

    return load_data_into_model(
        submission_attributes, submission_data, field_map=field_map, value_map=value_map, save=True)
def update_duns(self, update_duns, update_date):
    logger.info('Updating {} duns records'.format(len(update_duns)))
    for row in update_duns:
        equivalent_duns = DUNS.objects.filter(broker_duns_id=row['duns_id'])[0]
        load_data_into_model(
            equivalent_duns,
            row,
            field_map={
                'awardee_or_recipient_uniqu': 'awardee_or_recipient_uniqu',
                'legal_business_name': 'legal_business_name',
                'ultimate_parent_unique_ide': 'ultimate_parent_unique_ide',
                'ultimate_parent_legal_enti': 'ultimate_parent_legal_enti',
                'broker_duns_id': 'duns_id'
            },
            value_map={'update_date': update_date},
            as_dict=False,
            save=True)
def load_transaction_normalized(self, fabs_broker_data, total_rows):
    start_time = datetime.now()
    for index, row in enumerate(fabs_broker_data, 1):
        if not (index % 10000):
            logger.info('Transaction Normalized: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        parent_txn_value_map = {
            "award": award_lookup[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_lookup[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": row['modified_at']
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
        }

        transaction_normalized = load_data_into_model(
            TransactionNormalized(),
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=False,
            save=False)

        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating Transaction Normalized (batch_size: {})...'.format(BATCH_SIZE))
    TransactionNormalized.objects.bulk_create(transaction_normalized_bulk, batch_size=BATCH_SIZE)
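# `fy` is imported from shared helpers. The federal fiscal year runs Oct 1 -
# Sep 30, so a date in Oct-Dec belongs to the next calendar year's FY. A
# minimal sketch of that rule (the real helper may accept more input types):
def fy(raw_date):
    # October (month 10) or later rolls into the next fiscal year
    if raw_date is None:
        return None
    return raw_date.year + 1 if raw_date.month > 9 else raw_date.year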
def _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse):
    save_manager = BulkCreateManager(FinancialAccountsByAwards)

    for index, row in enumerate(certified_award_financial, 1):
        if not (index % 1000):
            logger.info(f"C File Load: Loading row {index:,} of {total_rows:,} ({datetime.now() - start_time})")

        upper_case_dict_values(row)

        # Check and see if there is an entry for this TAS
        treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id"))
        if treasury_account is None:
            update_skipped_tas(row, tas_rendering_label, skipped_tas)
            continue

        # Find a matching transaction record, so we can use its subtier agency information to match to (or create) an
        # Award record.

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        filters = {}
        if row.get("piid"):
            filters["piid"] = row.get("piid")
            filters["parent_piid"] = row.get("parent_award_id")
        else:
            if row.get("fain") and not row.get("uri"):
                filters["fain"] = row.get("fain")
            elif row.get("uri") and not row.get("fain"):
                filters["uri"] = row.get("uri")
            else:
                filters["fain"] = row.get("fain")
                filters["uri"] = row.get("uri")

        award_financial_data = FinancialAccountsByAwards()

        value_map_faba = {
            "submission": submission_attributes,
            "reporting_period_start": submission_attributes.reporting_period_start,
            "reporting_period_end": submission_attributes.reporting_period_end,
            "treasury_account": treasury_account,
            "object_class": row.get("object_class"),
            "program_activity": row.get("program_activity"),
            "disaster_emergency_fund": get_disaster_emergency_fund(row),
        }

        save_manager.append(
            load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=False, reverse=reverse)
        )

    save_manager.save_stragglers()
def load_file_a(submission_attributes, appropriation_data, db_cursor):
    """
    Process and load file A broker data (aka TAS balances, aka appropriation account balances).
    """
    reverse = re.compile("gross_outlay_amount_by_tas_cpe")
    skipped_tas = defaultdict(int)  # tracks count of rows skipped due to "missing" TAS

    bulk_treasury_appropriation_account_tas_lookup(appropriation_data, db_cursor)

    # Create account objects
    save_manager = BulkCreateManager(AppropriationAccountBalances)
    for row in appropriation_data:
        # Check and see if there is an entry for this TAS
        treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id"))
        if treasury_account is None:
            skipped_tas[tas_rendering_label] += 1
            continue

        # Now that we have the account, we can load the appropriation balances
        # TODO: Figure out how we want to determine what row is overridden by what row
        # If we want to correlate, the following attributes are available in the data broker data that might be useful:
        # appropriation_id, row_number
        # appropriation_balances = something something get appropriation balances...
        appropriation_balances = AppropriationAccountBalances()

        value_map = {
            "treasury_account_identifier": treasury_account,
            "submission": submission_attributes,
            "reporting_period_start": submission_attributes.reporting_period_start,
            "reporting_period_end": submission_attributes.reporting_period_end,
        }

        field_map = {}

        save_manager.append(
            load_data_into_model(
                appropriation_balances, row, field_map=field_map, value_map=value_map, save=False, reverse=reverse
            )
        )

    save_manager.save_stragglers()

    for tas, count in skipped_tas.items():
        logger.info(f"Skipped {count:,} rows due to {tas}")

    total_tas_skipped = sum(skipped_tas.values())

    if total_tas_skipped > 0:
        logger.info(f"SKIPPED {total_tas_skipped:,} ROWS of File A (missing TAS)")
    else:
        logger.info("All File A records in Broker loaded into USAspending")
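# `BulkCreateManager` is a shared helper not shown here. Judging purely from
# how it is used above (constructed with a model class, `append`ed unsaved
# instances, flushed with `save_stragglers`), a hypothetical sketch could look
# like this -- the batch size and internals are assumptions:
class BulkCreateManager:
    def __init__(self, model, batch_size=1000):
        self.model = model
        self.batch_size = batch_size  # assumed flush threshold
        self._pending = []

    def append(self, instance):
        # Buffer unsaved instances; bulk-insert once a full batch accumulates
        self._pending.append(instance)
        if len(self._pending) >= self.batch_size:
            self.model.objects.bulk_create(self._pending)
            self._pending = []

    def save_stragglers(self):
        # Insert whatever remains after the caller's loop finishes
        if self._pending:
            self.model.objects.bulk_create(self._pending)
            self._pending = []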
def add_duns(self, new_duns, update_date):
    logger.info('Adding {} duns records'.format(len(new_duns)))
    new_records = []
    for row in new_duns:
        new_record = load_data_into_model(
            DUNS(),
            row,
            field_map={
                'awardee_or_recipient_uniqu': 'awardee_or_recipient_uniqu',
                'legal_business_name': 'legal_business_name',
                'ultimate_parent_unique_ide': 'ultimate_parent_unique_ide',
                'ultimate_parent_legal_enti': 'ultimate_parent_legal_enti',
                'broker_duns_id': 'duns_id'
            },
            value_map={'update_date': update_date},
            as_dict=False,
            save=False)
        new_records.append(new_record)
    DUNS.objects.bulk_create(new_records)
def add_duns(self, new_duns, update_date):
    logger.info("Adding {} duns records".format(len(new_duns)))
    new_records = []
    for row in new_duns:
        new_record = load_data_into_model(
            DUNS(),
            row,
            field_map={
                "awardee_or_recipient_uniqu": "awardee_or_recipient_uniqu",
                "legal_business_name": "legal_business_name",
                "ultimate_parent_unique_ide": "ultimate_parent_unique_ide",
                "ultimate_parent_legal_enti": "ultimate_parent_legal_enti",
                "broker_duns_id": "duns_id",
            },
            value_map={"update_date": update_date},
            as_dict=False,
            save=False,
        )
        new_records.append(new_record)
    DUNS.objects.bulk_create(new_records)
def load_transaction_fpds(self, fpds_broker_data, total_rows):
    logger.info('Starting bulk loading for FPDS data')

    start_time = datetime.now()
    for index, row in enumerate(fpds_broker_data, 1):
        if not (index % 10000):
            logger.info('Transaction FPDS: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        fpds_instance_data = load_data_into_model(
            TransactionFPDS(),  # thrown away
            row,
            as_dict=True)

        fpds_instance = TransactionFPDS(**fpds_instance_data)
        fpds_instance.transaction = transaction_normalized_bulk[index - 1]
        fpds_bulk.append(fpds_instance)

    logger.info('Bulk creating Transaction FPDS (batch_size: {})...'.format(BATCH_SIZE))
    TransactionFPDS.objects.bulk_create(fpds_bulk, batch_size=BATCH_SIZE)
def load_legal_entity(self, fabs_broker_data, total_rows):
    start_time = datetime.now()
    for index, row in enumerate(fabs_broker_data, 1):
        if not (index % 10000):
            logger.info('Legal Entity: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ''

        recipient_unique_id = row['awardee_or_recipient_uniqu']
        if recipient_unique_id is None:
            recipient_unique_id = ''

        lookup_key = (recipient_unique_id, recipient_name)
        legal_entity = self.le_map.get(lookup_key)

        if not legal_entity:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name)
            legal_entity = load_data_into_model(
                legal_entity,
                row,
                value_map={"location": lel_bulk[index - 1]},
                save=False)
            LegalEntity.update_business_type_categories(legal_entity)
            self.le_map[lookup_key] = legal_entity
            legal_entity_bulk.append(legal_entity)
        legal_entity_lookup.append(legal_entity)

    logger.info('Bulk creating Legal Entities (batch_size: {})...'.format(BATCH_SIZE))
    LegalEntity.objects.bulk_create(legal_entity_bulk, batch_size=BATCH_SIZE)
def _save_file_c_rows(certified_award_financial, total_rows, start_time, skipped_tas, submission_attributes, reverse):
    save_manager = BulkCreateManager(FinancialAccountsByAwards)

    for index, row in enumerate(certified_award_financial, 1):
        if not (index % 1000):
            logger.info(f"C File Load: Loading row {index:,} of {total_rows:,} ({datetime.now() - start_time})")

        upper_case_dict_values(row)

        # Check and see if there is an entry for this TAS
        treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup(row.get("tas_id"))
        if treasury_account is None:
            update_skipped_tas(row, tas_rendering_label, skipped_tas)
            continue

        award_financial_data = FinancialAccountsByAwards()

        value_map_faba = {
            "submission": submission_attributes,
            "reporting_period_start": submission_attributes.reporting_period_start,
            "reporting_period_end": submission_attributes.reporting_period_end,
            "treasury_account": treasury_account,
            "object_class": row.get("object_class"),
            "program_activity": row.get("program_activity"),
            "disaster_emergency_fund": get_disaster_emergency_fund(row),
            "distinct_award_key": create_distinct_award_key(row),
        }

        save_manager.append(
            load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=False, reverse=reverse)
        )

    save_manager.save_stragglers()
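# `create_distinct_award_key` is defined elsewhere. Since File C rows carry
# piid/parent_award_id/fain/uri, a plausible (hypothetical) sketch simply
# normalizes and joins those identifiers -- the real composition and field
# order may differ:
def create_distinct_award_key(row):
    # Assumed sketch: pipe-join the award identifiers into one uppercase key
    parts = (row.get("piid"), row.get("parent_award_id"), row.get("fain"), row.get("uri"))
    return "|".join("" if part is None else str(part) for part in parts).upper()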
def load_legal_entity(self, fpds_broker_data, total_rows):
    start_time = datetime.now()
    for index, row in enumerate(fpds_broker_data, 1):
        if not (index % 10000):
            logger.info('Legal Entity: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        recipient_name = row['awardee_or_recipient_legal']
        if recipient_name is None:
            recipient_name = ''

        recipient_unique_id = row['awardee_or_recipient_uniqu']
        if recipient_unique_id is None:
            recipient_unique_id = ''

        lookup_key = (recipient_unique_id, recipient_name)
        legal_entity = self.le_map.get(lookup_key)

        if not legal_entity:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
            )
            legal_entity = load_data_into_model(
                legal_entity,
                row,
                value_map={"location": lel_bulk[index - 1]},
                save=False)
            LegalEntity.update_business_type_categories(legal_entity)
            self.le_map[lookup_key] = legal_entity
            legal_entity_bulk.append(legal_entity)
        legal_entity_lookup.append(legal_entity)

    logger.info('Bulk creating Legal Entities (batch_size: {})...'.format(BATCH_SIZE))
    LegalEntity.objects.bulk_create(legal_entity_bulk, batch_size=BATCH_SIZE)
def load_transaction_normalized(self, fpds_broker_data, total_rows):
    start_time = datetime.now()
    for index, row in enumerate(fpds_broker_data, 1):
        if not (index % 10000):
            logger.info('Transaction Normalized: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        parent_txn_value_map = {
            "award": award_lookup[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_lookup[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": row['last_modified']
        }

        contract_field_map = {
            "type": "contract_award_type",
            "description": "award_description"
        }

        transaction_normalized = load_data_into_model(
            TransactionNormalized(),
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=False,
            save=False)

        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating Transaction Normalized (batch_size: {})...'.format(BATCH_SIZE))
    TransactionNormalized.objects.bulk_create(transaction_normalized_bulk, batch_size=BATCH_SIZE)
def load_transaction_fabs(self, fabs_broker_data, total_rows):
    logger.info('Starting bulk loading for FABS data')

    start_time = datetime.now()
    for index, row in enumerate(fabs_broker_data, 1):
        if not (index % 10000):
            logger.info('Transaction FABS: Loading row {} of {} ({})'.format(
                str(index), str(total_rows), datetime.now() - start_time))

        fabs_instance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        fabs_instance = TransactionFABS(**fabs_instance_data)
        fabs_instance.transaction = transaction_normalized_bulk[index - 1]
        fabs_bulk.append(fabs_instance)

    logger.info('Bulk creating Transaction FABS (batch_size: {})...'.format(BATCH_SIZE))
    TransactionFABS.objects.bulk_create(fabs_bulk, batch_size=BATCH_SIZE)
def get_submission_attributes(broker_submission_id, submission_data):
    """
    For a specified broker submission, return the existing corresponding usaspending
    submission record or create and return a new one.
    """
    # check if we already have an entry for this broker submission id; if not, create one
    submission_attributes, created = SubmissionAttributes.objects.get_or_create(
        broker_submission_id=broker_submission_id)

    if created:
        # this is the first time we're loading this broker submission
        logger.info("Creating broker submission id {}".format(broker_submission_id))
    else:
        # we've already loaded this broker submission, so delete it before reloading. If there's another submission
        # that references this one as a "previous submission", do not proceed.
        # TODO: now that we're chaining submissions together, get clarification on what should happen when a
        # submission in the middle of the chain is deleted
        TasProgramActivityObjectClassQuarterly.refresh_downstream_quarterly_numbers(
            submission_attributes.submission_id)
        logger.info("Broker submission id {} already exists. It will be deleted.".format(broker_submission_id))
        call_command("rm_submission", broker_submission_id)

    logger.info("Merging CGAC and FREC columns")
    submission_data["toptier_code"] = (
        submission_data["cgac_code"] if submission_data["cgac_code"] else submission_data["frec_code"]
    )

    # Find the previous submission for this CGAC and fiscal year (if there is one)
    previous_submission = get_previous_submission(
        submission_data["toptier_code"],
        submission_data["reporting_fiscal_year"],
        submission_data["reporting_fiscal_period"],
    )

    # if another submission lists the previous submission as its previous submission, set to null and update later
    potential_conflicts = []
    if previous_submission:
        potential_conflicts = SubmissionAttributes.objects.filter(previous_submission=previous_submission)
        if potential_conflicts:
            logger.info("==== ATTENTION! Previous Submission ID Conflict Detected ====")
            for conflict in potential_conflicts:
                logger.info("Temporarily setting {}'s Previous Submission ID from {} to null".format(
                    conflict, previous_submission.submission_id))
                conflict.previous_submission = None
                conflict.save()

    # Update and save submission attributes
    field_map = {
        "reporting_period_start": "reporting_start_date",
        "reporting_period_end": "reporting_end_date",
        "quarter_format_flag": "is_quarter_format",
    }

    # Create our value map - specific data to load
    value_map = {
        "broker_submission_id": broker_submission_id,
        "reporting_fiscal_quarter": get_fiscal_quarter(submission_data["reporting_fiscal_period"]),
        "previous_submission": previous_submission,
        # pull in broker's last update date to use as certified date
        "certified_date": submission_data["updated_at"].date()
        if type(submission_data["updated_at"]) == datetime else None,
    }

    new_submission = load_data_into_model(
        submission_attributes, submission_data, field_map=field_map, value_map=value_map, save=True)

    # If there were any submissions which were temporarily modified, reassign their previous submission
    for conflict in potential_conflicts:
        remapped_previous = get_previous_submission(
            conflict.toptier_code, conflict.reporting_fiscal_year, conflict.reporting_fiscal_period)
        logger.info("New Previous Submission ID for Submission ID {} permanently mapped to {}".format(
            conflict.submission_id, remapped_previous))
        conflict.previous_submission = remapped_previous
        conflict.save()

    return new_submission
def insert_new_fpds(self, to_insert, total_rows):
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perf_country_desc",
        "state_code": "place_of_performance_state",
        "state_name": "place_of_perfor_state_desc",
        "city_name": "place_of_perform_city_name",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_descrip",
        "city_name": "legal_entity_city_name",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "zip4": "legal_entity_zip4",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
    }

    for index, row in enumerate(to_insert, 1):
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True, "is_fpds": True}
        )
        recipient_name = row["awardee_or_recipient_legal"]
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row["awardee_or_recipient_uniqu"],
            recipient_name=recipient_name if recipient_name is not None else "",
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type="fpds"),
            "is_fpds": True,
        }
        set_legal_entity_boolean_fields(row)
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id
        generated_unique_id = (
            "CONT_AW_"
            + (row["agency_id"] if row["agency_id"] else "-NONE-")
            + "_"
            + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-")
            + "_"
            + (row["piid"] if row["piid"] else "-NONE-")
            + "_"
            + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-")
        )

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id, piid=row["piid"]
        )
        award.parent_award_piid = row.get("parent_award_id")
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        if row["last_modified"] and len(str(row["last_modified"])) == len("YYYY-MM-DD HH:MM:SS"):  # 19 characters
            dt_fmt = "%Y-%m-%d %H:%M:%S"
        else:
            dt_fmt = "%Y-%m-%d %H:%M:%S.%f"  # try using this even if last_modified isn't a valid string

        try:
            last_mod_date = datetime.strptime(str(row["last_modified"]), dt_fmt).date()
        except ValueError:
            # handle odd-string formats and NULLs from the upstream FPDS-NG system
            info_message = "Invalid value '{}' does not match: '{}'".format(row["last_modified"], dt_fmt)
            logger.info(info_message)
            last_mod_date = None

        award_type, award_type_desc = award_types(row)

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row["period_of_performance_star"]),
            "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]),
            "action_date": format_date(row["action_date"]),
            "last_modified_date": last_mod_date,
            "transaction_unique_id": row["detached_award_proc_unique"],
            "generated_unique_award_id": generated_unique_id,
            "is_fpds": True,
            "type": award_type,
            "type_description": award_type_desc,
        }

        contract_field_map = {"description": "award_description"}

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=contract_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        contract_instance = load_data_into_model(TransactionFPDS(), row, as_dict=True)  # thrown away

        detached_award_proc_unique = contract_instance["detached_award_proc_unique"]
        unique_fpds = TransactionFPDS.objects.filter(detached_award_proc_unique=detached_award_proc_unique)

        if unique_fpds.first():
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).update(
                **transaction_normalized_dict
            )

            # update TransactionFPDS
            unique_fpds.update(**contract_instance)
        else:
            # create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # create TransactionFPDS
            transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance)
            transaction_fpds.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = detached_award_proc_unique
        legal_entity.save()
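# `format_date` is a shared helper used throughout these loaders; presumably it
# parses the broker's date strings into `datetime.date` objects and returns
# None on empty or unparseable input. A hypothetical sketch (assumes the
# module-level `datetime` import used above; real parsing rules may be broader):
def format_date(date_string):
    if not date_string:
        return None
    try:
        # keep only the date portion, tolerating trailing time components
        return datetime.strptime(str(date_string)[:10], "%Y-%m-%d").date()
    except ValueError:
        return None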
def insert_new_fabs(self, to_insert):
    place_of_performance_field_map = {
        "location_country_code": "place_of_perform_country_c",
        "country_name": "place_of_perform_country_n",
        "state_code": "place_of_perfor_state_code",
        "state_name": "place_of_perform_state_nam",
        "city_name": "place_of_performance_city",
        "county_name": "place_of_perform_county_na",
        "county_code": "place_of_perform_county_co",
        "foreign_location_description": "place_of_performance_forei",
        "zip_4a": "place_of_performance_zip4a",
        "congressional_code": "place_of_performance_congr",
        "performance_code": "place_of_performance_code",
        "zip_last4": "place_of_perform_zip_last4",
        "zip5": "place_of_performance_zip5",
    }

    legal_entity_location_field_map = {
        "location_country_code": "legal_entity_country_code",
        "country_name": "legal_entity_country_name",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "city_name": "legal_entity_city_name",
        "city_code": "legal_entity_city_code",
        "county_name": "legal_entity_county_name",
        "county_code": "legal_entity_county_code",
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "foreign_location_description": "legal_entity_foreign_descr",
        "congressional_code": "legal_entity_congressional",
        "zip_last4": "legal_entity_zip_last4",
        "zip5": "legal_entity_zip5",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "foreign_city_name": "legal_entity_foreign_city",
    }

    for row in to_insert:
        upper_case_dict_values(row)

        # Create new LegalEntityLocation and LegalEntity from the row data
        legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True})
        recipient_name = row['awardee_or_recipient_legal']
        legal_entity = LegalEntity.objects.create(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name if recipient_name is not None else "",
            parent_recipient_unique_id=row['ultimate_parent_unique_ide'],
        )
        legal_entity_value_map = {
            "location": legal_entity_location,
            "business_categories": get_business_categories(row=row, data_type='fabs'),
            "business_types_description": row['business_types_desc'],
        }
        legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

        # Create the place of performance location
        pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True})

        # Find the toptier awards from the subtier awards
        awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"])
        funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"])

        # Generate the unique Award ID
        # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri

        # this will raise an exception if the cast to an int fails, that's ok since we don't want to process
        # non-numeric record type values
        record_type_int = int(row['record_type'])
        if record_type_int == 1:
            uri = row['uri'] if row['uri'] else '-NONE-'
            fain = '-NONE-'
        elif record_type_int in (2, 3):
            uri = '-NONE-'
            fain = row['fain'] if row['fain'] else '-NONE-'
        else:
            msg = "Invalid record type encountered for the following afa_generated_unique record: {}"
            raise Exception(msg.format(row['afa_generated_unique']))

        astac = row["awarding_sub_tier_agency_c"] if row["awarding_sub_tier_agency_c"] else "-NONE-"
        generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri)

        # Create the summary Award
        (created, award) = Award.get_or_create_summary_award(
            generated_unique_award_id=generated_unique_id,
            fain=row['fain'],
            uri=row['uri'],
            record_type=row['record_type'],
        )
        award.save()

        # Append row to list of Awards updated
        AWARD_UPDATE_ID_LIST.append(award.id)

        try:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date()
        except ValueError:
            last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date()

        parent_txn_value_map = {
            "award": award,
            "awarding_agency": awarding_agency,
            "funding_agency": funding_agency,
            "recipient": legal_entity,
            "place_of_performance": pop_location,
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
            "last_modified_date": last_mod_date,
            "type_description": row['assistance_type_desc'],
            "transaction_unique_id": row['afa_generated_unique'],
            "generated_unique_award_id": generated_unique_id,
        }

        fad_field_map = {
            "type": "assistance_type",
            "description": "award_description",
            "funding_amount": "total_funding_amount",
        }

        transaction_normalized_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True,
        )

        financial_assistance_data = load_data_into_model(TransactionFABS(), row, as_dict=True)  # thrown away

        afa_generated_unique = financial_assistance_data['afa_generated_unique']
        unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique)

        if unique_fabs.first():
            transaction_normalized_dict["update_date"] = datetime.now(timezone.utc)
            transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"])

            # Update TransactionNormalized
            TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).update(
                **transaction_normalized_dict)

            # Update TransactionFABS
            unique_fabs.update(**financial_assistance_data)
        else:
            # Create TransactionNormalized
            transaction = TransactionNormalized(**transaction_normalized_dict)
            transaction.save()

            # Create TransactionFABS
            transaction_fabs = TransactionFABS(transaction=transaction, **financial_assistance_data)
            transaction_fabs.save()

        # Update legal entity to map back to transaction
        legal_entity.transaction_unique_id = afa_generated_unique
        legal_entity.save()
def update_transaction_assistance(db_cursor, fiscal_year=None, page=1, limit=500000):

    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFABS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('published_award_financial_assistance_id', flat=True)

    query = "SELECT * FROM published_award_financial_assistance"
    arguments = []

    if fiscal_year:
        # compute the FY date bounds only when a fiscal year was requested
        fy_begin = '10/01/' + str(fiscal_year - 1)
        fy_end = '09/30/' + str(fiscal_year)
        if arguments:
            query += " AND"
        else:
            query += " WHERE"
        query += ' action_date::Date BETWEEN %s AND %s'
        arguments += [fy_begin]
        arguments += [fy_end]
    query += ' ORDER BY published_award_financial_assistance_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    logger.info("Executing query on Broker DB => " + query % (arguments[0], arguments[1], arguments[2], arguments[3]))

    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    award_financial_assistance_data = dictfetchall(db_cursor)

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "county_code": "legal_entity_county_code",
        "county_name": "legal_entity_county_name",
        "foreign_city_name": "legal_entity_foreign_city",
        "foreign_postal_code": "legal_entity_foreign_posta",
        "foreign_province": "legal_entity_foreign_provi",
        "state_code": "legal_entity_state_code",
        "state_name": "legal_entity_state_name",
        "zip5": "legal_entity_zip5",
        "zip_last4": "legal_entity_zip_last4",
        "location_country_code": "legal_entity_country_code"
    }

    place_of_performance_field_map = {
        "city_name": "place_of_performance_city",
        "performance_code": "place_of_performance_code",
        "congressional_code": "place_of_performance_congr",
        "county_name": "place_of_perform_county_na",
        "foreign_location_description": "place_of_performance_forei",
        "state_name": "place_of_perform_state_nam",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    fad_field_map = {
        "type": "assistance_type",
        "description": "award_description",
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(award_financial_assistance_data)  # - rows_loaded

    logger.info("Processing " + str(total_rows) + " rows of assistance data")

    # skip_count = 0

    # ROW ITERATION STARTS HERE

    lel_bulk = []
    pop_bulk = []
    legal_entity_bulk = []
    award_bulk = []

    transaction_assistance_bulk = []
    transaction_normalized_bulk = []

    logger.info('Getting legal entity location objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Recipient flag is true for LeL
        legal_entity_location = get_or_create_location(
            legal_entity_location_field_map, row, {"recipient_flag": True}, save=False
        )

        lel_bulk.append(legal_entity_location)

    logger.info('Bulk creating {} legal entity location rows...'.format(len(lel_bulk)))
    try:
        Location.objects.bulk_create(lel_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting place of performance objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # Place of Performance flag is true for PoP
        pop_location = get_or_create_location(
            place_of_performance_field_map, row, {"place_of_performance_flag": True}, save=False
        )

        pop_bulk.append(pop_location)

    logger.info('Bulk creating {} place of performance rows...'.format(len(pop_bulk)))
    try:
        Location.objects.bulk_create(pop_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting legal entity objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        recipient_name = row.get('awardee_or_recipient_legal', '')

        legal_entity = LegalEntity.objects.filter(
            recipient_unique_id=row['awardee_or_recipient_uniqu'],
            recipient_name=recipient_name).first()

        if legal_entity is None:
            legal_entity = LegalEntity(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name)
            legal_entity_value_map = {
                "location": lel_bulk[index - 1],
            }
            legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=False)

        legal_entity_bulk.append(legal_entity)

    logger.info('Bulk creating {} legal entity rows...'.format(len(legal_entity_bulk)))
    try:
        LegalEntity.objects.bulk_create(legal_entity_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    awarding_agency_list = []
    funding_agency_list = []

    logger.info('Getting award objects for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        # If awarding toptier agency code (aka CGAC) is not supplied on the D2 record,
        # use the sub tier code to look it up. This code assumes that all incoming
        # records will supply an awarding subtier agency code
        if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
            awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
            awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
            awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
            row['awarding_agency_code'] = awarding_cgac_code

        # If funding toptier agency code (aka CGAC) is empty, try using the sub
        # tier funding code to look it up. Unlike the awarding agency, we can't
        # assume that the funding agency subtier code will always be present.
        if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
            funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
            if funding_subtier_agency_id is not None:
                funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
            else:
                funding_cgac_code = None
            row['funding_agency_code'] = funding_cgac_code

        # Find the award that this award transaction belongs to. If it doesn't exist, create it.
        awarding_agency = Agency.get_by_toptier_subtier(
            row['awarding_agency_code'],
            row["awarding_sub_tier_agency_c"]
        )
        funding_agency = Agency.get_by_toptier_subtier(
            row['funding_agency_code'],
            row["funding_sub_tier_agency_co"]
        )
        awarding_agency_list.append(awarding_agency)
        funding_agency_list.append(funding_agency)

        # award.save() is called in Award.get_or_create_summary_award by default
        created, award = Award.get_or_create_summary_award(
            awarding_agency=awarding_agency,
            fain=row.get('fain'),
            uri=row.get('uri'),
            save=False
        )

        award_bulk.append(award)
        award_update_id_list.append(award.id)

    logger.info('Bulk creating {} award rows...'.format(len(award_bulk)))
    try:
        Award.objects.bulk_create(award_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')

    logger.info('Getting transaction_normalized for {} rows...'.format(len(award_financial_assistance_data)))
    for index, row in enumerate(award_financial_assistance_data, 1):

        parent_txn_value_map = {
            "award": award_bulk[index - 1],
            "awarding_agency": awarding_agency_list[index - 1],
            "funding_agency": funding_agency_list[index - 1],
            "recipient": legal_entity_bulk[index - 1],
            "place_of_performance": pop_bulk[index - 1],
            "period_of_performance_start_date": format_date(row['period_of_performance_star']),
            "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
            "action_date": format_date(row['action_date']),
        }

        transaction_dict = load_data_into_model(
            TransactionNormalized(),  # thrown away
            row,
            field_map=fad_field_map,
            value_map=parent_txn_value_map,
            as_dict=True)

        transaction_normalized = TransactionNormalized.get_or_create_transaction(**transaction_dict)
        transaction_normalized.fiscal_year = fy(transaction_normalized.action_date)
        transaction_normalized_bulk.append(transaction_normalized)

    logger.info('Bulk creating {} TransactionNormalized rows...'.format(len(transaction_normalized_bulk)))
    try:
        TransactionNormalized.objects.bulk_create(transaction_normalized_bulk)
    except IntegrityError:
        logger.info('Tried and failed to insert duplicate transaction_normalized row. Continuing...')

    for index, row in enumerate(award_financial_assistance_data, 1):
        financial_assistance_data = load_data_into_model(
            TransactionFABS(),  # thrown away
            row,
            as_dict=True)

        transaction_assistance = TransactionFABS(
            transaction=transaction_normalized_bulk[index - 1], **financial_assistance_data)
        transaction_assistance_bulk.append(transaction_assistance)

    logger.info('Bulk creating TransactionFABS rows...')
    try:
        TransactionFABS.objects.bulk_create(transaction_assistance_bulk)
    except IntegrityError:
        logger.info('!!! DUPLICATES FOUND. Continuing...')
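# `dictfetchall` follows the standard recipe from the Django documentation for
# turning raw-cursor rows into dicts keyed by column name. Reproduced here for
# reference, on the assumption the project's helper matches the docs' version:
def dictfetchall(cursor):
    # Return all rows from a cursor as a list of dicts
    columns = [col[0] for col in cursor.description]
    return [dict(zip(columns, row)) for row in cursor.fetchall()]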
def update_transaction_contract(db_cursor, fiscal_year=None, page=1, limit=500000):

    # logger.info("Getting IDs for what's currently in the DB...")
    # current_ids = TransactionFPDS.objects
    #
    # if fiscal_year:
    #     current_ids = current_ids.filter(action_date__fy=fiscal_year)
    #
    # current_ids = current_ids.values_list('detached_award_procurement_id', flat=True)

    query = "SELECT * FROM detached_award_procurement"
    arguments = []

    # Only build the fiscal year window when a fiscal year was requested;
    # computing it unconditionally would raise a TypeError when fiscal_year is None
    if fiscal_year:
        query += ' WHERE action_date::Date BETWEEN %s AND %s'
        arguments += ['10/01/' + str(fiscal_year - 1), '09/30/' + str(fiscal_year)]

    query += ' ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s'
    arguments += [limit, (page - 1) * limit]

    # Log the query and its parameters separately; interpolating a fixed number of
    # arguments into the query string breaks when the fiscal year filter is absent
    logger.info("Executing query on Broker DB => %s with arguments %s", query, arguments)
    db_cursor.execute(query, arguments)

    logger.info("Running dictfetchall on db_cursor")
    procurement_data = dictfetchall(db_cursor)

    legal_entity_location_field_map = {
        "address_line1": "legal_entity_address_line1",
        "address_line2": "legal_entity_address_line2",
        "address_line3": "legal_entity_address_line3",
        "location_country_code": "legal_entity_country_code",
        "city_name": "legal_entity_city_name",
        "congressional_code": "legal_entity_congressional",
        "state_code": "legal_entity_state_code",
        "zip4": "legal_entity_zip4"
    }

    legal_entity_location_value_map = {
        "recipient_flag": True
    }

    place_of_performance_field_map = {
        # not sure place_of_performance_locat maps exactly to city name
        # "city_name": "place_of_performance_locat",  # location id doesn't mean it's a city. Can't use this mapping
        "congressional_code": "place_of_performance_congr",
        "state_code": "place_of_performance_state",
        "zip4": "place_of_performance_zip4a",
        "location_country_code": "place_of_perform_country_c"
    }

    place_of_performance_value_map = {
        "place_of_performance_flag": True
    }

    contract_field_map = {
        "type": "contract_award_type",
        "description": "award_description"
    }

    logger.info("Getting total rows")
    # rows_loaded = len(current_ids)
    total_rows = len(procurement_data)  # - rows_loaded

    logger.info("Processing " + str(total_rows) + " rows of procurement data")

    # skip_count = 0
    start_time = datetime.now()
    for index, row in enumerate(procurement_data, 1):
        with db_transaction.atomic():
            # if TransactionFPDS.objects.values('detached_award_procurement_id').\
            #         filter(detached_award_procurement_id=str(row['detached_award_procurement_id'])).first():
            #     skip_count += 1
            #
            #     if not (skip_count % 100):
            #         logger.info('Skipped {} records so far'.format(str(skip_count)))

            if not (index % 100):
                logger.info('D1 File Load: Loading row {} of {} ({})'.format(
                    str(index), str(total_rows), datetime.now() - start_time))

            recipient_name = row['awardee_or_recipient_legal']
            if recipient_name is None:
                recipient_name = ""

            legal_entity_location, created = get_or_create_location(
                legal_entity_location_field_map, row, copy(legal_entity_location_value_map)
            )

            # Create the legal entity if it doesn't exist
            legal_entity, created = LegalEntity.objects.get_or_create(
                recipient_unique_id=row['awardee_or_recipient_uniqu'],
                recipient_name=recipient_name
            )

            if created:
                legal_entity_value_map = {
                    "location": legal_entity_location,
                }
                legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True)

            # Create the place of performance location
            pop_location, created = get_or_create_location(
                place_of_performance_field_map, row, copy(place_of_performance_value_map))

            # If the awarding toptier agency code (aka CGAC) is not supplied on the D1 record,
            # use the sub tier code to look it up. This code assumes that all incoming
            # records will supply an awarding subtier agency code
            if row['awarding_agency_code'] is None or len(row['awarding_agency_code'].strip()) < 1:
                awarding_subtier_agency_id = subtier_agency_map[row["awarding_sub_tier_agency_c"]]
                awarding_toptier_agency_id = subtier_to_agency_map[awarding_subtier_agency_id]['toptier_agency_id']
                awarding_cgac_code = toptier_agency_map[awarding_toptier_agency_id]
                row['awarding_agency_code'] = awarding_cgac_code

            # If the funding toptier agency code (aka CGAC) is empty, try using the sub
            # tier funding code to look it up. Unlike the awarding agency, we can't
            # assume that the funding agency subtier code will always be present.
            if row['funding_agency_code'] is None or len(row['funding_agency_code'].strip()) < 1:
                funding_subtier_agency_id = subtier_agency_map.get(row["funding_sub_tier_agency_co"])
                if funding_subtier_agency_id is not None:
                    funding_toptier_agency_id = subtier_to_agency_map[funding_subtier_agency_id]['toptier_agency_id']
                    funding_cgac_code = toptier_agency_map[funding_toptier_agency_id]
                else:
                    funding_cgac_code = None
                row['funding_agency_code'] = funding_cgac_code

            # Find the award that this award transaction belongs to. If it doesn't exist, create it.
            awarding_agency = Agency.get_by_toptier_subtier(
                row['awarding_agency_code'],
                row["awarding_sub_tier_agency_c"]
            )
            created, award = Award.get_or_create_summary_award(
                awarding_agency=awarding_agency,
                piid=row.get('piid'),
                fain=row.get('fain'),
                uri=row.get('uri'),
                parent_award_piid=row.get('parent_award_id'))
            award.save()

            award_update_id_list.append(award.id)
            award_contract_update_id_list.append(award.id)

            parent_txn_value_map = {
                "award": award,
                "awarding_agency": awarding_agency,
                "funding_agency": Agency.get_by_toptier_subtier(row['funding_agency_code'],
                                                                row["funding_sub_tier_agency_co"]),
                "recipient": legal_entity,
                "place_of_performance": pop_location,
                "period_of_performance_start_date": format_date(row['period_of_performance_star']),
                "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']),
                "action_date": format_date(row['action_date']),
            }

            transaction_dict = load_data_into_model(
                TransactionNormalized(),  # thrown away
                row,
                field_map=contract_field_map,
                value_map=parent_txn_value_map,
                as_dict=True)

            transaction = TransactionNormalized.get_or_create_transaction(**transaction_dict)
            transaction.save()

            contract_instance = load_data_into_model(
                TransactionFPDS(),  # thrown away
                row,
                as_dict=True)

            transaction_contract = TransactionFPDS(transaction=transaction, **contract_instance)
            # catch exception and do nothing if we see
            # "django.db.utils.IntegrityError: duplicate key value violates unique constraint"
            try:
                transaction_contract.save()
            except IntegrityError:
                pass
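# NOTE: a pure-function restatement of the CGAC backfill above, assuming the
# module-level lookup maps used there: subtier_agency_map (subtier code ->
# subtier agency id), subtier_to_agency_map (subtier agency id ->
# {'toptier_agency_id': ...}) and toptier_agency_map (toptier agency id ->
# CGAC code). The function name is hypothetical.
def backfill_cgac_code(agency_code, sub_tier_code, required=True):
    if agency_code and agency_code.strip():
        return agency_code  # already supplied; nothing to backfill
    subtier_agency_id = subtier_agency_map.get(sub_tier_code)
    if subtier_agency_id is None:
        if required:
            # awarding records are assumed to always carry a subtier code
            raise KeyError(sub_tier_code)
        return None  # funding subtier codes may legitimately be missing
    toptier_agency_id = subtier_to_agency_map[subtier_agency_id]['toptier_agency_id']
    return toptier_agency_map[toptier_agency_id]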
def insert_new_fabs(to_insert): fabs_normalized_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } fabs_field_map = { "officer_1_name": "high_comp_officer1_full_na", "officer_1_amount": "high_comp_officer1_amount", "officer_2_name": "high_comp_officer2_full_na", "officer_2_amount": "high_comp_officer2_amount", "officer_3_name": "high_comp_officer3_full_na", "officer_3_amount": "high_comp_officer3_amount", "officer_4_name": "high_comp_officer4_full_na", "officer_4_amount": "high_comp_officer4_amount", "officer_5_name": "high_comp_officer5_full_na", "officer_5_amount": "high_comp_officer5_amount", } update_award_ids = [] for row in to_insert: upper_case_dict_values(row) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=row["unique_award_key"], fain=row["fain"], uri=row["uri"], record_type=row["record_type"], ) award.save() # Append row to list of Awards updated update_award_ids.append(award.id) try: last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row["modified_at"]), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), "period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "type_description": row["assistance_type_desc"], "transaction_unique_id": row["afa_generated_unique"], "business_categories": get_business_categories(row=row, data_type="fabs"), } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fabs_normalized_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model( TransactionFABS(), row, field_map=fabs_field_map, as_dict=True # thrown away ) # Hack to cut back on the number of warnings dumped to the log. 
financial_assistance_data["updated_at"] = cast_datetime_to_utc( financial_assistance_data["updated_at"]) financial_assistance_data["created_at"] = cast_datetime_to_utc( financial_assistance_data["created_at"]) financial_assistance_data["modified_at"] = cast_datetime_to_utc( financial_assistance_data["modified_at"]) afa_generated_unique = financial_assistance_data[ "afa_generated_unique"] unique_fabs = TransactionFABS.objects.filter( afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction_normalized = TransactionNormalized( **transaction_normalized_dict) transaction_normalized.save() # Create TransactionFABS transaction_fabs = TransactionFABS( transaction=transaction_normalized, **financial_assistance_data) transaction_fabs.save() return update_award_ids
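# NOTE: a condensed sketch of the insert-or-update branch above. Django's
# update_or_create() would collapse the filter().first() dance into one call,
# but the two-step form keeps the TransactionNormalized and TransactionFABS
# updates as plain UPDATE queries. Either form can race under concurrent
# writers; the wrapper name is hypothetical.
def upsert_fabs(transaction_normalized_dict, financial_assistance_data):
    existing = TransactionFABS.objects.filter(
        afa_generated_unique=financial_assistance_data["afa_generated_unique"]).first()
    if existing:
        TransactionNormalized.objects.filter(
            id=existing.transaction_id).update(**transaction_normalized_dict)
        TransactionFABS.objects.filter(pk=existing.pk).update(**financial_assistance_data)
    else:
        transaction = TransactionNormalized.objects.create(**transaction_normalized_dict)
        TransactionFABS.objects.create(transaction=transaction, **financial_assistance_data)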
def insert_new_fpds(self, to_insert, total_rows): logger.info('Starting insertion of new FPDS data') place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5" } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5" } start_time = datetime.now() for index, row in enumerate(to_insert, 1): if not (index % 1000): logger.info( 'Inserting Stale FPDS: Inserting row {} of {} ({})'.format( str(index), str(total_rows), datetime.now() - start_time)) for key in row: if isinstance(row[key], str): row[key] = row[key].upper() # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, { "recipient_flag": True, "is_fpds": True }) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "") legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fpds'), "is_fpds": True } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id generated_unique_id = 'CONT_AW_' + (row['agency_id'] if row['agency_id'] else '-NONE-') + '_' + \ (row['referenced_idv_agency_iden'] if row['referenced_idv_agency_iden'] else '-NONE-') + '_' + \ (row['piid'] if row['piid'] else '-NONE-') + '_' + \ (row['parent_award_id'] if row['parent_award_id'] else '-NONE-') # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, piid=row['piid']) award.parent_award_piid = row.get('parent_award_id') award.save() # Append row to list of Awards updated award_update_id_list.append(award.id) try: last_mod_date = datetime.strptime(str( row['last_modified']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['last_modified']), "%Y-%m-%d %H:%M:%S").date() 
parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "transaction_unique_id": row['detached_award_proc_unique'], "generated_unique_award_id": generated_unique_id, "is_fpds": True } contract_field_map = { "type": "contract_award_type", "type_description": "contract_award_type_desc", "description": "award_description" } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True) contract_instance = load_data_into_model( TransactionFPDS(), # thrown away row, as_dict=True) detached_award_proc_unique = contract_instance[ 'detached_award_proc_unique'] unique_fpds = TransactionFPDS.objects.filter( detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.utcnow() transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fpds.first().transaction.id).\ update(**transaction_normalized_dict) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized( **transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save()
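# NOTE: the two-format strptime fallback above recurs throughout this file,
# because Broker timestamps sometimes omit microseconds. A shared helper
# (name hypothetical) keeps the ValueError dance in one place:
from datetime import datetime

def parse_broker_timestamp_to_date(value):
    text = str(value)
    for fmt in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S"):
        try:
            return datetime.strptime(text, fmt).date()
        except ValueError:
            continue
    raise ValueError("Unrecognized Broker timestamp: {}".format(text))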
def load_file_c(submission_attributes, db_cursor, award_financial_frame): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we # change up the order of file loading if not award_financial_frame.size: logger.warning('No File C (award financial) data found, skipping...') return reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$') # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} award_financial_frame['txn'] = award_financial_frame.apply(get_award_financial_transaction, axis=1) award_financial_frame['awarding_agency'] = award_financial_frame.apply(get_awarding_agency, axis=1) award_financial_frame['object_class'] = award_financial_frame.apply(get_or_create_object_class_rw, axis=1, logger=logger) award_financial_frame['program_activity'] = award_financial_frame.apply(get_or_create_program_activity, axis=1, submission_attributes=submission_attributes) total_rows = award_financial_frame.shape[0] start_time = datetime.now() awards_touched = [] # format award_financial_frame float_cols = ['transaction_obligated_amou'] award_financial_frame[float_cols] = award_financial_frame[float_cols].fillna(0) award_financial_frame = award_financial_frame.replace({np.nan: None}) for index, row in enumerate(award_financial_frame.to_dict(orient='records'), 1): if not (index % 100): logger.info('C File Load: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) upper_case_dict_values(row) # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get('tas_id'), db_cursor) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # Find a matching transaction record, so we can use its subtier agency information to match to (or create) an # Award record. # Find the award that this award transaction belongs to. If it doesn't exist, create it. 
filters = {} if row.get('piid'): filters['piid'] = row.get('piid') filters['parent_piid'] = row.get('parent_award_id') else: if row.get('fain') and not row.get('uri'): filters['fain'] = row.get('fain') elif row.get('uri') and not row.get('fain'): filters['uri'] = row.get('uri') else: filters['fain'] = row.get('fain') filters['uri'] = row.get('uri') award = find_matching_award(**filters) if award: awards_touched += [award] award_financial_data = FinancialAccountsByAwards() value_map_faba = { 'award': award, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, 'treasury_account': treasury_account, 'object_class': row.get('object_class'), 'program_activity': row.get('program_activity'), } # Still using the cpe|fyb regex compiled above for reverse load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=True, reverse=reverse) awards_cache.clear() for key in skipped_tas: logger.info('Skipped %d rows due to missing TAS: %s', skipped_tas[key]['count'], key) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]['count'] logger.info('Skipped a total of {} TAS rows for File C'.format(total_tas_skipped)) return [award.id for award in awards_touched if award]
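# NOTE: a pure-function restatement of the filter-building branch above, making
# the precedence explicit: a PIID marks a contract (the parent PIID rides
# along); otherwise FAIN and URI are tried individually, and only rows carrying
# both (or neither) send both filters. The helper name is hypothetical.
def build_award_lookup_filters(row):
    if row.get('piid'):
        return {'piid': row.get('piid'), 'parent_piid': row.get('parent_award_id')}
    if row.get('fain') and not row.get('uri'):
        return {'fain': row.get('fain')}
    if row.get('uri') and not row.get('fain'):
        return {'uri': row.get('uri')}
    return {'fain': row.get('fain'), 'uri': row.get('uri')}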
def load_file_c(submission_attributes, db_cursor, award_financial_frame): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here # to ensure that we don't overwrite it as we change up the order of # file loading reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$') award_financial_frame['txn'] = award_financial_frame.apply( get_award_financial_transaction, axis=1) award_financial_frame['awarding_agency'] = award_financial_frame.apply( get_awarding_agency, axis=1) award_financial_frame['object_class'] = award_financial_frame.apply( get_or_create_object_class_rw, axis=1, logger=logger) award_financial_frame['program_activity'] = award_financial_frame.apply( get_or_create_program_activity, axis=1, submission_attributes=submission_attributes) # for row in award_financial_data: for row in award_financial_frame.replace({ np.nan: None }).to_dict(orient='records'): # Check and see if there is an entry for this TAS treasury_account = get_treasury_appropriation_account_tas_lookup( row.get('tas_id'), db_cursor) if treasury_account is None: raise Exception('Could not find appropriation account for TAS: ' + row['tas']) # Find a matching transaction record, so we can use its # subtier agency information to match to (or create) an Award record # Find the award that this award transaction belongs to. If it doesn't exist, create it. created, award = Award.get_or_create_summary_award( awarding_agency=row['awarding_agency'], piid=row.get('piid'), fain=row.get('fain'), uri=row.get('uri'), parent_award_id=row.get('parent_award_id'), use_cache=False) award_financial_data = FinancialAccountsByAwards() value_map = { 'award': award, 'submission': submission_attributes, 'reporting_period_start': submission_attributes.reporting_period_start, 'reporting_period_end': submission_attributes.reporting_period_end, 'treasury_account': treasury_account, 'object_class': row.get('object_class'), 'program_activity': row.get('program_activity'), } # Still using the cpe|fyb regex compiled above for reverse afd = load_data_into_model(award_financial_data, row, value_map=value_map, save=True, reverse=reverse) awards_cache.clear()
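# NOTE: a self-contained demonstration of the `reverse` pattern shared by the
# File B and File C loaders above. It matches monetary columns ending in _cpe
# (current period ending) or _fyb (fiscal year beginning), plus the exact name
# transaction_obligated_amount; load_data_into_model applies its reversal
# handling (per the parameter's name) to matching fields. Note that the
# truncated Broker column name transaction_obligated_amou does NOT match the
# anchored pattern.
import re

reverse = re.compile(r'(_(cpe|fyb)$)|^transaction_obligated_amount$')
assert reverse.search('gross_outlay_amount_by_tas_cpe')
assert reverse.search('budget_authority_available_fyb')
assert reverse.search('transaction_obligated_amount')
assert reverse.search('transaction_obligated_amou') is None
assert reverse.search('object_class') is None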
def load_locations(self, fpds_broker_data, total_rows, pop_flag=False): start_time = datetime.now() for index, row in enumerate(fpds_broker_data, 1): if not (index % 10000): logger.info('Locations: Loading row {} of {} ({})'.format(str(index), str(total_rows), datetime.now() - start_time)) if pop_flag: location_value_map = {"place_of_performance_flag": True} field_map = pop_field_map else: location_value_map = {'recipient_flag': True} field_map = le_field_map row = canonicalize_location_dict(row) # THIS ASSUMPTION DOES NOT HOLD FOR FPDS SINCE IT DOES NOT HAVE A PLACE OF PERFORMANCE CODE # We can assume that if the country code is blank and the place of performance code is NOT '00FORGN', then # the country code is USA # if pop_flag and not country_code and pop_code != '00FORGN': # row[field_map.get('location_country_code')] = 'USA' # Get country code obj location_country_code_obj = self.country_code_map.get(row[field_map.get('location_country_code')]) # Fix state code periods state_code = row.get(field_map.get('state_code')) if state_code is not None: location_value_map.update({'state_code': state_code.replace('.', '')}) if location_country_code_obj: location_value_map.update({ 'location_country_code': location_country_code_obj, 'country_name': location_country_code_obj.country_name }) if location_country_code_obj.country_code != 'USA': location_value_map.update({ 'state_code': None, 'state_name': None }) else: # no country found for this code location_value_map.update({ 'location_country_code': None, 'country_name': None }) location_instance_data = load_data_into_model( Location(), row, value_map=location_value_map, field_map=field_map, as_dict=True) loc_instance = Location(**location_instance_data) loc_instance.load_city_county_data() loc_instance.fill_missing_state_data() loc_instance.fill_missing_zip5() if pop_flag: pop_bulk.append(loc_instance) else: lel_bulk.append(loc_instance) if pop_flag: logger.info('Bulk creating POP Locations (batch_size: {})...'.format(BATCH_SIZE)) Location.objects.bulk_create(pop_bulk, batch_size=BATCH_SIZE) else: logger.info('Bulk creating LE Locations (batch_size: {})...'.format(BATCH_SIZE)) Location.objects.bulk_create(lel_bulk, batch_size=BATCH_SIZE)
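# NOTE: a pure-function sketch of the per-row normalization above: strip stray
# periods out of state codes, then, once the country object is resolved, blank
# the state fields for non-USA rows. The helper name is hypothetical.
def normalize_state_and_country(value_map, state_code, country):
    if state_code is not None:
        value_map['state_code'] = state_code.replace('.', '')
    if country:
        value_map['location_country_code'] = country
        value_map['country_name'] = country.country_name
        if country.country_code != 'USA':
            value_map['state_code'] = None
            value_map['state_name'] = None
    else:
        # no country record matched this code
        value_map['location_country_code'] = None
        value_map['country_name'] = None
    return value_map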
def insert_new_fabs(to_insert): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perform_country_n", "state_code": "place_of_perfor_state_code", "state_name": "place_of_perform_state_nam", "city_name": "place_of_performance_city", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "foreign_location_description": "place_of_performance_forei", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "performance_code": "place_of_performance_code", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_name", "city_name": "legal_entity_city_name", "city_code": "legal_entity_city_code", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "foreign_location_description": "legal_entity_foreign_descr", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", "foreign_postal_code": "legal_entity_foreign_posta", "foreign_province": "legal_entity_foreign_provi", "foreign_city_name": "legal_entity_foreign_city", } update_award_ids = [] for row in to_insert: upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location(legal_entity_location_field_map, row, {"recipient_flag": True}) recipient_name = row['awardee_or_recipient_legal'] legal_entity = LegalEntity.objects.create( recipient_unique_id=row['awardee_or_recipient_uniqu'], recipient_name=recipient_name if recipient_name is not None else "", parent_recipient_unique_id=row['ultimate_parent_unique_ide'], ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type='fabs'), "business_types_description": row['business_types_desc'], } legal_entity = load_data_into_model(legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only(row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only(row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "ASST_AW_" + awarding_sub_tier_agency_c + fain + uri # this will raise an exception if the cast to an int fails, that's ok since we don't want to process # non-numeric record type values record_type_int = int(row['record_type']) if record_type_int == 1: uri = row['uri'] if row['uri'] else '-NONE-' fain = '-NONE-' elif record_type_int in (2, 3): uri = '-NONE-' fain = row['fain'] if row['fain'] else '-NONE-' else: msg = "Invalid record type encountered for the following afa_generated_unique record: {}" raise Exception(msg.format(row['afa_generated_unique'])) astac = row["awarding_sub_tier_agency_c"] if row["awarding_sub_tier_agency_c"] else "-NONE-" generated_unique_id = "ASST_AW_{}_{}_{}".format(astac, fain, uri) # Create the summary Award (created, award) = Award.get_or_create_summary_award( 
generated_unique_award_id=generated_unique_id, fain=row['fain'], uri=row['uri'], record_type=row['record_type'], ) award.save() # Append row to list of Awards updated update_award_ids.append(award.id) try: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row['modified_at']), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row['period_of_performance_star']), "period_of_performance_current_end_date": format_date(row['period_of_performance_curr']), "action_date": format_date(row['action_date']), "last_modified_date": last_mod_date, "type_description": row['assistance_type_desc'], "transaction_unique_id": row['afa_generated_unique'], "generated_unique_award_id": generated_unique_id, } fad_field_map = { "type": "assistance_type", "description": "award_description", "funding_amount": "total_funding_amount", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=fad_field_map, value_map=parent_txn_value_map, as_dict=True, ) financial_assistance_data = load_data_into_model(TransactionFABS(), row, as_dict=True) # thrown away # Hack to cut back on the number of warnings dumped to the log. financial_assistance_data['updated_at'] = cast_datetime_to_utc(financial_assistance_data['updated_at']) financial_assistance_data['created_at'] = cast_datetime_to_utc(financial_assistance_data['created_at']) financial_assistance_data['modified_at'] = cast_datetime_to_utc(financial_assistance_data['modified_at']) afa_generated_unique = financial_assistance_data['afa_generated_unique'] unique_fabs = TransactionFABS.objects.filter(afa_generated_unique=afa_generated_unique) if unique_fabs.first(): transaction_normalized_dict["update_date"] = datetime.now(timezone.utc) transaction_normalized_dict["fiscal_year"] = fy(transaction_normalized_dict["action_date"]) # Update TransactionNormalized TransactionNormalized.objects.filter(id=unique_fabs.first().transaction.id).update( **transaction_normalized_dict ) # Update TransactionFABS unique_fabs.update(**financial_assistance_data) else: # Create TransactionNormalized transaction_normalized = TransactionNormalized(**transaction_normalized_dict) transaction_normalized.save() # Create TransactionFABS transaction_fabs = TransactionFABS(transaction=transaction_normalized, **financial_assistance_data) transaction_fabs.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = afa_generated_unique legal_entity.save() return update_award_ids
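# NOTE: a compact restatement of the FABS unique-award-key rule above: record
# type 1 keys on URI, record types 2 and 3 key on FAIN, anything else is
# rejected, and every missing part is rendered as '-NONE-'. The helper name is
# hypothetical.
def generate_fabs_unique_award_id(row):
    record_type = int(row['record_type'])  # non-numeric record types fail loudly, by design
    if record_type == 1:
        fain, uri = '-NONE-', row['uri'] or '-NONE-'
    elif record_type in (2, 3):
        fain, uri = row['fain'] or '-NONE-', '-NONE-'
    else:
        raise Exception('Invalid record type for record: {}'.format(row['afa_generated_unique']))
    astac = row['awarding_sub_tier_agency_c'] or '-NONE-'
    return 'ASST_AW_{}_{}_{}'.format(astac, fain, uri)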
def csv_tas_loader(self, file_path): field_map = { "treasury_account_identifier": "ACCT_NUM", "allocation_transfer_agency_id": "ATA", "agency_id": "AID", "beginning_period_of_availability": "BPOA", "ending_period_of_availability": "EPOA", "availability_type_code": "A", "main_account_code": "MAIN", "sub_account_code": "SUB", "account_title": "GWA_TAS_NAME", "reporting_agency_id": "Agency AID", "reporting_agency_name": "Agency Name", "budget_bureau_code": "ADMIN_ORG", "budget_bureau_name": "Admin Org Name", "fr_entity_code": "FR Entity Type", "fr_entity_description": "FR Entity Description", "budget_function_code": "Function Code", "budget_function_title": "Function Description", "budget_subfunction_code": "Sub Function Code", "budget_subfunction_title": "Sub Function Description", } value_map = { "data_source": "USA", "tas_rendering_label": self.generate_tas_rendering_label, "awarding_toptier_agency": None, "funding_toptier_agency": None, "internal_start_date": lambda row: datetime.strftime( datetime.strptime(row["DT_TM_ESTAB"], "%m/%d/%Y %H:%M:%S"), "%Y-%m-%d" ), "internal_end_date": lambda row: datetime.strftime( datetime.strptime(row["DT_END"], "%m/%d/%Y %H:%M:%S"), "%Y-%m-%d" ) if row["DT_END"] else None, } with RetrieveFileFromUri(file_path).get_file_object(True) as tas_list_file_object: # Get a total count for print out tas_list_reader = csv.DictReader(tas_list_file_object) total_count = len(list(tas_list_reader)) # Reset the reader back to the beginning of the file tas_list_file_object.seek(0) tas_list_reader = csv.DictReader(tas_list_file_object) for count, row in enumerate(tas_list_reader, 1): for key, value in row.items(): row[key] = value.strip() or None # Check to see if we need to update or create a TreasuryAppropriationAccount record current_record = TreasuryAppropriationAccount.objects.filter( treasury_account_identifier=row["ACCT_NUM"] ).first() taa_instance = current_record or TreasuryAppropriationAccount() # Don't load Financing TAS if row["financial_indicator_type2"] == "F": if taa_instance.treasury_account_identifier: taa_instance.delete() logger.info(" Row contains Financing TAS, Skipping...") continue load_data_into_model(taa_instance, row, field_map=field_map, value_map=value_map, save=True) if count % 1000 == 0: logger.info(" Loaded {} rows of {}".format(count, total_count))
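# NOTE: counting rows with len(list(csv.DictReader(...))) exhausts the
# underlying stream, which is why the loader above seeks back to zero and
# constructs a second DictReader before the real pass. The pattern in
# isolation:
import csv
import io

def count_then_iterate(file_object):
    total = len(list(csv.DictReader(file_object)))  # first pass: count only
    file_object.seek(0)                             # rewind the raw stream
    return total, csv.DictReader(file_object)       # fresh reader re-consumes the header row

total, reader = count_then_iterate(io.StringIO("A,B\n1,2\n3,4\n"))
assert total == 2 and [r['A'] for r in reader] == ['1', '3']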
def insert_new_fpds(self, to_insert, total_rows): place_of_performance_field_map = { "location_country_code": "place_of_perform_country_c", "country_name": "place_of_perf_country_desc", "state_code": "place_of_performance_state", "state_name": "place_of_perfor_state_desc", "city_name": "place_of_perform_city_name", "county_name": "place_of_perform_county_na", "county_code": "place_of_perform_county_co", "zip_4a": "place_of_performance_zip4a", "congressional_code": "place_of_performance_congr", "zip_last4": "place_of_perform_zip_last4", "zip5": "place_of_performance_zip5", } legal_entity_location_field_map = { "location_country_code": "legal_entity_country_code", "country_name": "legal_entity_country_name", "state_code": "legal_entity_state_code", "state_name": "legal_entity_state_descrip", "city_name": "legal_entity_city_name", "county_name": "legal_entity_county_name", "county_code": "legal_entity_county_code", "address_line1": "legal_entity_address_line1", "address_line2": "legal_entity_address_line2", "address_line3": "legal_entity_address_line3", "zip4": "legal_entity_zip4", "congressional_code": "legal_entity_congressional", "zip_last4": "legal_entity_zip_last4", "zip5": "legal_entity_zip5", } for index, row in enumerate(to_insert, 1): upper_case_dict_values(row) # Create new LegalEntityLocation and LegalEntity from the row data legal_entity_location = create_location( legal_entity_location_field_map, row, { "recipient_flag": True, "is_fpds": True }) recipient_name = row["awardee_or_recipient_legal"] legal_entity = LegalEntity.objects.create( recipient_unique_id=row["awardee_or_recipient_uniqu"], recipient_name=recipient_name if recipient_name is not None else "", ) legal_entity_value_map = { "location": legal_entity_location, "business_categories": get_business_categories(row=row, data_type="fpds"), "is_fpds": True, } set_legal_entity_boolean_fields(row) legal_entity = load_data_into_model( legal_entity, row, value_map=legal_entity_value_map, save=True) # Create the place of performance location pop_location = create_location(place_of_performance_field_map, row, {"place_of_performance_flag": True}) # Find the toptier awards from the subtier awards awarding_agency = Agency.get_by_subtier_only( row["awarding_sub_tier_agency_c"]) funding_agency = Agency.get_by_subtier_only( row["funding_sub_tier_agency_co"]) # Generate the unique Award ID # "CONT_AW_" + agency_id + referenced_idv_agency_iden + piid + parent_award_id generated_unique_id = ( "CONT_AW_" + (row["agency_id"] if row["agency_id"] else "-NONE-") + "_" + (row["referenced_idv_agency_iden"] if row["referenced_idv_agency_iden"] else "-NONE-") + "_" + (row["piid"] if row["piid"] else "-NONE-") + "_" + (row["parent_award_id"] if row["parent_award_id"] else "-NONE-")) # Create the summary Award (created, award) = Award.get_or_create_summary_award( generated_unique_award_id=generated_unique_id, piid=row["piid"]) award.parent_award_piid = row.get("parent_award_id") award.save() # Append row to list of Awards updated AWARD_UPDATE_ID_LIST.append(award.id) try: last_mod_date = datetime.strptime(str( row["last_modified"]), "%Y-%m-%d %H:%M:%S.%f").date() except ValueError: last_mod_date = datetime.strptime(str(row["last_modified"]), "%Y-%m-%d %H:%M:%S").date() parent_txn_value_map = { "award": award, "awarding_agency": awarding_agency, "funding_agency": funding_agency, "recipient": legal_entity, "place_of_performance": pop_location, "period_of_performance_start_date": format_date(row["period_of_performance_star"]), 
"period_of_performance_current_end_date": format_date(row["period_of_performance_curr"]), "action_date": format_date(row["action_date"]), "last_modified_date": last_mod_date, "transaction_unique_id": row["detached_award_proc_unique"], "generated_unique_award_id": generated_unique_id, "is_fpds": True, } contract_field_map = { "type": "contract_award_type", "type_description": "contract_award_type_desc", "description": "award_description", } transaction_normalized_dict = load_data_into_model( TransactionNormalized(), # thrown away row, field_map=contract_field_map, value_map=parent_txn_value_map, as_dict=True, ) contract_instance = load_data_into_model( TransactionFPDS(), row, as_dict=True) # thrown away detached_award_proc_unique = contract_instance[ "detached_award_proc_unique"] unique_fpds = TransactionFPDS.objects.filter( detached_award_proc_unique=detached_award_proc_unique) if unique_fpds.first(): transaction_normalized_dict["update_date"] = datetime.now( timezone.utc) transaction_normalized_dict["fiscal_year"] = fy( transaction_normalized_dict["action_date"]) # update TransactionNormalized TransactionNormalized.objects.filter( id=unique_fpds.first().transaction.id).update( **transaction_normalized_dict) # update TransactionFPDS unique_fpds.update(**contract_instance) else: # create TransactionNormalized transaction = TransactionNormalized( **transaction_normalized_dict) transaction.save() # create TransactionFPDS transaction_fpds = TransactionFPDS(transaction=transaction, **contract_instance) transaction_fpds.save() # Update legal entity to map back to transaction legal_entity.transaction_unique_id = detached_award_proc_unique legal_entity.save()
def load_file_c(submission_attributes, db_cursor, award_financial_frame): """ Process and load file C broker data. Note: this should run AFTER the D1 and D2 files are loaded because we try to join to those records to retrieve some additional information about the awarding sub-tier agency. """ # this matches the file b reverse directive, but am repeating it here to ensure that we don't overwrite it as we # change up the order of file loading if not award_financial_frame.size: logger.warning("No File C (award financial) data found, skipping...") return reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} award_financial_frame["object_class"] = award_financial_frame.apply( get_object_class_row, axis=1) award_financial_frame["program_activity"] = award_financial_frame.apply( get_or_create_program_activity, axis=1, submission_attributes=submission_attributes) total_rows = award_financial_frame.shape[0] start_time = datetime.now() awards_touched = [] # format award_financial_frame award_financial_frame = award_financial_frame.replace({np.nan: None}) for index, row in enumerate( award_financial_frame.to_dict(orient="records"), 1): if not (index % 100): logger.info("C File Load: Loading row {} of {} ({})".format( str(index), str(total_rows), datetime.now() - start_time)) upper_case_dict_values(row) # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id"), db_cursor) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # Find a matching transaction record, so we can use its subtier agency information to match to (or create) an # Award record. # Find the award that this award transaction belongs to. If it doesn't exist, create it. filters = {} if row.get("piid"): filters["piid"] = row.get("piid") filters["parent_piid"] = row.get("parent_award_id") else: if row.get("fain") and not row.get("uri"): filters["fain"] = row.get("fain") elif row.get("uri") and not row.get("fain"): filters["uri"] = row.get("uri") else: filters["fain"] = row.get("fain") filters["uri"] = row.get("uri") award = find_matching_award(**filters) if award: awards_touched += [award] award_financial_data = FinancialAccountsByAwards() value_map_faba = { "award": award, "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "object_class": row.get("object_class"), "program_activity": row.get("program_activity"), } # Still using the cpe|fyb regex compiled above for reverse load_data_into_model(award_financial_data, row, value_map=value_map_faba, save=True, reverse=reverse) for key in skipped_tas: logger.info("Skipped %d rows due to missing TAS: %s", skipped_tas[key]["count"], key) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info( "Skipped a total of {} TAS rows for File C".format(total_tas_skipped)) return [award.id for award in awards_touched if award]
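# NOTE: update_skipped_tas() is defined elsewhere. Based on the skipped_tas
# comments above (per-TAS count plus the original submitted row numbers), a
# plausible sketch is the following; the 'row_number' column name is an
# assumption.
def update_skipped_tas_sketch(row, tas_rendering_label, skipped_tas):
    entry = skipped_tas.setdefault(tas_rendering_label, {'count': 0, 'rows': []})
    entry['count'] += 1
    entry['rows'].append(row.get('row_number'))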
def load_file_b(submission_attributes, prg_act_obj_cls_data, db_cursor): """ Process and load file B broker data (aka TAS balances by program activity and object class). """ reverse = re.compile(r"(_(cpe|fyb)$)|^transaction_obligated_amount$") # dictionary to capture TAS that were skipped and some metadata # tas = top-level key # count = number of rows skipped # rows = row numbers skipped, corresponding to the original row numbers in the file that was submitted skipped_tas = {} bulk_treasury_appropriation_account_tas_lookup(prg_act_obj_cls_data, db_cursor) save_manager = BulkCreateManager( FinancialAccountsByProgramActivityObjectClass) for row in prg_act_obj_cls_data: # Check and see if there is an entry for this TAS treasury_account, tas_rendering_label = get_treasury_appropriation_account_tas_lookup( row.get("tas_id")) if treasury_account is None: update_skipped_tas(row, tas_rendering_label, skipped_tas) continue # get the corresponding account balances row (aka "File A" record) account_balances = AppropriationAccountBalances.objects.get( treasury_account_identifier=treasury_account, submission_id=submission_attributes.submission_id) financial_by_prg_act_obj_cls = FinancialAccountsByProgramActivityObjectClass( ) value_map = { "submission": submission_attributes, "reporting_period_start": submission_attributes.reporting_period_start, "reporting_period_end": submission_attributes.reporting_period_end, "treasury_account": treasury_account, "appropriation_account_balances": account_balances, "object_class": get_object_class(row["object_class"], row["by_direct_reimbursable_fun"]), "program_activity": get_program_activity(row, submission_attributes), "disaster_emergency_fund": get_disaster_emergency_fund(row), } save_manager.append( load_data_into_model(financial_by_prg_act_obj_cls, row, value_map=value_map, save=False, reverse=reverse)) save_manager.save_stragglers() for key in skipped_tas: logger.info( f"Skipped {skipped_tas[key]['count']:,} rows due to missing TAS: {key}" ) total_tas_skipped = 0 for key in skipped_tas: total_tas_skipped += skipped_tas[key]["count"] logger.info( f"Skipped a total of {total_tas_skipped:,} TAS rows for File B")
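# NOTE: BulkCreateManager's implementation is not shown in this file. A minimal
# sketch consistent with the append()/save_stragglers() calls above: buffer
# instances, flush via bulk_create() whenever the buffer fills, then flush the
# remainder (the "stragglers") once at the end. The flush threshold is an
# assumption.
class BulkCreateManagerSketch:
    def __init__(self, model_cls, threshold=10000):
        self.model_cls = model_cls
        self.threshold = threshold
        self._buffer = []

    def append(self, instance):
        self._buffer.append(instance)
        if len(self._buffer) >= self.threshold:
            self._flush()

    def save_stragglers(self):
        self._flush()

    def _flush(self):
        if self._buffer:
            self.model_cls.objects.bulk_create(self._buffer)
            self._buffer = []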
def get_submission_attributes(broker_submission_id, submission_data):
    """
    For a specified broker submission, return the existing corresponding usaspending
    submission record or create and return a new one.
    """
    # check if we already have an entry for this broker submission id; if not, create one
    submission_attributes, created = SubmissionAttributes.objects.get_or_create(
        broker_submission_id=broker_submission_id)

    if created:
        # this is the first time we're loading this broker submission
        logger.info('Creating broker submission id {}'.format(broker_submission_id))
    else:
        # we've already loaded this broker submission, so delete it before reloading.
        # if another submission references this one as its "previous submission", do not proceed.
        # TODO: now that we're chaining submissions together, get clarification on
        # what should happen when a submission in the middle of the chain is deleted
        downstream_submission = SubmissionAttributes.objects.filter(
            previous_submission=submission_attributes).first()
        if downstream_submission is not None:
            message = (
                'Broker submission {} (API submission id = {}) has a downstream submission (id={}) and '
                'cannot be deleted'.format(
                    broker_submission_id,
                    submission_attributes.submission_id,
                    downstream_submission.submission_id))
            raise ValueError(message)
        logger.info('Broker submission id {} already exists. It will be deleted.'.format(broker_submission_id))
        call_command('rm_submission', broker_submission_id)

    # Find the previous submission for this CGAC and fiscal year (if there is one)
    previous_submission = get_previous_submission(
        submission_data['cgac_code'],
        submission_data['reporting_fiscal_year'],
        submission_data['reporting_fiscal_period'])

    # Update and save submission attributes
    field_map = {
        'reporting_period_start': 'reporting_start_date',
        'reporting_period_end': 'reporting_end_date',
        'quarter_format_flag': 'is_quarter_format',
    }

    # Create our value map - specific data to load
    value_map = {
        'broker_submission_id': broker_submission_id,
        'reporting_fiscal_quarter': get_fiscal_quarter(submission_data['reporting_fiscal_period']),
        'previous_submission': previous_submission,
        # pull in broker's last update date to use as certified date
        'certified_date': submission_data['updated_at'].date()
        if isinstance(submission_data['updated_at'], datetime) else None,
    }

    return load_data_into_model(submission_attributes, submission_data,
                                field_map=field_map, value_map=value_map, save=True)
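# NOTE: get_fiscal_quarter() maps a broker reporting period (fiscal period
# 1-12) onto its fiscal quarter. A minimal sketch of the arithmetic; the real
# helper may validate its input more strictly.
def fiscal_quarter_of(fiscal_period):
    return (int(fiscal_period) + 2) // 3  # periods 1-3 -> Q1, ..., 10-12 -> Q4

assert [fiscal_quarter_of(p) for p in (1, 3, 4, 9, 12)] == [1, 1, 2, 3, 4]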
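# NOTE: a hypothetical driver for the paginated Broker queries used by
# update_transaction_contract and update_transaction_assistance above: fetch
# one LIMIT/OFFSET window at a time until an empty page signals the end.
# Ordering by the primary key keeps page boundaries stable, though OFFSET scans
# slow down as the page number grows (keyset pagination on
# detached_award_procurement_id would avoid that).
def iter_broker_pages(db_cursor, fiscal_year, limit=500000):
    page = 1
    while True:
        db_cursor.execute(
            "SELECT * FROM detached_award_procurement"
            " WHERE action_date::Date BETWEEN %s AND %s"
            " ORDER BY detached_award_procurement_id LIMIT %s OFFSET %s",
            ['10/01/' + str(fiscal_year - 1), '09/30/' + str(fiscal_year),
             limit, (page - 1) * limit])
        rows = dictfetchall(db_cursor)
        if not rows:
            break
        yield rows
        page += 1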