def handle(self, *args, **options):
    """Run the FPDS nightly load: delete stale records, insert new ones,
    refresh derived award data, and record the load date.

    Args (via **options):
        date: optional single-element list with a 'YYYY-MM-DD' string; when
            absent, the last load date stored in ExternalDataLoadDate is used,
            falling back to yesterday (UTC) if no row exists.

    Side effects: deletes/inserts FPDS transaction rows, rewrites derived
    award fields, and replaces the 'fpds' row in ExternalDataLoadDate.
    """
    logger.info("==== Starting FPDS nightly data load ====")

    if options.get("date"):
        date = options.get("date")[0]
        date = datetime.strptime(date, "%Y-%m-%d").date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
        ).first()
        if not data_load_date_obj:
            # No prior load recorded: default to yesterday (UTC)
            date = (datetime.utcnow() - timedelta(days=1)).strftime("%Y-%m-%d")
        else:
            date = data_load_date_obj.last_load_date
    # Captured BEFORE processing so records arriving mid-run are re-examined next night
    start_date = datetime.utcnow().strftime("%Y-%m-%d")
    logger.info("Processing data for FPDS starting from %s" % date)

    with timer("retrieval of deleted FPDS IDs", logger.info):
        ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

    if len(ids_to_delete) > 0:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info("No FPDS records to delete at this juncture")

    with timer("retrieval of new/modified FPDS data ID list", logger.info):
        total_insert = self.get_fpds_transaction_ids(date=date)

    if len(total_insert) > 0:
        # Add FPDS records
        with timer("insertion of new FPDS data in batches", logger.info):
            self.insert_all_new_fpds(total_insert)

        # Update Awards based on changed FPDS records
        # NOTE(review): AWARD_UPDATE_ID_LIST is not defined in this method —
        # presumably a module-level list populated during insertion; confirm.
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update AwardCategories based on changed FPDS records
        with timer("updating award category variables", logger.info):
            update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("contract")
    else:
        logger.info("No FPDS records to insert or modify at this juncture")

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]).delete()
    ExternalDataLoadDate(
        last_load_date=start_date, external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT["fpds"]
    ).save()

    logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def perform_load(self, ids_to_delete, ids_to_insert):
    """Apply one FPDS load cycle: drop stale records, then insert new ones
    and refresh every piece of derived award data that depends on them.

    Args:
        ids_to_delete: FPDS record IDs to remove before inserting.
        ids_to_insert: FPDS record IDs to add/refresh.
    """
    if not ids_to_delete:
        logger.info("No FPDS records to delete at this juncture")
    else:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)

    if not ids_to_insert:
        logger.info("No FPDS records to insert or modify at this juncture")
        return

    # Bring in the new/modified FPDS records first ...
    with timer("insertion of new FPDS data in batches", logger.info):
        self.insert_all_new_fpds(ids_to_insert)

    # ... then cascade the changes through every derived award structure.
    # NOTE(review): AWARD_UPDATE_ID_LIST is presumably a module-level list
    # populated during insertion — confirm against the rest of the module.
    with timer("updating awards to reflect their latest associated transaction info", logger.info):
        update_awards(tuple(AWARD_UPDATE_ID_LIST))

    with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
        update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

    with timer("updating award category variables", logger.info):
        update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

    # Repair any missing File C -> FPDS (D1) linkages
    with timer("updating C->D linkages", logger.info):
        update_c_to_d_linkages("contract")
def handle(self, *args, **options):
    """Bulk-load FABS data for one fiscal year from the Broker database.

    Args (via **options):
        fiscal_year: optional single-element list; defaults to 2017.

    Flow: diff Broker vs. local data, delete stale rows, then load
    locations, legal entities, awards, and transactions, and finally
    refresh derived award fields.
    """
    logger.info('Starting FABS bulk data load...')

    # Broker (source) and USAspending (destination) database cursors
    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()
    fiscal_year = options.get('fiscal_year')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
    else:
        fiscal_year = 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FABS data', logger.info):
        to_insert, to_delete = self.diff_fabs_data(
            db_cursor=db_cursor, ds_cursor=ds_cursor, fiscal_year=fiscal_year)

    total_rows = len(to_insert)
    total_rows_delete = len(to_delete)

    if total_rows_delete > 0:
        with timer('Deleting stale FABS data', logger.info):
            self.delete_stale_fabs(to_delete=to_delete)

    if total_rows > 0:
        # Set lookups after deletions to only get latest
        self.set_lookup_maps()

        with timer('Get Broker FABS data', logger.info):
            fabs_broker_data = self.get_fabs_data(
                db_cursor=db_cursor, fiscal_year=fiscal_year, to_insert=to_insert)

        # Load order matters: locations and legal entities must exist before
        # awards, which must exist before transactions.
        with timer('Loading POP Location data...', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows, pop_flag=True)

        with timer('Loading LE Location data', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Award data', logger.info):
            self.load_awards(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction FABS data', logger.info):
            self.load_transaction_fabs(fabs_broker_data, total_rows)

        # NOTE(review): award_lookup is not defined in this method — presumably
        # module- or instance-level state populated by load_awards; confirm.
        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')
def handle(self, *args, **options):
    """Run the FPDS nightly load (diff-based variant): diff against the
    source, delete stale records, insert new ones, refresh derived award
    data, and record the load date.

    Args (via **options):
        date: optional single-element list with a 'YYYY-MM-DD' string; when
            absent, falls back to the stored last load date, then to
            yesterday (UTC).
    """
    logger.info('Starting FPDS nightly data load...')

    if options.get('date'):
        date = options.get('date')[0]
        date = datetime.strptime(date, '%Y-%m-%d').date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects. \
            filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).first()
        if not data_load_date_obj:
            # No prior load recorded: default to yesterday (UTC)
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    # Captured BEFORE processing so records arriving mid-run are retried next night
    start_date = datetime.utcnow().strftime('%Y-%m-%d')
    logger.info('Processing data for FPDS starting from %s' % date)

    with timer('retrieving/diff-ing FPDS Data', logger.info):
        to_insert, ids_to_delete = self.get_fpds_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        with timer('deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        with timer('inserting new FPDS data', logger.info):
            self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

        # NOTE(review): award_update_id_list is not defined in this method —
        # presumably a module-level list populated by insert_new_fpds; confirm,
        # otherwise this branch raises NameError.
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']
    ).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

    logger.info('FPDS NIGHTLY UPDATE FINISHED!')
def handle(self, *args, **options):
    """Run the FABS nightly load: diff against the source, publish delete IDs
    to S3 (for Elasticsearch), delete stale records, insert new ones, refresh
    derived award data, and record the load date.

    Args (via **options):
        date: optional single-element list with a date string; when absent,
            falls back to the stored last load date, then to yesterday (UTC).
    """
    logger.info('Starting FABS nightly data load...')

    # Use date provided or pull most recent ExternalDataLoadDate
    if options.get('date'):
        date = options.get('date')[0]
    else:
        data_load_date_obj = ExternalDataLoadDate.objects. \
            filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    # Captured BEFORE processing so records arriving mid-run are retried next night
    start_date = datetime.utcnow().strftime('%Y-%m-%d')
    logger.info('Processing data for FABS starting from %s' % date)

    # Retrieve FABS data
    with timer('retrieving/diff-ing FABS Data', logger.info):
        to_insert, ids_to_delete = self.get_fabs_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        # Create a file with the deletion IDs and place in a bucket for ElasticSearch
        self.send_deletes_to_s3(ids_to_delete)

        # Delete FABS records by ID
        with timer('deleting stale FABS data', logger.info):
            self.delete_stale_fabs(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        # Add FABS records
        with timer('inserting new FABS data', logger.info):
            self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

        # Update Awards based on changed FABS records
        # NOTE(review): award_update_id_list is not defined in this method —
        # presumably a module-level list populated by insert_new_fabs; confirm.
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        # Update AwardCategories based on changed FABS records
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))

        # Check the linkages from file C to FABS records and update any that are missing
        with timer('updating C->D linkages', logger.info):
            update_c_to_d_linkages('assistance')
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

    logger.info('FABS NIGHTLY UPDATE FINISHED!')
def handle(self, *args, **options):
    """Run the FPDS nightly load using the get/update_last_load_date helpers
    for load-date bookkeeping.

    Args (via **options):
        date: optional single-element list with a 'YYYY-MM-DD' string; when
            absent, uses get_last_load_date("fpds"), defaulting to yesterday
            (UTC) if nothing is stored.
    """
    logger.info("==== Starting FPDS nightly data load ====")

    if options.get("date"):
        date = options.get("date")[0]
        date = datetime.strptime(date, "%Y-%m-%d").date()
    else:
        default_last_load_date = datetime.now(timezone.utc) - timedelta(days=1)
        date = get_last_load_date("fpds", default=default_last_load_date).date()
    # Captured BEFORE processing so records arriving mid-run are retried next night
    processing_start_datetime = datetime.now(timezone.utc)
    logger.info("Processing data for FPDS starting from %s" % date)

    with timer("retrieval of deleted FPDS IDs", logger.info):
        ids_to_delete = self.get_deleted_fpds_data_from_s3(date=date)

    if len(ids_to_delete) > 0:
        with timer("deletion of all stale FPDS data", logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info("No FPDS records to delete at this juncture")

    with timer("retrieval of new/modified FPDS data ID list", logger.info):
        total_insert = self.get_fpds_transaction_ids(date=date)

    if len(total_insert) > 0:
        # Add FPDS records
        with timer("insertion of new FPDS data in batches", logger.info):
            self.insert_all_new_fpds(total_insert)

        # Update Awards based on changed FPDS records
        # NOTE(review): AWARD_UPDATE_ID_LIST is presumably a module-level list
        # populated during insertion — confirm.
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update FPDS-specific Awards based on the info in child transactions
        with timer("updating contract-specific awards to reflect their latest transaction info", logger.info):
            update_contract_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update AwardCategories based on changed FPDS records
        with timer("updating award category variables", logger.info):
            update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

        # Check the linkages from file C to FPDS records and update any that are missing
        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("contract")
    else:
        logger.info("No FPDS records to insert or modify at this juncture")

    # Update the date for the last time the data load was run
    update_last_load_date("fpds", processing_start_datetime)

    logger.info("FPDS NIGHTLY UPDATE COMPLETE")
def handle(self, *args, **options):
    """Load historical D1 (contract) and D2 (assistance) transaction data
    from the Broker database, paged, then refresh derived award data.

    Args (via **options):
        fiscal_year: optional single-element list; defaults to 2017.
        page: optional single-element list; defaults to 1.
        limit: optional single-element list (page size); defaults to 500000.
        assistance: when truthy, SKIP the D1 (contract) load.
        contracts: when truthy, SKIP the D2 (assistance) load.
    """
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    # NOTE(review): award_update_id_list / award_contract_update_id_list are
    # not defined in this method — presumably module-level lists populated by
    # the update_transaction_* helpers; confirm.
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Recompute derived award data, either for every award ('all' flag) or
    only for awards with transactions in one fiscal year.

    Args (via **options):
        all: when truthy, refresh every award and ignore fiscal_year.
        fiscal_year: optional single-element list; defaults to 2017.
    """
    logger.info('Starting updates to award data...')

    refresh_all = options.get('all')
    fiscal_year = options.get('fiscal_year')

    touched_award_ids = []
    touched_contract_award_ids = []
    if not refresh_all:
        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        # Restrict the refresh to awards touched by transactions in that FY
        touched_award_ids = TransactionNormalized.objects.filter(
            action_date__fy=fiscal_year).values_list('award_id', flat=True)
        touched_contract_award_ids = TransactionFPDS.objects.filter(
            action_date__fy=fiscal_year).values_list('transaction__award_id', flat=True)

    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        if refresh_all:
            update_awards()
        else:
            update_awards(tuple(touched_award_ids))

    with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
        if refresh_all:
            update_contract_awards()
        else:
            update_contract_awards(tuple(touched_contract_award_ids))

    with timer('updating award category variables', logger.info):
        if refresh_all:
            update_award_categories()
        else:
            update_award_categories(tuple(touched_award_ids))

    # Done!
    logger.info('FINISHED')
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    """Insert new/changed FABS transactions and refresh the awards they touch.

    Args:
        ids_to_upsert: FABS transaction IDs to insert or update.
        externally_updated_award_ids: award IDs already known to be stale
            (e.g. from deletions) that must be refreshed regardless.
    """
    if not (ids_to_upsert or externally_updated_award_ids):
        logger.info("Nothing to insert...")
        return

    # Seed with awards flagged stale by the caller; insertion adds more.
    award_ids = copy(externally_updated_award_ids)

    if ids_to_upsert:
        with timer("inserting new FABS data", logger.info):
            award_ids.extend(insert_all_new_fabs(ids_to_upsert))

    if award_ids:
        award_ids = tuple(set(award_ids))  # Convert to tuple and remove duplicates.
        with timer("updating awards to reflect their latest associated transaction info", logger.info):
            update_awards(award_ids)
        with timer("updating award category variables", logger.info):
            update_award_categories(award_ids)

    # Repair any missing File C -> FABS (D2) linkages
    with timer("updating C->D linkages", logger.info):
        update_c_to_d_linkages("assistance")
def upsert_fabs_transactions(ids_to_upsert, externally_updated_award_ids):
    """Upsert FABS transactions, then cascade the changes to award rollups,
    award categories, and File C linkages.

    Args:
        ids_to_upsert: FABS transaction IDs to insert or update.
        externally_updated_award_ids: award IDs already marked stale by the
            caller; always included in the refresh set.
    """
    if ids_to_upsert or externally_updated_award_ids:
        # Work on a copy so the caller's list is never mutated.
        stale_award_ids = copy(externally_updated_award_ids)

        if ids_to_upsert:
            with timer("inserting new FABS data", logger.info):
                stale_award_ids.extend(insert_all_new_fabs(ids_to_upsert))

        if stale_award_ids:
            # Convert to tuple and remove duplicates.
            stale_award_ids = tuple(set(stale_award_ids))

            with timer("updating awards to reflect their latest associated transaction info", logger.info):
                update_awards(stale_award_ids)

            with timer("updating award category variables", logger.info):
                update_award_categories(stale_award_ids)

        with timer("updating C->D linkages", logger.info):
            update_c_to_d_linkages("assistance")
    else:
        logger.info("Nothing to insert...")
def handle(self, *args, **options):
    """Load historical D1 (contract) and D2 (assistance) transaction data
    from the Broker database, paged, then refresh derived award data.

    Args (via **options):
        fiscal_year: optional single-element list; defaults to 2017.
        page: optional single-element list; defaults to 1.
        limit: optional single-element list (page size); defaults to 500000.
        assistance: when truthy, SKIP the D1 (contract) load.
        contracts: when truthy, SKIP the D2 (assistance) load.
    """
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year, page=page, limit=limit)

    # NOTE(review): award_update_id_list / award_contract_update_id_list are
    # not defined in this method — presumably module-level lists populated by
    # the update_transaction_* helpers; confirm.
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    """Refresh derived award data — everything when the 'all' flag is set,
    otherwise only awards with transactions in the requested fiscal year.

    Args (via **options):
        all: when truthy, refresh every award and ignore fiscal_year.
        fiscal_year: optional single-element list; defaults to 2017.
    """
    logger.info('Starting updates to award data...')

    all_records_flag = options.get('all')
    fy_option = options.get('fiscal_year')

    award_ids = []
    contract_award_ids = []
    if not all_records_flag:
        if fy_option:
            target_fy = fy_option[0]
            logger.info('Processing data for Fiscal Year ' + str(target_fy))
        else:
            target_fy = 2017

        # Lists to store for update_awards and update_contract_awards
        award_ids = TransactionNormalized.objects.filter(
            action_date__fy=target_fy).values_list('award_id', flat=True)
        contract_award_ids = TransactionFPDS.objects.filter(
            action_date__fy=target_fy).values_list('transaction__award_id', flat=True)

    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards() if all_records_flag else update_awards(tuple(award_ids))

    with timer('updating contract-specific awards to reflect their latest transaction info...', logger.info):
        if all_records_flag:
            update_contract_awards()
        else:
            update_contract_awards(tuple(contract_award_ids))

    with timer('updating award category variables', logger.info):
        update_award_categories() if all_records_flag else update_award_categories(tuple(award_ids))

    # Done!
    logger.info('FINISHED')
def forwards_func(apps, schema_editor):
    """Forward step of a Django data migration: recompute award category
    fields by calling update_award_categories with no ID filter.

    Args:
        apps: historical app registry supplied by the migration framework (unused).
        schema_editor: schema editor supplied by the migration framework (unused).
    """
    update_award_categories()
def handle(self, *args, **options):
    """Run the FABS load: fetch upsert and delete ID sets since the last
    load date, archive and apply deletions, insert new records, refresh
    derived award data, and record the load date.

    Args (via **options):
        date: optional single-element list with a date string; when absent,
            falls back to the stored last load date, then to yesterday (UTC).
    """
    logger.info("Starting FABS data load script...")

    # Captured BEFORE processing so records arriving mid-run are retried next run
    start_date = datetime.now(timezone.utc).strftime('%Y-%m-%d')
    fabs_load_db_id = lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
    data_load_date_obj = ExternalDataLoadDate.objects.filter(
        external_data_type_id=fabs_load_db_id).first()

    if options.get("date"):  # if provided, use cli data
        load_from_date = options.get("date")[0]
    elif data_load_date_obj:  # else if last run is in DB, use that
        load_from_date = data_load_date_obj.last_load_date
    else:  # Default is yesterday at midnight
        load_from_date = (datetime.now(timezone.utc) - timedelta(days=1)).strftime('%Y-%m-%d')

    logger.info('Processing data for FABS starting from %s' % load_from_date)

    with timer('retrieving/diff-ing FABS Data', logger.info):
        upsert_transactions = self.get_fabs_transaction_ids(date=load_from_date)

    with timer("obtaining delete records", logger.info):
        ids_to_delete = self.get_fabs_records_to_delete(date=load_from_date)

    if ids_to_delete:
        # Archive the deletion IDs before removing the records
        self.store_deleted_fabs(ids_to_delete)

        # Delete FABS records by ID
        with timer("deleting stale FABS data", logger.info):
            self.delete_stale_fabs(ids_to_delete=ids_to_delete)
        # Free the (potentially large) ID list before the insert phase
        del ids_to_delete
    else:
        logger.info("Nothing to delete...")

    if upsert_transactions:
        # Add FABS records
        with timer('inserting new FABS data', logger.info):
            self.insert_all_new_fabs(all_new_to_insert=upsert_transactions)

        # Update Awards based on changed FABS records
        # NOTE(review): AWARD_UPDATE_ID_LIST is presumably a module-level list
        # populated during insertion — confirm.
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(AWARD_UPDATE_ID_LIST))

        # Update AwardCategories based on changed FABS records
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(AWARD_UPDATE_ID_LIST))

        # Check the linkages from file C to FABS records and update any that are missing
        with timer('updating C->D linkages', logger.info):
            update_c_to_d_linkages('assistance')
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']
    ).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

    logger.info('FABS UPDATE FINISHED!')
def handle(self, *args, **options):
    """Bulk-load FABS data for one fiscal year from the Broker database.

    Args (via **options):
        fiscal_year: optional single-element list; defaults to 2017.

    Flow: diff Broker vs. local data, delete stale rows, then load
    locations, legal entities, awards, and transactions, and finally
    refresh derived award fields.
    """
    logger.info('Starting FABS bulk data load...')

    # Broker (source) and USAspending (destination) database cursors
    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()
    fiscal_year = options.get('fiscal_year')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
    else:
        fiscal_year = 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FABS data', logger.info):
        to_insert, to_delete = self.diff_fabs_data(db_cursor=db_cursor, ds_cursor=ds_cursor,
                                                   fiscal_year=fiscal_year)

    total_rows = len(to_insert)
    total_rows_delete = len(to_delete)

    if total_rows_delete > 0:
        with timer('Deleting stale FABS data', logger.info):
            self.delete_stale_fabs(to_delete=to_delete)

    if total_rows > 0:
        # Set lookups after deletions to only get latest
        self.set_lookup_maps()

        with timer('Get Broker FABS data', logger.info):
            fabs_broker_data = self.get_fabs_data(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                  to_insert=to_insert)

        # Load order matters: locations and legal entities must exist before
        # awards, which must exist before transactions.
        with timer('Loading POP Location data...', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows, pop_flag=True)

        with timer('Loading LE Location data', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Award data', logger.info):
            self.load_awards(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction FABS data', logger.info):
            self.load_transaction_fabs(fabs_broker_data, total_rows)

        # NOTE(review): award_lookup is not defined in this method — presumably
        # module- or instance-level state populated by load_awards; confirm.
        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')