def handle(self, *args, **options):
    logger.info('Starting awarding agency updates...')

    fiscal_year = options.get('fiscal_year')[0]

    page = options.get('page')
    limit = options.get('limit')
    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if options.get('contracts', None):
        with timer('D1 (contracts/FPDS) awarding/funding agencies updates', logger.info):
            self.update_awarding_funding_agency(fiscal_year, 'D1', page=page, limit=limit)
    elif options.get('assistance', None):
        with timer('D2 (assistance/FABS) awarding/funding agencies updates', logger.info):
            self.update_awarding_funding_agency(fiscal_year, 'D2', page=page, limit=limit)
    else:
        logger.error('Not a valid data type: --assistance, --contracts')

    logger.info('Finished')
def handle(self, *args, **options):
    logger.info('Starting FABS nightly data load...')

    # Use the date provided or fall back to the most recent ExternalDataLoadDate
    if options.get('date'):
        date = options.get('date')[0]
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    start_date = datetime.utcnow().strftime('%Y-%m-%d')

    logger.info('Processing data for FABS starting from %s' % date)

    # Retrieve FABS data
    with timer('retrieving/diff-ing FABS Data', logger.info):
        to_insert, ids_to_delete = self.get_fabs_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        # Create a file with the deletion IDs and place it in a bucket for Elasticsearch
        self.send_deletes_to_elasticsearch(ids_to_delete)

        # Delete FABS records by ID
        with timer('deleting stale FABS data', logger.info):
            self.delete_stale_fabs(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        # Add FABS records
        with timer('inserting new FABS data', logger.info):
            self.insert_new_fabs(to_insert=to_insert, total_rows=total_rows)

        # Update awards based on changed FABS records; award_update_id_list is
        # assumed to be a module-level list populated while insert_new_fabs runs
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        # Update award categories based on changed FABS records
        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fabs']).save()

    logger.info('FABS NIGHTLY UPDATE FINISHED!')
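The nightly loaders call update_awards(tuple(award_update_id_list)) without defining award_update_id_list locally. A minimal sketch of the pattern this assumes: a module-level accumulator that the insert helper appends to as it creates records. The row key 'award_id' below is invented for illustration; the real helper maps many more fields.

# Module-level accumulator shared by the insert helper and handle() (assumed).
award_update_id_list = []

def insert_new_fabs(to_insert, total_rows):
    for row in to_insert:
        # 'award_id' is a hypothetical key on each broker row
        award_update_id_list.append(row['award_id'])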
def test_timer_times(capsys):
    """Verify that timer shows longer times for slower operations"""
    pattern = re.compile(r'([\d\.e\-]+) sec')
    with timer():
        print('Doing a thing')
    output0 = capsys.readouterr()[0]
    time0 = float(pattern.search(output0).group(1))
    with timer():
        print('Doing a slower thing')
        time.sleep(0.1)
    output1 = capsys.readouterr()[0]
    time1 = float(pattern.search(output1).group(1))
    assert time1 > time0
def test_timer(capsys):
    """Verify that the timer helper executes without error"""
    with timer():
        print('Doing a thing')
    output = capsys.readouterr()[0]
    assert 'Beginning' in output
    assert 'finished' in output
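The two tests above pin down the timer contract: with no arguments it writes 'Beginning ...' on entry and a 'finished ... sec' line with the elapsed time on exit, and the loaders call it as timer(message, logger.info). A minimal sketch consistent with that contract, not necessarily the project's actual implementation:

import time
from contextlib import contextmanager

@contextmanager
def timer(message='', writer=print):
    # Announce the start, time the wrapped block, report elapsed seconds on exit
    writer('Beginning {}...'.format(message))
    start = time.perf_counter()
    try:
        yield
    finally:
        elapsed = time.perf_counter() - start
        writer('... {} finished in {} sec'.format(message, elapsed))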
def handle(self, *args, **options):
    logger.info('Starting updates to award data...')

    all_records_flag = options.get('all')
    fiscal_year = options.get('fiscal_year')

    award_update_id_list = []
    award_contract_update_id_list = []

    if not all_records_flag:
        if fiscal_year:
            fiscal_year = fiscal_year[0]
            logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
        else:
            fiscal_year = 2017

        # Lists to pass to update_awards and update_contract_awards
        award_update_id_list = TransactionNormalized.objects.filter(action_date__fy=fiscal_year).\
            values_list('award_id', flat=True)
        award_contract_update_id_list = TransactionFPDS.objects.filter(action_date__fy=fiscal_year).\
            values_list('transaction__award_id', flat=True)

    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        if all_records_flag:
            update_awards()
        else:
            update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        if all_records_flag:
            update_contract_awards()
        else:
            update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        if all_records_flag:
            update_award_categories()
        else:
            update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
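The action_date__fy filters above depend on a custom 'fy' lookup registered on Django date fields; nothing in this command defines it. A sketch of such a transform, assuming PostgreSQL and the US federal fiscal year (which begins October 1); the project's real transform may differ:

from django.db import models
from django.db.models import IntegerField, Transform

class FiscalYear(Transform):
    """Allow querysets to filter with e.g. action_date__fy=2017."""
    lookup_name = 'fy'
    output_field = IntegerField()

    def as_sql(self, compiler, connection):
        lhs, params = compiler.compile(self.lhs)
        # Shifting the date forward three months makes the calendar year of
        # the shifted date equal the federal fiscal year
        return "EXTRACT(YEAR FROM ({} + INTERVAL '3 months'))".format(lhs), params

models.DateField.register_lookup(FiscalYear)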
def handle(self, *args, **options):
    logger.info('Starting FPDS nightly data load...')

    if options.get('date'):
        date = options.get('date')[0]
        date = datetime.strptime(date, '%Y-%m-%d').date()
    else:
        data_load_date_obj = ExternalDataLoadDate.objects.filter(
            external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).first()
        if not data_load_date_obj:
            date = (datetime.utcnow() - timedelta(days=1)).strftime('%Y-%m-%d')
        else:
            date = data_load_date_obj.last_load_date
    start_date = datetime.utcnow().strftime('%Y-%m-%d')

    logger.info('Processing data for FPDS starting from %s' % date)

    with timer('retrieving/diff-ing FPDS Data', logger.info):
        to_insert, ids_to_delete = self.get_fpds_data(date=date)

    total_rows = len(to_insert)
    total_rows_delete = len(ids_to_delete)

    if total_rows_delete > 0:
        with timer('deleting stale FPDS data', logger.info):
            self.delete_stale_fpds(ids_to_delete=ids_to_delete)
    else:
        logger.info('Nothing to delete...')

    if total_rows > 0:
        with timer('inserting new FPDS data', logger.info):
            self.insert_new_fpds(to_insert=to_insert, total_rows=total_rows)

        # award_update_id_list is assumed to be a module-level list populated
        # while insert_new_fpds runs
        with timer('updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
            update_contract_awards(tuple(award_update_id_list))

        with timer('updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...')

    # Update the date for the last time the data load was run
    ExternalDataLoadDate.objects.filter(
        external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).delete()
    ExternalDataLoadDate(last_load_date=start_date,
                         external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds']).save()

    logger.info('FPDS NIGHTLY UPDATE FINISHED!')
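A side note on the delete-then-save pair that both nightly loaders end with: it takes two statements to maintain what is intended as a single marker row, so a crash between them loses the row. Django's update_or_create (a standard queryset method) expresses the same intent in one call; shown here as an alternative sketch, not the author's code:

ExternalDataLoadDate.objects.update_or_create(
    external_data_type_id=lookups.EXTERNAL_DATA_TYPE_DICT['fpds'],
    defaults={'last_load_date': start_date})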
def handle(self, *args, **options):
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')
    save = options.get('save')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data location insert', logger.info):
            self.update_location_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                      page=page, limit=limit, save=save)

    if not options['contracts']:
        with timer('D2 historical data location insert', logger.info):
            self.update_location_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                        page=page, limit=limit, save=save)

    logger.info('FINISHED')
def handle(self, *args, **options):
    logger.info('Starting historical data load...')

    db_cursor = connections['data_broker'].cursor()
    fiscal_year = options.get('fiscal_year')
    page = options.get('page')
    limit = options.get('limit')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
        logger.info('Processing data for Fiscal Year ' + str(fiscal_year))
    else:
        fiscal_year = 2017

    page = page[0] if page else 1
    limit = limit[0] if limit else 500000

    if not options['assistance']:
        with timer('D1 historical data load', logger.info):
            self.update_transaction_contract(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                             page=page, limit=limit)

    if not options['contracts']:
        with timer('D2 historical data load', logger.info):
            self.update_transaction_assistance(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                               page=page, limit=limit)

    # award_update_id_list and award_contract_update_id_list are assumed to be
    # module-level lists populated by the update_transaction_* helpers above
    with timer('updating awards to reflect their latest associated transaction info', logger.info):
        update_awards(tuple(award_update_id_list))

    with timer('updating contract-specific awards to reflect their latest transaction info', logger.info):
        update_contract_awards(tuple(award_contract_update_id_list))

    with timer('updating award category variables', logger.info):
        update_award_categories(tuple(award_update_id_list))

    # Done!
    logger.info('FINISHED')
def handle(self, *args, **options):
    logger.info('Starting row deletion...')

    if options['batches']:
        limit = options['batches'] * options['batchsize']
    else:
        limit = None

    with timer('executing query', logger.info):
        cursor = self.fabs_cursor(limit)

    batch_no = 1
    while (not options['batches']) or (batch_no <= options['batches']):
        message = 'Batch {} of {} rows'.format(batch_no, options['batchsize'])
        with timer(message, logger.info):
            rows = cursor.fetchmany(options['batchsize'])
            if not rows:
                logger.info('No further rows; finished')
                return
            ids = [r[0] for r in rows]
            with timer('deleting rows', logger.info):
                TransactionNormalized.objects.\
                    filter(assistance_data__afa_generated_unique__in=ids).delete()
        batch_no += 1

    logger.info('{} batches finished, complete'.format(batch_no - 1))
def handle(self, *args, **options):
    """
    Updates the column ussgl498100_upward_adjust_pri_deliv_orders_oblig_unpaid_cpe
    due to the incorrect mapping in settings.py
    """
    ds_cursor = connection.cursor()
    logger.info('Begin updating File B and C')

    broker_cols_b = self.get_list_of_broker_cols(financial_accounts_oc)
    broker_cols_type_b = self.get_list_of_broker_cols_types(financial_accounts_oc)
    website_cols_b = self.get_website_row_formatted(financial_accounts_oc)
    website_update_text_b = self.get_cols_to_update(financial_accounts_oc)
    website_cols_joins_b = self.get_file_table_joins(financial_accounts_oc)

    broker_cols_c = self.get_list_of_broker_cols(financial_accounts_awards)
    broker_cols_type_c = self.get_list_of_broker_cols_types(financial_accounts_awards)
    website_cols_c = self.get_website_row_formatted(financial_accounts_awards)
    website_update_text_c = self.get_cols_to_update(financial_accounts_awards)
    website_cols_joins_c = self.get_file_table_joins(financial_accounts_awards)

    with timer('getting submission ids to update', logger.info):
        submissions_to_update = self.get_list_of_submissions()

    for submission in submissions_to_update:
        submission_id = submission[0]

        # File B updates
        logger.info('loading rows data to update File B submission {}'.format(submission_id))
        with timer('retrieving rows to update for File B submission {}'.format(submission_id), logger.info):
            get_rows_to_update_query = self.get_rows_to_update('B', submission_id, broker_cols_b,
                                                               broker_cols_type_b, website_cols_b)
            ds_cursor.execute(get_rows_to_update_query)

        with timer('updating rows for File B submission {}'.format(submission_id), logger.info):
            update_rows = self.update_website_rows(
                'financial_accounts_by_program_activity_object_class', 'file_b_rows_to_update',
                website_update_text_b, website_cols_joins_b)
            ds_cursor.execute(update_rows)

        # File C updates
        with timer('retrieving rows to update for File C submission {}'.format(submission_id), logger.info):
            get_rows_to_update_query = self.get_rows_to_update('C', submission_id, broker_cols_c,
                                                               broker_cols_type_c, website_cols_c)
            ds_cursor.execute(get_rows_to_update_query)

        with timer('updating rows for File C submission {}'.format(submission_id), logger.info):
            update_rows = self.update_website_rows(
                'financial_accounts_by_awards', 'file_c_rows_to_update',
                website_update_text_c, website_cols_joins_c)
            ds_cursor.execute(update_rows)

        ds_cursor.execute("DROP TABLE file_b_rows_to_update")
        ds_cursor.execute("DROP TABLE file_c_rows_to_update")

    logger.info('Done updating File B and C mappings')
def handle(self, *args, **options):
    logger.info('Starting FABS bulk data load...')

    db_cursor = connections['data_broker'].cursor()
    ds_cursor = connection.cursor()
    fiscal_year = options.get('fiscal_year')

    if fiscal_year:
        fiscal_year = fiscal_year[0]
    else:
        fiscal_year = 2017
    logger.info('Processing data for Fiscal Year ' + str(fiscal_year))

    with timer('Diff-ing FABS data', logger.info):
        to_insert, to_delete = self.diff_fabs_data(db_cursor=db_cursor, ds_cursor=ds_cursor,
                                                   fiscal_year=fiscal_year)

    total_rows = len(to_insert)
    total_rows_delete = len(to_delete)

    if total_rows_delete > 0:
        with timer('Deleting stale FABS data', logger.info):
            self.delete_stale_fabs(to_delete=to_delete)

    if total_rows > 0:
        # Set lookup maps after deletions so they reflect only the latest data
        self.set_lookup_maps()

        with timer('Getting Broker FABS data', logger.info):
            fabs_broker_data = self.get_fabs_data(db_cursor=db_cursor, fiscal_year=fiscal_year,
                                                  to_insert=to_insert)

        with timer('Loading POP Location data', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows, pop_flag=True)

        with timer('Loading LE Location data', logger.info):
            self.load_locations(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Legal Entity data', logger.info):
            self.load_legal_entity(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Award data', logger.info):
            self.load_awards(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction Normalized data', logger.info):
            self.load_transaction_normalized(fabs_broker_data=fabs_broker_data, total_rows=total_rows)

        with timer('Loading Transaction FABS data', logger.info):
            self.load_transaction_fabs(fabs_broker_data, total_rows)

        # award_lookup is assumed to be a module-level cache built by the load steps above
        award_update_id_list = [award.id for award in award_lookup]

        with timer('Updating awards to reflect their latest associated transaction info', logger.info):
            update_awards(tuple(award_update_id_list))

        with timer('Updating award category variables', logger.info):
            update_award_categories(tuple(award_update_id_list))
    else:
        logger.info('Nothing to insert...FINISHED!')