def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    today = DateAccessor().today()

    # Bounds for the current calendar month.
    _, curr_last = calendar.monthrange(today.year, today.month)
    curr_month_range = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=curr_last),
    )

    # Bounds for the previous calendar month.
    a_month_ago = today - relativedelta(months=1)
    _, prev_last = calendar.monthrange(a_month_ago.year, a_month_ago.month)
    prev_month_range = (
        date(year=a_month_ago.year, month=a_month_ago.month, day=1),
        date(year=a_month_ago.year, month=a_month_ago.month, day=prev_last),
    )

    accounts, _ = Orchestrator.get_accounts()
    # Several accounts can share one schema_name (different providers);
    # collapse to unique schemas so each is exported only once.
    unique_schemas = {account['schema_name'] for account in accounts}
    for schema_name in unique_schemas:
        for table in table_export_settings:
            # Upload this month's reports, then last month's.
            query_and_upload_to_s3(schema_name, table, curr_month_range)
            query_and_upload_to_s3(schema_name, table, prev_month_range)
def test_query_and_upload_to_s3(self, mock_uploader):
    """
    Assert each table-export query triggers the expected S3 uploads.

    Reliable test data only exists for AWS, but every entry in
    table_export_settings is still executed, so this doubles as a
    syntax check on each query's SQL even when it returns nothing.
    """
    today = self.today
    _, month_end = calendar.monthrange(today.year, today.month)
    date_range = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=month_end),
    )
    for setting in table_export_settings:
        mock_uploader.reset_mock()
        query_and_upload_to_s3(
            self.schema, self.aws_provider_uuid, setting, date_range
        )
        upload_file = mock_uploader.return_value.upload_file
        if setting.provider != 'aws':
            # We ONLY have test data currently for AWS.
            upload_file.assert_not_called()
        elif setting.iterate_daily:
            # There are always TWO days of AWS test data.
            self.assertEqual(len(upload_file.call_args_list), 2)
        else:
            # There is always only ONE month of AWS test data.
            upload_file.assert_called_once()
def test_query_and_upload_skips_if_no_data(self, mock_uploader):
    """Assert no file reaches S3 when the query matches no rows."""
    setting = self.get_table_export_setting_by_name(
        'reporting_awscostentrylineitem'
    )
    # A date range entirely in the future cannot match stored data.
    empty_range = (self.future_date, self.future_date)
    query_and_upload_to_s3(self.schema, setting, empty_range)
    mock_uploader.return_value.upload_file.assert_not_called()
def test_query_and_upload_to_s3_multiple_days_multiple_rows(self, mock_uploader):
    """Assert a multi-day range produces one uploaded file per day."""
    setting = self.get_table_export_setting_by_name(
        'reporting_awscostentrylineitem_daily_summary'
    )
    two_day_range = (self.yesterday_date, self.today_date)
    query_and_upload_to_s3(self.schema, setting, two_day_range)
    # One upload for yesterday plus one for today.
    self.assertEqual(mock_uploader.return_value.upload_file.call_count, 2)
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    # Correlate all log lines from one run of this task via a shared uuid.
    log_uuid = str(uuid.uuid4())
    LOG.info('%s Beginning upload_normalized_data', log_uuid)
    today = DateAccessor().today()

    # Current calendar month boundaries.
    _, curr_last = calendar.monthrange(today.year, today.month)
    curr_month_range = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=curr_last),
    )

    # Previous calendar month boundaries.
    a_month_ago = today - relativedelta(months=1)
    _, prev_last = calendar.monthrange(a_month_ago.year, a_month_ago.month)
    prev_month_range = (
        date(year=a_month_ago.year, month=a_month_ago.month, day=1),
        date(year=a_month_ago.year, month=a_month_ago.month, day=prev_last),
    )

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        schema_name = account['schema_name']
        provider_uuid = account['provider_uuid']
        LOG.info(
            '%s processing schema %s provider uuid %s',
            log_uuid,
            schema_name,
            provider_uuid,
        )
        for table in table_export_settings:
            # Upload this month's reports, then last month's.
            query_and_upload_to_s3(
                schema_name, provider_uuid, table, curr_month_range
            )
            query_and_upload_to_s3(
                schema_name, provider_uuid, table, prev_month_range
            )
    LOG.info('%s Completed upload_normalized_data', log_uuid)
def test_query_and_upload_to_s3(self, mock_uploader):
    """Assert a whole-month export produces exactly one S3 upload."""
    today = self.today
    _, month_end = calendar.monthrange(today.year, today.month)
    date_range = (
        date(year=today.year, month=today.month, day=1),
        date(year=today.year, month=today.month, day=month_end),
    )
    setting = self.get_table_export_setting_by_name(
        'reporting_awscostentrylineitem'
    )
    query_and_upload_to_s3(self.schema, setting, date_range)
    mock_uploader.return_value.upload_file.assert_called_once()