def test_query_and_upload_to_s3(self, mock_uploader):
    """
    Assert query_and_upload_to_s3 uploads to S3 for each query.

    We only have test data reliably set for AWS, but this function should
    still execute *all* of the table_export_settings queries, effectively
    providing a syntax check on the SQL even if no results are found.
    """
    today = self.today
    _, last_day_of_month = calendar.monthrange(today.year, today.month)

    curr_month_first_day = date(year=today.year, month=today.month, day=1)
    curr_month_last_day = date(year=today.year, month=today.month, day=last_day_of_month)

    date_range = (curr_month_first_day, curr_month_last_day)

    for table_export_setting in tasks.table_export_settings:
        mock_uploader.reset_mock()
        tasks.query_and_upload_to_s3(
            self.schema,
            self.aws_provider_uuid,
            dictify_table_export_settings(table_export_setting),
            date_range[0],
            date_range[1],
        )
        if table_export_setting.provider == "aws":
            if table_export_setting.iterate_daily:
                # There are always TWO days of AWS test data.
                calls = mock_uploader.return_value.upload_file.call_args_list
                self.assertEqual(len(calls), 2)
            else:
                # There is always only ONE month of AWS test data.
                mock_uploader.return_value.upload_file.assert_called_once()
        else:
            # We ONLY have test data currently for AWS.
            mock_uploader.return_value.upload_file.assert_not_called()
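# For context on the assertions above: daily tables (iterate_daily=True) are
# expected to upload one file per day in the requested range, while monthly
# tables upload a single file for the whole range. A rough, hypothetical
# sketch of that branching (not the actual query_and_upload_to_s3 body, which
# lives in masu.celery.tasks and may differ):
from datetime import date, timedelta


def _upload_ranges(start_date, end_date, iterate_daily):
    """Yield (start, end) pairs: one per day if iterate_daily, else the whole span."""
    if iterate_daily:
        day = start_date
        while day <= end_date:
            yield day, day
            day += timedelta(days=1)
    else:
        yield start_date, end_date


# Example: two days of data -> two ranges -> two upload_file calls.
assert len(list(_upload_ranges(date(2015, 1, 1), date(2015, 1, 2), True))) == 2
assert len(list(_upload_ranges(date(2015, 1, 1), date(2015, 1, 31), False))) == 1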
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    LOG.info('Beginning upload_normalized_data')
    curr_date = DateAccessor().today()
    curr_month_range = calendar.monthrange(curr_date.year, curr_date.month)
    curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1)
    curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1])

    previous_month = curr_date - relativedelta(months=1)
    prev_month_range = calendar.monthrange(previous_month.year, previous_month.month)
    prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1)
    prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1])

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        LOG.info(
            'processing schema %s provider uuid %s',
            account['schema_name'],
            account['provider_uuid'],
        )
        for table in table_export_settings:
            # Celery does not serialize named tuples, so convert it to a dict
            # before handing it off to the celery task.
            table_dict = dictify_table_export_settings(table)

            # Upload this month's reports.
            query_and_upload_to_s3.delay(
                account['schema_name'],
                account['provider_uuid'],
                table_dict,
                curr_month_first_day,
                curr_month_last_day,
            )

            # Upload last month's reports.
            query_and_upload_to_s3.delay(
                account['schema_name'],
                account['provider_uuid'],
                table_dict,
                prev_month_first_day,
                prev_month_last_day,
            )
    LOG.info('Completed upload_normalized_data')
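# The comment above notes that Celery cannot serialize named tuples directly.
# A minimal sketch of what dictify_table_export_settings might look like; the
# real helper is defined elsewhere in masu and may differ:
def dictify_table_export_settings(table_export_setting):
    """Convert a TableExportSetting named tuple into a JSON-serializable dict."""
    # Named tuples expose _asdict(); wrapping it in dict() gives Celery's JSON
    # serializer a plain dict to work with.
    return dict(table_export_setting._asdict())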
def test_query_and_upload_skips_if_no_data(self, mock_uploader):
    """Assert query_and_upload_to_s3 uploads nothing if no data is found."""
    table_export_setting = self.get_table_export_setting_by_name("reporting_awscostentrylineitem")
    tasks.query_and_upload_to_s3(
        self.schema,
        self.aws_provider_uuid,
        dictify_table_export_settings(table_export_setting),
        start_date=self.future_date,
        end_date=self.future_date,
    )
    mock_uploader.return_value.upload_file.assert_not_called()
def test_query_and_upload_to_s3_multiple_days_multiple_rows(self, mock_uploader):
    """Assert query_and_upload_to_s3 for multiple days uploads multiple files."""
    table_export_setting = self.get_table_export_setting_by_name("reporting_awscostentrylineitem_daily_summary")
    tasks.query_and_upload_to_s3(
        self.schema,
        self.aws_provider_uuid,
        dictify_table_export_settings(table_export_setting),
        start_date=self.yesterday_date,
        end_date=self.today_date,
    )
    # Expect one upload call for yesterday and one for today.
    self.assertEqual(mock_uploader.return_value.upload_file.call_count, 2)
def upload_normalized_data():
    """Scheduled task to export normalized data to s3."""
    if not settings.ENABLE_S3_ARCHIVING:
        LOG.info("S3 Archiving is disabled. Not running task.")
        return

    LOG.info("Beginning upload_normalized_data")
    curr_date = DateAccessor().today()
    curr_month_range = calendar.monthrange(curr_date.year, curr_date.month)
    curr_month_first_day = date(year=curr_date.year, month=curr_date.month, day=1)
    curr_month_last_day = date(year=curr_date.year, month=curr_date.month, day=curr_month_range[1])

    previous_month = curr_date - relativedelta(months=1)
    prev_month_range = calendar.monthrange(previous_month.year, previous_month.month)
    prev_month_first_day = date(year=previous_month.year, month=previous_month.month, day=1)
    prev_month_last_day = date(year=previous_month.year, month=previous_month.month, day=prev_month_range[1])

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        LOG.info("processing schema %s provider uuid %s", account["schema_name"], account["provider_uuid"])
        for table in table_export_settings:
            # Celery does not serialize named tuples, so convert it to a dict
            # before handing it off to the celery task.
            table_dict = dictify_table_export_settings(table)

            # Upload this month's reports.
            query_and_upload_to_s3.delay(
                account["schema_name"], account["provider_uuid"], table_dict, curr_month_first_day, curr_month_last_day
            )

            # Upload last month's reports.
            query_and_upload_to_s3.delay(
                account["schema_name"], account["provider_uuid"], table_dict, prev_month_first_day, prev_month_last_day
            )
    LOG.info("Completed upload_normalized_data")
def test_query_and_upload_to_s3_archiving_false(self):
    """Assert query_and_upload_to_s3 does not run when S3 archiving is disabled."""
    today = self.today
    _, last_day_of_month = calendar.monthrange(today.year, today.month)

    curr_month_first_day = date(year=today.year, month=today.month, day=1)
    curr_month_last_day = date(year=today.year, month=today.month, day=last_day_of_month)

    date_range = (curr_month_first_day, curr_month_last_day)

    for table_export_setting in tasks.table_export_settings:
        with self.assertLogs("masu.celery.tasks", "INFO") as captured_logs:
            tasks.query_and_upload_to_s3(
                self.schema,
                self.aws_provider_uuid,
                dictify_table_export_settings(table_export_setting),
                date_range[0],
                date_range[1],
            )
            self.assertIn("S3 Archiving is disabled. Not running task.", captured_logs.output[0])
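# The assertion above only holds while ENABLE_S3_ARCHIVING is falsy. Assuming
# the flag lives in Django settings (as in the task version above) and noting
# that the actual fixture setup is not shown in this snippet, one common way
# to arrange that is django.test.override_settings, used as a decorator or a
# context manager:
#
#     from django.test import override_settings
#
#     with override_settings(ENABLE_S3_ARCHIVING=False):
#         tasks.query_and_upload_to_s3(...)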
def test_upload_normalized_data(self, mock_date, mock_upload, mock_orchestrator):
    """Test that the scheduled task uploads the correct normalized data."""
    test_export_setting = TableExportSetting(
        provider="test", output_name="test", sql="test_sql", iterate_daily=False
    )
    schema_name = "acct10001"
    provider_uuid = uuid.uuid4()

    mock_date.return_value = date(2015, 1, 5)
    mock_orchestrator.get_accounts.return_value = (
        [{"schema_name": schema_name, "provider_uuid": provider_uuid}],
        [],
    )

    current_month_start = date(2015, 1, 1)
    current_month_end = date(2015, 1, 31)
    prev_month_start = date(2014, 12, 1)
    prev_month_end = date(2014, 12, 31)

    call_curr_month = call.delay(
        schema_name,
        provider_uuid,
        dictify_table_export_settings(test_export_setting),
        current_month_start,
        current_month_end,
    )
    call_prev_month = call.delay(
        schema_name,
        provider_uuid,
        dictify_table_export_settings(test_export_setting),
        prev_month_start,
        prev_month_end,
    )

    with patch("masu.celery.tasks.table_export_settings", [test_export_setting]):
        tasks.upload_normalized_data()
    mock_upload.assert_has_calls([call_curr_month, call_prev_month])

    mock_date.return_value = date(2012, 3, 31)
    current_month_start = date(2012, 3, 1)
    current_month_end = date(2012, 3, 31)
    prev_month_start = date(2012, 2, 1)
    prev_month_end = date(2012, 2, 29)

    call_curr_month = call.delay(
        schema_name,
        provider_uuid,
        dictify_table_export_settings(test_export_setting),
        current_month_start,
        current_month_end,
    )
    call_prev_month = call.delay(
        schema_name,
        provider_uuid,
        dictify_table_export_settings(test_export_setting),
        prev_month_start,
        prev_month_end,
    )

    with patch("masu.celery.tasks.table_export_settings", [test_export_setting]):
        tasks.upload_normalized_data()
    mock_upload.assert_has_calls([call_curr_month, call_prev_month])
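# The expected calls above are built with unittest.mock.call: call.delay(...)
# describes an invocation of the mock's .delay attribute, and assert_has_calls
# matches those against the calls actually recorded on mock_upload. A compact,
# standalone illustration of the pattern (independent of this code base):
from unittest.mock import MagicMock, call

mock_task = MagicMock()
mock_task.delay("acct10001", "some-uuid")

# call.delay(...) builds the expected call object for the .delay attribute.
mock_task.assert_has_calls([call.delay("acct10001", "some-uuid")])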