def __init__(self, schema):
    """Initialize the accessor for a customer schema.

    Delegates to the parent initializer, then records the AWS datetime
    string format from ``Config`` and creates the ``DateAccessor`` and
    ``JinjaSql`` helper objects kept on the instance.

    Args:
        schema (str): The customer schema to associate with

    """
    super().__init__(schema)
    aws_datetime_format = Config.AWS_DATETIME_STR_FORMAT
    self._datetime_format = aws_datetime_format
    self.date_accessor = DateAccessor()
    self.jinja_sql = JinjaSql()
def setUpClass(cls):
    """Create the accessor, object creator and helpers shared by the test class."""
    super().setUpClass()

    aws_accessor = AWSReportDBAccessor(cls.schema)
    cls.accessor = aws_accessor
    cls.report_schema = aws_accessor.report_schema

    # Materialize the table names once so tests can iterate them repeatedly.
    cls.all_tables = [*AWS_CUR_TABLE_MAP.values()]

    cls.creator = ReportObjectCreator(cls.schema)
    cls.date_accessor = DateAccessor()
    cls.manifest_accessor = ReportManifestDBAccessor()
def upload_normalized_data():
    """Queue S3 export tasks for the current and previous month.

    For every account returned by the orchestrator and every entry in
    ``table_export_settings``, two ``query_and_upload_to_s3`` tasks are
    queued: one covering the current calendar month and one covering the
    previous calendar month.
    """

    def _month_bounds(day):
        """Return the first and last calendar dates of the month containing day."""
        _, days_in_month = calendar.monthrange(day.year, day.month)
        first = date(year=day.year, month=day.month, day=1)
        last = date(year=day.year, month=day.month, day=days_in_month)
        return first, last

    LOG.info('Beginning upload_normalized_data')

    today = DateAccessor().today()
    this_month = _month_bounds(today)
    last_month = _month_bounds(today - relativedelta(months=1))

    accounts, _ = Orchestrator.get_accounts()
    for account in accounts:
        schema_name = account['schema_name']
        provider_uuid = account['provider_uuid']
        LOG.info(
            'processing schema %s provider uuid %s',
            schema_name,
            provider_uuid,
        )
        for table in table_export_settings:
            # Celery does not serialize named tuples, so hand the task a
            # plain dict instead.
            table_dict = dictify_table_export_settings(table)

            # Current month first, then the previous month.
            for first_day, last_day in (this_month, last_month):
                query_and_upload_to_s3.delay(
                    schema_name,
                    provider_uuid,
                    table_dict,
                    first_day,
                    last_day,
                )

    LOG.info('Completed upload_normalized_data')
def test_populate_awstags_summary_table(self):
    """Verify the AWS tags summary table gains the daily line item tag keys."""
    accessor = self.accessor
    creator = self.creator

    bill_ids = []
    ce_table_name = AWS_CUR_TABLE_MAP['cost_entry']
    tags_summary_name = AWS_CUR_TABLE_MAP['tags_summary']
    ce_table = getattr(accessor.report_schema, ce_table_name)

    today = DateAccessor().today_with_timezone('UTC')
    last_month = today - relativedelta.relativedelta(months=1)
    product_families = ['Storage', 'Compute Instance', 'Database Storage', 'Database Instance']

    # Build one bill per month, each with a line item per product family.
    for cost_entry_date in (today, last_month):
        bill = creator.create_cost_entry_bill(cost_entry_date)
        bill_ids.append(str(bill.id))
        cost_entry = creator.create_cost_entry(bill, cost_entry_date)
        for family in product_families:
            product = creator.create_cost_entry_product(family)
            pricing = creator.create_cost_entry_pricing()
            reservation = creator.create_cost_entry_reservation()
            creator.create_cost_entry_line_item(bill, cost_entry, product, pricing, reservation)

    interval_bounds = accessor._session.query(
        func.min(ce_table.interval_start), func.max(ce_table.interval_start)
    ).first()
    start_date, end_date = interval_bounds

    query = accessor._get_db_obj_query(tags_summary_name)
    initial_count = query.count()

    accessor.populate_line_item_daily_table(start_date, end_date, bill_ids)
    accessor.populate_line_item_daily_summary_table(start_date, end_date, bill_ids)
    accessor.populate_tags_summary_table()

    self.assertNotEqual(query.count(), initial_count)

    tag_keys = [tag.key for tag in query.all()]

    # The summary keys must match the distinct keys on the daily table.
    accessor._cursor.execute(
        """SELECT DISTINCT jsonb_object_keys(tags) FROM reporting_awscostentrylineitem_daily"""
    )
    expected_tag_keys = [row[0] for row in accessor._cursor.fetchall()]

    self.assertEqual(sorted(tag_keys), sorted(expected_tag_keys))
def summarize_reports(reports_to_summarize):
    """
    Summarize reports returned from line summary task.

    Falsy entries are dropped and identical report dicts are collapsed so
    that each distinct report is considered once, in first-seen order. For
    every report whose manifest is ready for summary, an
    ``update_summary_tables`` task is queued covering the window from two
    days ago through today.

    Args:
        reports_to_summarize (list) list of report dicts to process; falsy
            entries are ignored

    Returns:
        None

    """
    # De-duplicate while keeping the incoming order. A set-based dedup
    # would make the task dispatch order depend on hash ordering.
    seen = set()
    reports_deduplicated = []
    for report in reports_to_summarize:
        if not report:
            continue
        fingerprint = tuple(report.items())
        if fingerprint in seen:
            continue
        seen.add(fingerprint)
        reports_deduplicated.append(report)

    for report in reports_deduplicated:
        manifest_id = report.get("manifest_id")
        # For day-to-day summarization we choose a small window to
        # cover new data from a window of days.
        # This saves us from re-summarizing unchanged data and cuts down
        # on processing time. There are override mechanisms in the
        # Updater classes for when full-month summarization is
        # required.
        with ReportManifestDBAccessor() as manifest_accessor:
            if not manifest_accessor.manifest_ready_for_summary(manifest_id):
                continue

            # Sample "today" once so both ends of the window come from the
            # same instant, even when this runs across midnight.
            today = DateAccessor().today()
            start_date = (today - datetime.timedelta(days=2)).strftime("%Y-%m-%d")
            end_date = today.strftime("%Y-%m-%d")

            LOG.info("report to summarize: %s", str(report))
            update_summary_tables.delay(
                report.get("schema_name"),
                report.get("provider_type"),
                report.get("provider_uuid"),
                start_date=start_date,
                end_date=end_date,
                manifest_id=manifest_id,
            )
def test_azure_update_summary_cost_model_costs(self):
    """Verify the Azure cost model update sets the bill's derived cost datetime."""
    cost_updater = AzureCostModelCostUpdater(schema=self.schema, provider=self.azure_provider)

    # Bills are looked up by the first day of the current month.
    today = DateAccessor().today_with_timezone("UTC")
    first_of_month = today.replace(day=1).date()

    cost_updater.update_summary_cost_model_costs()

    with AzureReportDBAccessor(self.schema) as accessor:
        matching_bills = accessor.get_cost_entry_bills_by_date(first_of_month)
        bill = matching_bills[0]
        self.assertIsNotNone(bill.derived_cost_datetime)
def test_populate_pod_label_summary_table(self, mock_vacuum):
    """Test that the pod label summary table is populated.

    Creates a report period, report and usage line item for the current
    and the previous month, populates the daily and pod label summary
    tables, and checks that the summary keys equal the distinct
    ``pod_labels`` keys found on the daily line item table.

    Args:
        mock_vacuum: Unused in the body; presumably injected by a patch
            decorator that is not visible in this block.

    """
    report_table_name = OCP_REPORT_TABLE_MAP['report']
    agg_table_name = OCP_REPORT_TABLE_MAP['pod_label_summary']

    report_table = getattr(self.accessor.report_schema, report_table_name)

    today = DateAccessor().today_with_timezone('UTC')
    last_month = today - relativedelta.relativedelta(months=1)

    for start_date in (today, last_month):
        # One report period per month is enough. The period used to be
        # created twice and the first result thrown away.
        period = self.creator.create_ocp_report_period(
            self.ocp_provider_uuid, period_date=start_date
        )
        report = self.creator.create_ocp_report(period, start_date)
        self.creator.create_ocp_usage_line_item(period, report)

    with schema_context(self.schema):
        report_entry = report_table.objects.all().aggregate(
            Min('interval_start'), Max('interval_start')
        )
        start_date = report_entry['interval_start__min']
        end_date = report_entry['interval_start__max']

    query = self.accessor._get_db_obj_query(agg_table_name)

    with schema_context(self.schema):
        initial_count = query.count()

    self.accessor.populate_line_item_daily_table(
        start_date, end_date, self.cluster_id
    )
    self.accessor.populate_pod_label_summary_table()

    self.assertNotEqual(query.count(), initial_count)

    with schema_context(self.schema):
        tags = query.all()
        tag_keys = [tag.key for tag in tags]

    with self.accessor._conn.cursor() as cursor:
        cursor.execute(
            """SELECT DISTINCT jsonb_object_keys(pod_labels) FROM reporting_ocpusagelineitem_daily"""
        )
        expected_tag_keys = cursor.fetchall()
        expected_tag_keys = [tag[0] for tag in expected_tag_keys]

    self.assertEqual(sorted(tag_keys), sorted(expected_tag_keys))
def test_today_override(self):
    """Test today() with override.

    With ``Config.DEBUG`` enabled and ``Config.MASU_DATE_OVERRIDE`` set,
    ``DateAccessor.today()`` is expected to report the overridden date.
    """
    # Config is global state: put the original values back after the test
    # so the override cannot leak into tests that run later.
    self.addCleanup(setattr, Config, 'DEBUG', Config.DEBUG)
    self.addCleanup(setattr, Config, 'MASU_DATE_OVERRIDE', Config.MASU_DATE_OVERRIDE)

    Config.DEBUG = True
    Config.MASU_DATE_OVERRIDE = '2018-01-01 15:47:33'

    accessor = DateAccessor()
    today = accessor.today()

    self.assertEqual(today.year, 2018)
    self.assertEqual(today.month, 1)
    self.assertEqual(today.day, 1)
def _setup_ready_status(self):
    """Record a READY provider status entry with zero retries."""
    # Build the record before opening the accessor, as the timestamp is
    # taken at this point.
    ready_status = dict(
        provider_id=self.provider_uuid,
        status=ProviderStatusCode.READY,
        last_message="none",
        timestamp=DateAccessor().today(),
        retries=0,
    )
    with ProviderStatus(self.aws_provider_uuid) as status_accessor:
        status_accessor.add(**ready_status)
def test_get_manifest_file_not_found(self, mock_download_file):
    """Verify _get_manifest returns empty results when the download raises no-file."""
    mock_datetime = DateAccessor().today()
    mock_download_file.side_effect = AWSReportDownloaderNoFileError("fake error")

    downloader = self.aws_report_downloader
    manifest_result = downloader._get_manifest(mock_datetime)
    manifest_file, manifest_json, manifest_modified_timestamp = manifest_result

    self.assertEqual(manifest_file, "")
    self.assertEqual(manifest_json, downloader.empty_manifest)
    self.assertIsNone(manifest_modified_timestamp)
def _setup_ready_status(self):
    """Record a READY provider status entry with zero retries."""
    # Build the record before opening the accessor, as the timestamp is
    # taken at this point.
    ready_status = dict(
        provider_id=self.provider_uuid,
        status=ProviderStatusCode.READY,
        last_message='none',
        timestamp=DateAccessor().today(),
        retries=0,
    )
    with ProviderStatus(self.aws_provider_uuid) as status_accessor:
        status_accessor.add(**ready_status)
def setUpClass(cls):
    """Prepare the shared manifest data and manifest accessor for the class."""
    # The billing period starts on the first day of the current UTC month.
    utc_now = DateAccessor().today_with_timezone('UTC')
    billing_start = utc_now.replace(day=1)

    cls.manifest_dict = dict(
        assembly_id='1234',
        billing_period_start_datetime=billing_start,
        num_total_files=2,
        provider_id=1,
    )
    cls.manifest_accessor = ReportManifestDBAccessor()
def test_today_with_timezone_string(self):
    """Test that a timezone string works as expected.

    Calls ``today_with_timezone`` with the string ``"UTC"`` and checks that
    the result matches the current UTC date, hour and minute and carries
    ``pytz.UTC`` as its tzinfo.
    """
    string_tz = "UTC"
    # Use an aware "now" rather than the deprecated, naive
    # datetime.utcnow(). The date/hour/minute fields are the same.
    current_utc_time = datetime.now(tz=pytz.UTC)

    accessor = DateAccessor()
    result_time = accessor.today_with_timezone(string_tz)

    self.assertEqual(current_utc_time.date(), result_time.date())
    self.assertEqual(current_utc_time.hour, result_time.hour)
    self.assertEqual(current_utc_time.minute, result_time.minute)
    self.assertEqual(result_time.tzinfo, pytz.UTC)
def log_last_completed_datetime(self):
    """
    Convenience method for logging processing completed.

    Sets ``last_completed_datetime`` on the wrapped object to the current
    UTC time from ``DateAccessor``.

    Args:
        None
    Returns:
        None

    """
    completed_at = DateAccessor().today_with_timezone('UTC')
    self._obj.last_completed_datetime = completed_at
def __init__(self, schema, provider, manifest):
    """Store the schema, provider and manifest and create a date accessor.

    Args:
        schema (str): The customer schema to associate with
        provider: Stored unchanged as ``_provider``
        manifest: Stored unchanged as ``_manifest``

    """
    self._schema, self._provider, self._manifest = schema, provider, manifest
    self._date_accessor = DateAccessor()
def test_populate_line_item_daily_summary_table(self):
    """Verify the daily summary table is filled with the expected dates and columns."""
    accessor = self.accessor
    creator = self.creator

    report_table_name = OCP_REPORT_TABLE_MAP['report']
    summary_table_name = OCP_REPORT_TABLE_MAP['line_item_daily_summary']

    report_table = getattr(accessor.report_schema, report_table_name)
    summary_table = getattr(accessor.report_schema, summary_table_name)

    utc_now = DateAccessor().today_with_timezone('UTC')
    period = creator.create_ocp_report_period(
        utc_now, provider_id=self.ocp_provider_id, cluster_id=self.cluster_id
    )
    report = creator.create_ocp_report(period, utc_now)
    for _ in range(25):
        creator.create_ocp_usage_line_item(period, report)

    first_interval, last_interval = accessor._session.query(
        func.min(report_table.interval_start), func.max(report_table.interval_start)
    ).first()

    # Truncate both bounds to midnight before populating and comparing.
    midnight = {'hour': 0, 'minute': 0, 'second': 0, 'microsecond': 0}
    start_date = first_interval.replace(**midnight)
    end_date = last_interval.replace(**midnight)

    query = accessor._get_db_obj_query(summary_table_name)
    initial_count = query.count()

    accessor.populate_line_item_daily_table(start_date, end_date, self.cluster_id)
    accessor.populate_line_item_daily_summary_table(start_date, end_date, self.cluster_id)

    self.assertNotEqual(query.count(), initial_count)

    usage_bounds = accessor._session.query(
        func.min(summary_table.usage_start), func.max(summary_table.usage_start)
    ).first()
    result_start_date, result_end_date = usage_bounds

    self.assertEqual(result_start_date, start_date)
    self.assertEqual(result_end_date, end_date)

    entry = query.first()

    # Every one of these summary columns must be set on the first row.
    summary_columns = (
        'cluster_id',
        'namespace',
        'node',
        'node_capacity_cpu_core_hours',
        'node_capacity_cpu_cores',
        'node_capacity_memory_gigabyte_hours',
        'node_capacity_memory_gigabytes',
        'pod',
        'pod_labels',
        'pod_limit_cpu_core_hours',
        'pod_limit_memory_gigabyte_hours',
        'pod_request_cpu_core_hours',
        'pod_request_memory_gigabyte_hours',
        'pod_usage_cpu_core_hours',
        'pod_usage_memory_gigabyte_hours',
        'usage_end',
        'usage_start',
    )
    for column in summary_columns:
        self.assertIsNotNone(getattr(entry, column))
def test_get_usage_periods_by_date(self):
    """Verify report periods can be looked up by their start date."""
    utc_now = DateAccessor().today_with_timezone('UTC')
    period_start = utc_now.replace(day=1)
    prev_period_start = period_start - relativedelta.relativedelta(months=1)

    reporting_period = self.creator.create_ocp_report_period(period_start)
    prev_reporting_period = self.creator.create_ocp_report_period(prev_period_start)

    # Each created period must be returned when filtering on its own date.
    current_periods = self.accessor.get_usage_periods_by_date(period_start.date())
    self.assertIn(reporting_period, current_periods)

    previous_periods = self.accessor.get_usage_periods_by_date(prev_period_start.date())
    self.assertIn(prev_reporting_period, previous_periods)
def __init__(self, schema, provider, manifest):
    """Store the constructor arguments, load the column map and create a date accessor.

    Args:
        schema (str): The customer schema to associate with
        provider: Stored unchanged as ``_provider``
        manifest: Stored unchanged as ``_manifest``

    """
    self._schema_name, self._provider, self._manifest = schema, provider, manifest

    # The column map is read through a short-lived common accessor.
    with ReportingCommonDBAccessor() as common_accessor:
        self._column_map = common_accessor.column_map

    self._date_accessor = DateAccessor()
def log_last_started_datetime(self):
    """
    Convenience method for logging start processing.

    Sets ``last_started_datetime`` on the wrapped object to the current
    UTC time from ``DateAccessor`` and saves the object.

    Args:
        None
    Returns:
        None

    """
    started_at = DateAccessor().today_with_timezone("UTC")
    self._obj.last_started_datetime = started_at
    self._obj.save()
def test_get_last_report_completed_datetime(self):
    """Verify the latest completion time across a manifest's reports is returned."""
    manifest = self.manifest_accessor.add(**self.manifest_dict)

    earlier_time = DateAccessor().today_with_timezone('UTC')
    later_time = earlier_time + datetime.timedelta(hours=1)

    # Record two reports on the same manifest, completed an hour apart.
    completions = (
        ('earlier_report', earlier_time),
        ('later_report', later_time),
    )
    for report_name, completed_at in completions:
        ReportStatsDBAccessor(report_name, manifest.id).update(last_completed_datetime=completed_at)

    result = self.manifest_accessor.get_last_report_completed_datetime(manifest.id)

    self.assertEqual(result, later_time)
def setUpClass(cls):
    """Create the Azure accessor, object creator and helpers shared by the test class."""
    super().setUpClass()

    # The column map is read through a short-lived common accessor.
    with ReportingCommonDBAccessor() as common_accessor:
        cls.column_map = common_accessor.column_map

    azure_accessor = AzureReportDBAccessor("acct10001", cls.column_map)
    cls.accessor = azure_accessor
    cls.report_schema = azure_accessor.report_schema

    cls.all_tables = [*AZURE_REPORT_TABLE_MAP.values()]

    cls.creator = ReportObjectCreator(cls.schema, cls.column_map)
    cls.date_accessor = DateAccessor()
    cls.manifest_accessor = ReportManifestDBAccessor()
def setUpClass(cls):
    """Create the fake reports, fake account ARN and reference dates for the class."""
    super().setUpClass()

    fake = faker.Faker()
    cls.fake = fake

    # One fake report per compression type, in this order.
    cls.fake_reports = [
        {"file": fake.word(), "compression": compression}
        for compression in ("GZIP", "PLAIN")
    ]
    cls.fake_account = fake_arn(service="iam", generate_account_id=True)

    today = DateAccessor().today_with_timezone("UTC")
    cls.today = today
    cls.yesterday = today - timedelta(days=1)
def setUp(self):
    """Prepare the schema name, manifest data and manifest accessor for each test."""
    super().setUp()
    self.schema = self.schema_name

    # The billing period starts on the first day of the current UTC month.
    utc_now = DateAccessor().today_with_timezone('UTC')
    billing_start = utc_now.replace(day=1)

    self.manifest_dict = dict(
        assembly_id='1234',
        billing_period_start_datetime=billing_start,
        num_total_files=2,
        provider_uuid=self.provider_uuid,
    )
    self.manifest_accessor = ReportManifestDBAccessor()
def __init__(self, schema, column_map):
    """Initialize the accessor for a customer schema and column map.

    Delegates to the parent initializer, then records the AWS datetime
    string format from ``Config``, the column map, the schema name and a
    ``DateAccessor`` on the instance.

    Args:
        schema (str): The customer schema to associate with
        column_map (dict): A mapping of report columns to database columns

    """
    super().__init__(schema, column_map)
    aws_datetime_format = Config.AWS_DATETIME_STR_FORMAT
    self._datetime_format = aws_datetime_format
    self.column_map, self._schema_name = column_map, schema
    self.date_accessor = DateAccessor()
def test_get_billing_month_start(self):
    """Test that a proper date is returned for the bill month.

    The same expected month start must come back whether the input is a
    string, a datetime or a date.
    """
    dh = DateHelper()
    accessor = DateAccessor()
    expected = dh.this_month_start.date()

    today = dh.today
    # String, datetime and date forms of the same day, checked in order.
    inputs = (str(today), today, today.date())

    for value in inputs:
        self.assertEqual(accessor.get_billing_month_start(value), expected)
def test_today_override_override_not_set_debug_false(self):
    """Test today() with override not set when debug is false.

    With ``Config.DEBUG`` disabled and no ``Config.MASU_DATE_OVERRIDE``,
    ``DateAccessor.today()`` is expected to match the real current date.
    """
    # Config is global state: put the original values back after the test
    # so these settings cannot leak into tests that run later.
    self.addCleanup(setattr, Config, 'DEBUG', Config.DEBUG)
    self.addCleanup(setattr, Config, 'MASU_DATE_OVERRIDE', Config.MASU_DATE_OVERRIDE)

    Config.DEBUG = False
    Config.MASU_DATE_OVERRIDE = None

    accessor = DateAccessor()
    today = accessor.today()
    expected_date = datetime.today()

    self.assertEqual(today.year, expected_date.year)
    self.assertEqual(today.month, expected_date.month)
    self.assertEqual(today.day, expected_date.day)
def setUpClass(cls):
    """Create the fake reports, fake account ARN and reference dates for the class."""
    super().setUpClass()

    fake = faker.Faker()
    cls.fake = fake

    # One fake report per compression type, in this order.
    cls.fake_reports = [
        {'file': fake.word(), 'compression': compression}
        for compression in ('GZIP', 'PLAIN')
    ]
    cls.fake_account = fake_arn(service='iam', generate_account_id=True)

    today = DateAccessor().today_with_timezone('UTC')
    cls.today = today
    cls.yesterday = today - timedelta(days=1)
def test_start_manifest_processing_priority_queue(self, mock_download_manifest, mock_task, mock_inspect):
    """Verify which queue start_manifest_processing targets for each orchestrator setup.

    Each case is (provider_uuid, queue_name, expected_queue). The queue is
    read from the options of the second positional argument of the last
    ``mock_task`` call.
    """
    cases = [
        (str(uuid4()), "priority", "priority"),
        (None, "priority", "summary"),
        (str(uuid4()), None, "summary"),
    ]
    downloader_manifest = {
        "manifest_id": 1,
        "files": [{"local_file": "file1.csv", "key": "filekey"}],
    }

    for provider_uuid, queue_name, expected_queue in cases:
        with self.subTest(test="qe-account"):
            mock_download_manifest.return_value = downloader_manifest
            orchestrator = Orchestrator(provider_uuid=provider_uuid, queue_name=queue_name)

            account = self.mock_accounts[0]
            orchestrator.start_manifest_processing(
                account.get("customer_name"),
                account.get("credentials"),
                account.get("data_source"),
                "AWS-local",
                account.get("schema_name"),
                account.get("provider_uuid"),
                DateAccessor().get_billing_months(1)[0],
            )

            queued_signature = mock_task.call_args.args[1]
            actual_queue = queued_signature.options.get("queue")
            self.assertEqual(actual_queue, expected_queue)
def setUp(self):
    """Create a manifest record for the GCP provider before each test."""
    super().setUp()

    # The billing period starts on the first day of the current UTC month.
    billing_start = DateAccessor().today_with_timezone("UTC").replace(day=1)

    self.manifest_dict = dict(
        assembly_id="1234",
        billing_period_start_datetime=billing_start,
        num_total_files=2,
        provider_id=self.gcp_provider.uuid,
    )
    self.manifest = self.manifest_accessor.add(**self.manifest_dict)
def setUp(self):
    """Build the fake get-report arguments shared by the tests."""
    super().setUp()

    # Values are produced in the same order as the keys are listed.
    self.fake_get_report_args = dict(
        customer_name=self.fake.word(),
        authentication=self.fake_account,
        provider_type=Provider.PROVIDER_AWS,
        schema_name=self.fake.word(),
        billing_source=self.fake.word(),
        provider_uuid=self.aws_provider_uuid,
        report_month=str(DateAccessor().today()),
    )