def test_refresh_materialized_views(self):
    """Test that materialized views are refreshed.

    Loads fake AWS data, registers a manifest, runs the refresh task, and
    verifies both that the cost views contain rows and that the manifest is
    marked completed.
    """
    manifest_kwargs = {
        "assembly_id": "12345",
        "billing_period_start_datetime": DateAccessor().today_with_timezone("UTC"),
        "num_total_files": 2,
        "provider_uuid": self.aws_provider_uuid,
        "task": "170653c0-3e66-4b7e-a764-336496d7ca5a",
    }
    aws_data = FakeAWSCostData(self.aws_provider)
    data_generator = AWSReportDataGenerator(self.tenant)
    data_generator.add_data_to_tenant(aws_data)

    # Register a manifest so the refresh can mark it completed.
    with ReportManifestDBAccessor() as accessor:
        manifest = accessor.add(**manifest_kwargs)
        manifest.save()

    refresh_materialized_views(self.schema, Provider.PROVIDER_AWS, manifest_id=manifest.id)

    # Only the cost-oriented views are expected to have data here.
    cost_views = [mat_view for mat_view in AWS_MATERIALIZED_VIEWS if "Cost" in mat_view._meta.db_table]
    with schema_context(self.schema):
        for mat_view in cost_views:
            self.assertNotEqual(mat_view.objects.count(), 0)

    # The refresh should have stamped the manifest's completion time.
    with ReportManifestDBAccessor() as accessor:
        manifest = accessor.get_manifest_by_id(manifest.id)
        self.assertIsNotNone(manifest.manifest_completed_datetime)
def provider_post_delete_callback(*args, **kwargs):
    """
    Clean up records tied to a Provider that was just deleted.

    Removes the ProviderAuthentication / ProviderBillingSource rows when no
    other Provider still references them, deletes related CostModelMap rows,
    schedules deletion of archived data, and refreshes materialized views.

    Note: Signal receivers must accept keyword arguments (**kwargs).
    """
    provider = kwargs["instance"]

    # Drop the authentication record if this was its last referencing provider.
    if provider.authentication_id:
        remaining_auth_refs = (
            Provider.objects.exclude(uuid=provider.uuid)
            .filter(authentication_id=provider.authentication_id)
            .count()
        )
        if not remaining_auth_refs:
            LOG.info("Deleting unreferenced ProviderAuthentication")
            execute_delete_sql(ProviderAuthentication.objects.filter(pk=provider.authentication_id))

    # Same for the billing source record.
    if provider.billing_source_id:
        remaining_billing_refs = (
            Provider.objects.exclude(uuid=provider.uuid)
            .filter(billing_source_id=provider.billing_source_id)
            .count()
        )
        if not remaining_billing_refs:
            LOG.info("Deleting unreferenced ProviderBillingSource")
            execute_delete_sql(ProviderBillingSource.objects.filter(pk=provider.billing_source_id))

    if not provider.customer:
        LOG.warning("Provider %s has no Customer; we cannot call delete_archived_data.", provider.uuid)
        return

    customer = provider.customer
    customer.date_updated = DateHelper().now_utc
    customer.save()

    LOG.info("Deleting any related CostModelMap records")
    execute_delete_sql(CostModelMap.objects.filter(provider_uuid=provider.uuid))

    if settings.ENABLE_S3_ARCHIVING or enable_trino_processing(
        provider.uuid, provider.type, provider.customer.schema_name
    ):
        # Local import of task function to avoid potential import cycle.
        from masu.celery.tasks import delete_archived_data

        LOG.info("Deleting any archived data")
        # Defer the async task until the surrounding transaction commits.
        transaction.on_commit(
            partial(delete_archived_data.delay, provider.customer.schema_name, provider.type, provider.uuid)
        )

    LOG.info("Refreshing materialized views post-provider-delete uuid=%s.", provider.uuid)
    refresh_materialized_views(
        provider.customer.schema_name, provider.type, provider_uuid=provider.uuid, synchronous=True
    )
def load_azure_data(self, customer, static_data_file, credentials=None, data_source=None):
    """Load Azure data into the database.

    Creates a local Azure provider, renders nise static reports for each
    date range in self.dates, processes the generated CSVs, and runs the
    summary/cost-model/materialized-view updates.

    Args:
        customer: Customer model instance the provider is attached to.
        static_data_file: template file used to render nise static reports.
        credentials (dict): optional auth credentials; defaults to dummy values.
        data_source (dict): optional billing source; defaults to dummy values.
    """
    provider_type = Provider.PROVIDER_AZURE_LOCAL
    # nise expects the non "-local" provider name.
    nise_provider_type = provider_type.replace("-local", "")
    report_name = "Test"
    if credentials is None:
        credentials = {
            "subscription_id": "11111111-1111-1111-1111-11111111",
            "tenant_id": "22222222-2222-2222-2222-22222222",
            "client_id": "33333333-3333-3333-3333-33333333",
            "client_secret": "MyPassW0rd!",
        }
    if data_source is None:
        data_source = {"resource_group": "resourcegroup1", "storage_account": "storageaccount1"}
    # Disable auto-ingest so creating the provider does not kick off tasks.
    with patch.object(settings, "AUTO_DATA_INGEST", False):
        provider = baker.make(
            "Provider",
            type=provider_type,
            authentication__credentials=credentials,
            customer=customer,
            billing_source__data_source=data_source,
        )
    template, static_data_path = self.prepare_template(provider_type, static_data_file)
    options = {
        "static_report_file": static_data_path,
        "azure_report_name": report_name,
        "azure_container_name": self.nise_data_path,
    }
    base_path = f"{self.nise_data_path}/{report_name}"
    for start_date, end_date, bill_date in self.dates:
        manifest = baker.make(
            "CostUsageReportManifest",
            _fill_optional=True,
            provider=provider,
            billing_period_start_datetime=bill_date,
        )
        # Render the static report template for this date range, then run nise.
        with open(static_data_path, "w") as f:
            f.write(template.render(start_date=start_date, end_date=end_date))
        run(nise_provider_type.lower(), options)
        report_path = self.build_report_path(provider_type, bill_date, base_path)
        # Process every generated CSV, skipping directories and manifest files.
        for report in os.scandir(report_path):
            if os.path.isdir(report):
                continue
            elif "manifest" in report.name.lower():
                continue
            self.process_report(report, "PLAIN", provider_type, provider, manifest)
        # Patch the celery chain so summarization runs synchronously.
        with patch("masu.processor.tasks.chain"), patch.object(settings, "AUTO_DATA_INGEST", False):
            update_summary_tables(
                self.schema, provider_type, provider.uuid, start_date, end_date, manifest_id=manifest.id
            )
    update_cost_model_costs(self.schema, provider.uuid, self.dh.last_month_start, self.dh.today)
    refresh_materialized_views(self.schema, provider_type)
    # Remove the generated report files.
    shutil.rmtree(base_path, ignore_errors=True)
def load_openshift_data(self, customer, static_data_file, cluster_id):
    """Load OpenShift data into the database.

    Creates an OCP provider for the given cluster, renders nise static
    reports for each date range in self.dates, processes the generated
    files, and runs the summary/cost-model/materialized-view updates.

    Args:
        customer: Customer model instance the provider is attached to.
        static_data_file: template file used to render nise static reports.
        cluster_id (str): OCP cluster identifier, also used as the alias.
    """
    provider_type = Provider.PROVIDER_OCP
    # Disable auto-ingest so creating the provider does not kick off tasks.
    with override_settings(AUTO_DATA_INGEST=False):
        provider = baker.make(
            "Provider",
            type=provider_type,
            authentication__provider_resource_name=cluster_id,
            billing_source__bucket="",
            customer=customer,
        )
    template, static_data_path = self.prepare_template(provider_type, static_data_file)
    options = {
        "static_report_file": static_data_path,
        "insights_upload": self.nise_data_path,
        "ocp_cluster_id": cluster_id,
    }
    base_path = f"{self.nise_data_path}/{cluster_id}"
    for start_date, end_date, bill_date in self.dates:
        manifest = baker.make(
            "CostUsageReportManifest",
            _fill_optional=True,
            provider=provider,
            billing_period_start_datetime=bill_date,
            num_processed_files=0,
            num_total_files=3,
        )
        # Render the static report template for this date range, then run nise.
        with open(static_data_path, "w") as f:
            f.write(template.render(start_date=start_date, end_date=end_date))
        run(provider_type.lower(), options)
        report_path = self.build_report_path(provider_type, bill_date, base_path)
        # nise drops files under report_path; flatten them into base_path.
        for report in os.scandir(report_path):
            shutil.move(report.path, f"{base_path}/{report.name}")
        # Process every moved file, skipping directories and manifest files.
        for report in [f.path for f in os.scandir(base_path)]:
            if os.path.isdir(report):
                continue
            elif "manifest" in report.lower():
                continue
            self.process_report(report, "PLAIN", provider_type, provider, manifest)
        # Patch the celery chain so summarization runs synchronously.
        with patch("masu.processor.tasks.chain"):
            update_summary_tables(
                self.schema, provider_type, provider.uuid, start_date, end_date, manifest_id=manifest.id
            )
    update_cost_model_costs(self.schema, provider.uuid, self.dh.last_month_start, self.dh.today)
    refresh_materialized_views(self.schema, provider_type)
    # Remove the generated report files.
    shutil.rmtree(report_path, ignore_errors=True)
def load_aws_data(self, customer, static_data_file, account_id=None, provider_resource_name=None):
    """Load AWS data into the database.

    Creates a local AWS provider, renders nise static reports for each date
    range in self.dates, processes the generated gzipped CSVs, and runs the
    summary/cost-model/materialized-view updates.

    Args:
        customer: Customer model instance the provider is attached to.
        static_data_file: template file used to render nise static reports.
        account_id (str): optional usage account id; defaults to a dummy id.
        provider_resource_name (str): optional IAM role ARN; defaults to a dummy ARN.
    """
    provider_type = Provider.PROVIDER_AWS_LOCAL
    if account_id is None:
        account_id = "9999999999999"
    if provider_resource_name is None:
        provider_resource_name = "arn:aws:iam::999999999999:role/CostManagement"
    # nise expects the non "-local" provider name.
    nise_provider_type = provider_type.replace("-local", "")
    report_name = "Test"
    # Disable auto-ingest so creating the provider does not kick off tasks.
    with patch.object(settings, "AUTO_DATA_INGEST", False):
        provider = baker.make(
            "Provider",
            type=provider_type,
            authentication__provider_resource_name=provider_resource_name,
            customer=customer,
            billing_source__bucket="test-bucket",
        )
    template, static_data_path = self.prepare_template(provider_type, static_data_file)
    options = {
        "static_report_file": static_data_path,
        "aws_report_name": report_name,
        "aws_bucket_name": self.nise_data_path,
    }
    base_path = f"{self.nise_data_path}/{report_name}"
    with schema_context(self.schema):
        baker.make("AWSAccountAlias", account_id=account_id, account_alias="Test Account")
    for start_date, end_date, bill_date in self.dates:
        manifest = baker.make(
            "CostUsageReportManifest",
            _fill_optional=True,
            provider=provider,
            billing_period_start_datetime=bill_date,
        )
        # Render the static report template for this date range, then run nise.
        with open(static_data_path, "w") as f:
            f.write(template.render(start_date=start_date, end_date=end_date, account_id=account_id))
        run(nise_provider_type.lower(), options)
        report_path = self.build_report_path(provider_type, bill_date, base_path)
        # AWS reports are nested one directory deep under report_path.
        # Bugfix: the inner loop previously rebound the outer loop variable
        # `report`, shadowing the DirEntry; use a distinct name instead.
        for entry in os.scandir(report_path):
            if os.path.isdir(entry):
                for report in [f.path for f in os.scandir(f"{report_path}/{entry.name}")]:
                    if os.path.isdir(report):
                        continue
                    elif "manifest" in report.lower():
                        continue
                    self.process_report(report, "GZIP", provider_type, provider, manifest)
        # Patch the celery chain so summarization runs synchronously.
        with patch("masu.processor.tasks.chain"), patch.object(settings, "AUTO_DATA_INGEST", False):
            update_summary_tables(
                self.schema, provider_type, provider.uuid, start_date, end_date, manifest_id=manifest.id
            )
    update_cost_model_costs(self.schema, provider.uuid, self.dh.last_month_start, self.dh.today)
    refresh_materialized_views(self.schema, provider_type)
    # Remove the generated report files.
    shutil.rmtree(base_path, ignore_errors=True)
def provider_post_delete_callback(*args, **kwargs):
    """
    Clean up records tied to a Provider that was just deleted.

    Deletes the authentication/billing-source rows if no other Provider
    references them, removes CostModelMap rows, schedules archived-data
    deletion, and refreshes materialized views.

    Note: Signal receivers must accept keyword arguments (**kwargs).
    """
    provider = kwargs["instance"]

    # Delete the authentication record if this was its last referencing provider.
    if provider.authentication:
        other_auth_refs = Provider.objects.exclude(uuid=provider.uuid).filter(
            authentication=provider.authentication
        )
        if other_auth_refs.count() == 0:
            provider.authentication.delete()

    # Same for the billing source record.
    if provider.billing_source:
        other_billing_refs = Provider.objects.exclude(uuid=provider.uuid).filter(
            billing_source=provider.billing_source
        )
        if other_billing_refs.count() == 0:
            provider.billing_source.delete()

    # Remove any cost model mappings for this provider.
    cost_model_maps = CostModelMap.objects.filter(provider_uuid=provider.uuid)
    if cost_model_maps:
        cost_model_maps.delete()

    if not provider.customer:
        LOG.warning("Provider %s has no Customer; we cannot call delete_archived_data.", provider.uuid)
        return

    customer = provider.customer
    customer.date_updated = DateHelper().now_utc
    customer.save()

    if settings.ENABLE_S3_ARCHIVING or enable_trino_processing(provider.uuid):
        # Local import of task function to avoid potential import cycle.
        from masu.celery.tasks import delete_archived_data

        # Defer the async task until the surrounding transaction commits.
        transaction.on_commit(
            partial(delete_archived_data.delay, provider.customer.schema_name, provider.type, provider.uuid)
        )

    refresh_materialized_views(
        provider.customer.schema_name, provider.type, provider_uuid=provider.uuid, synchronous=True
    )
def cleanup_provider_without_source(cleaning_list):
    """Delete Providers that have no Source, then refresh affected views.

    Args:
        cleaning_list (dict): expects a "providers_without_sources" entry
            containing the Provider instances to remove.
    """
    provider_without_source = cleaning_list.get("providers_without_sources")
    if provider_without_source:
        # Collect unique (schema, type) pairs so each view set refreshes once.
        materialized_views_to_update = []
        for provider in provider_without_source:
            schema_name = provider.customer.schema_name
            provider_type = provider.type
            with schema_context(schema_name):
                # NOTE: closing parenthesis was missing from this log message.
                LOG.info(
                    f"Removing Provider without Source: {str(provider.name)} ({str(provider.uuid)})"
                )
                Provider.objects.get(uuid=provider.uuid).delete()
            mat_view_dict = {"schema": schema_name, "type": provider_type}
            if mat_view_dict not in materialized_views_to_update:
                materialized_views_to_update.append(mat_view_dict)
        for mat_view in materialized_views_to_update:
            LOG.info(f"Refreshing Materialized Views: {str(mat_view)}")
            refresh_materialized_views(mat_view.get("schema"), mat_view.get("type"))
def add_data_to_tenant(self, data, product="ec2"):
    """Populate tenant with data.

    Generates hour-by-hour AWS cost entry line items between
    data.usage_start and data.usage_end, then rolls them up into the
    daily/summary/tag tables and refreshes the AWS materialized views.

    Args:
        data (FakeAWSCostData): fake cost data to load.
        product (str): AWS product key understood by data.product().
    """
    assert isinstance(data, FakeAWSCostData), "FakeAWSCostData type not provided"
    with tenant_context(self.tenant):
        # get or create alias
        AWSAccountAlias.objects.get_or_create(account_id=data.account_id, account_alias=data.account_alias)

        # create bill
        bill, _ = AWSCostEntryBill.objects.get_or_create(**data.bill)

        # create ec2 product
        product_data = data.product(product)
        ce_product, _ = AWSCostEntryProduct.objects.get_or_create(**product_data)

        # create pricing
        ce_pricing, _ = AWSCostEntryPricing.objects.get_or_create(**data.pricing)

        # add hourly data: one cost entry + line item per hour of the range
        data_start = data.usage_start
        data_end = data.usage_end
        current = data_start

        while current < data_end:
            end_hour = current + DateHelper().one_hour

            # generate copy of data with 1 hour usage range.
            curr_data = copy.deepcopy(data)
            curr_data.usage_end = end_hour
            curr_data.usage_start = current

            # keep line items within the same AZ
            curr_data.availability_zone = data.availability_zone

            # get or create cost entry
            cost_entry_data = curr_data.cost_entry
            cost_entry_data.update({"bill": bill})
            cost_entry, _ = AWSCostEntry.objects.get_or_create(**cost_entry_data)

            # create line item, linking it to the bill/product/pricing rows above
            line_item_data = curr_data.line_item(product)
            model_instances = {
                "cost_entry": cost_entry,
                "cost_entry_bill": bill,
                "cost_entry_product": ce_product,
                "cost_entry_pricing": ce_pricing,
            }
            line_item_data.update(model_instances)
            line_item, _ = AWSCostEntryLineItem.objects.get_or_create(**line_item_data)

            current = end_hour

        # roll the hourly line items up into the daily/summary/tag tables
        self._populate_daily_table()
        self._populate_daily_summary_table()
        self._populate_tag_summary_table()
    refresh_materialized_views(self.tenant.schema_name, "AWS")
def load_openshift_on_cloud_data(self, provider_type, cluster_id, bills, report_periods):
    """Load OCP on AWS Daily Summary table.

    Despite the historical name, this handles OCP-on-AWS, OCP-on-Azure, and
    OCP-on-GCP: it bakes daily and project-daily summary rows for every day
    in self.dates, then updates tag/UI summary tables and refreshes the
    materialized views.

    Args:
        provider_type (str): one of the AWS/Azure/GCP (or -local) Provider types.
        cluster_id (str): OCP cluster identifier, also used as the alias.
        bills: cost entry bill objects, one per date range in self.dates.
        report_periods: OCP report period objects, one per date range.
    """
    # Fields that differ per infrastructure provider type; merged into each
    # baked row via **unique_fields below.
    unique_fields = {}
    if provider_type in (Provider.PROVIDER_AWS, Provider.PROVIDER_AWS_LOCAL):
        daily_summary_recipe = "api.report.test.util.ocp_on_aws_daily_summary"
        project_summary_pod_recipe = "api.report.test.util.ocp_on_aws_project_daily_summary_pod"
        project_summary_storage_recipe = "api.report.test.util.ocp_on_aws_project_daily_summary_storage"
        dbaccessor, tags_update_method, ui_update_method = (
            AWSReportDBAccessor,
            "populate_ocp_on_aws_tags_summary_table",
            "populate_ocp_on_aws_ui_summary_tables",
        )
        with schema_context(self.schema):
            # Reuse one of the pre-loaded account aliases at random.
            account_alias = random.choice(list(AWSAccountAlias.objects.all()))
            unique_fields = {"currency_code": self.currency, "account_alias": account_alias}
    elif provider_type in (Provider.PROVIDER_AZURE, Provider.PROVIDER_AZURE_LOCAL):
        daily_summary_recipe = "api.report.test.util.ocp_on_azure_daily_summary"
        project_summary_pod_recipe = "api.report.test.util.ocp_on_azure_project_daily_summary_pod"
        project_summary_storage_recipe = "api.report.test.util.ocp_on_azure_project_daily_summary_storage"
        dbaccessor, tags_update_method, ui_update_method = (
            AzureReportDBAccessor,
            "populate_ocp_on_azure_tags_summary_table",
            "populate_ocp_on_azure_ui_summary_tables",
        )
        unique_fields = {"currency": self.currency, "subscription_guid": self.faker.uuid4()}
    elif provider_type in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
        daily_summary_recipe = "api.report.test.util.ocp_on_gcp_daily_summary"
        project_summary_pod_recipe = "api.report.test.util.ocp_on_gcp_project_daily_summary_pod"
        project_summary_storage_recipe = "api.report.test.util.ocp_on_gcp_project_daily_summary_storage"
        dbaccessor, tags_update_method, ui_update_method = (
            GCPReportDBAccessor,
            "populate_ocp_on_gcp_tags_summary_table",
            "populate_ocp_on_gcp_ui_summary_tables",
        )
        unique_fields = {
            "currency": self.currency,
            "account_id": self.faker.pystr_format(string_format="???????????????"),
        }
    provider = Provider.objects.filter(type=provider_type).first()
    for dates, bill, report_period in zip(self.dates, bills, report_periods):
        start_date, end_date, bill_date = dates
        if provider_type in (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL):
            # GCP summary rows additionally carry the invoice month.
            unique_fields["invoice_month"] = bill_date.strftime("%Y%m")
        LOG.info(f"load OCP-on-{provider.type} data for start: {start_date}, end: {end_date}")
        with schema_context(self.schema):
            # Bake one batch of rows per day in the (inclusive) date range.
            days = (end_date - start_date).days + 1
            for i in range(days):
                baker.make_recipe(
                    daily_summary_recipe,
                    report_period=report_period,
                    cluster_id=cluster_id,
                    cluster_alias=cluster_id,
                    usage_start=start_date + timedelta(i),
                    usage_end=start_date + timedelta(i),
                    cost_entry_bill=bill,
                    tags=cycle(self.tags),
                    source_uuid=provider.uuid,
                    _quantity=len(self.tags),
                    **unique_fields,
                )
                baker.make_recipe(
                    project_summary_pod_recipe,
                    report_period=report_period,
                    cluster_id=cluster_id,
                    cluster_alias=cluster_id,
                    usage_start=start_date + timedelta(i),
                    usage_end=start_date + timedelta(i),
                    cost_entry_bill=bill,
                    tags=cycle(self.tags),
                    source_uuid=provider.uuid,
                    _quantity=len(self.tags),
                    **unique_fields,
                )
                baker.make_recipe(
                    project_summary_storage_recipe,
                    report_period=report_period,
                    cluster_id=cluster_id,
                    cluster_alias=cluster_id,
                    usage_start=start_date + timedelta(i),
                    usage_end=start_date + timedelta(i),
                    cost_entry_bill=bill,
                    tags=cycle(self.tags),
                    source_uuid=provider.uuid,
                    _quantity=len(self.tags),
                    **unique_fields,
                )
    # Roll all bills up at once over the full loaded date span.
    with dbaccessor(self.schema) as accessor:
        # update tags
        cls_method = getattr(accessor, tags_update_method)
        cls_method([bill.id for bill in bills], self.first_start_date, self.last_end_date)
        # update ui tables
        sql_params = {
            "schema_name": self.schema,
            "start_date": self.first_start_date,
            "end_date": self.last_end_date,
            "source_uuid": provider.uuid,
            "cluster_id": cluster_id,
            "cluster_alias": cluster_id,
        }
        cls_method = getattr(accessor, ui_update_method)
        cls_method(sql_params)
    refresh_materialized_views(self.schema, provider_type, provider_uuid=provider.uuid, synchronous=True)