Esempio n. 1
0
    def test_update_summary_tables_without_bill(self, mock_daily,
                                                mock_summary):
        """Check the daily and summary updates receive the parsed dates and bill id.

        The current bill is stamped with a creation time first; afterwards the
        bill is re-read to confirm both summary timestamps are populated.
        """
        self.manifest.num_processed_files = self.manifest.num_total_files

        date_format = "%Y-%m-%d"
        window_start = self.date_accessor.today_with_timezone("UTC")
        window_end = window_start + datetime.timedelta(days=1)
        billing_month_start = window_start.replace(day=1).date()

        # Mark the bill for this month as already having summary data.
        with GCPReportDBAccessor(self.schema) as accessor:
            bill = accessor.get_cost_entry_bills_by_date(
                billing_month_start)[0]
            bill.summary_data_creation_datetime = window_start
            bill.save()

        # The updater takes string dates; the mocks are expected to see dates.
        updater_args = (window_start.strftime(date_format),
                        window_end.strftime(date_format))
        expected_call = (window_start.date(), window_end.date(),
                         [str(bill.id)])

        self.updater.update_daily_tables(*updater_args)
        mock_daily.assert_called_with(*expected_call)
        mock_summary.assert_not_called()

        self.updater.update_summary_tables(*updater_args)
        mock_summary.assert_called_with(*expected_call)

        # Re-read the bill to confirm both summary timestamps are set.
        with GCPReportDBAccessor(self.schema) as accessor:
            refreshed_bill = accessor.get_cost_entry_bills_by_date(
                billing_month_start)[0]
            self.assertIsNotNone(
                refreshed_bill.summary_data_creation_datetime)
            self.assertIsNotNone(
                refreshed_bill.summary_data_updated_datetime)
Esempio n. 2
0
    def update_summary_cost_model_costs(self, start_date=None, end_date=None):
        """Update the GCP summary table with the charge information.

        Applies the markup cost for the date range, then stamps every bill
        returned for this provider with the current UTC time in
        ``derived_cost_datetime``.

        Args:
            start_date (str, Optional) - Start date of range to update derived cost.
            end_date (str, Optional) - End date of range to update derived cost.

        Returns:
            None

        """
        LOG.debug(
            "Starting charge calculation updates for provider: %s. Dates: %s-%s",
            self._provider.uuid,
            str(start_date),
            str(end_date),
        )

        self._update_markup_cost(start_date, end_date)

        with GCPReportDBAccessor(self._schema) as accessor:
            # This updater operates on GCP data, so the message names GCP.
            LOG.debug(
                "Updating GCP derived cost summary for schema: %s and provider: %s",
                self._schema, self._provider.uuid)
            bills = accessor.bills_for_provider_uuid(self._provider.uuid,
                                                     start_date)
            # Bills are iterated and saved inside the tenant schema context.
            with schema_context(self._schema):
                for bill in bills:
                    bill.derived_cost_datetime = DateAccessor(
                    ).today_with_timezone("UTC")
                    bill.save()
    def update_summary_tables(self, start_date, end_date):
        """Populate the summary tables for reporting.

        The date range is first normalised by ``_get_sql_inputs``. If no bill
        exists for the provider at the start date, nothing is summarised and
        the normalised dates are returned immediately. Otherwise the range is
        processed in windows of ``settings.TRINO_DATE_STEP``: existing daily
        summary rows for the window are deleted and then repopulated, after
        which tag summaries are refreshed and the bills are timestamped.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (start_date, end_date) as returned by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with CostModelDBAccessor(self._schema,
                                 self._provider.uuid) as cost_model_accessor:
            markup = cost_model_accessor.markup
            # The stored value is divided by 100 (presumably percent -> fraction; confirm).
            markup_value = float(markup.get("value", 0)) / 100

        with GCPReportDBAccessor(self._schema) as accessor:
            # Need these bills on the session to update dates after processing
            with schema_context(self._schema):
                bills = accessor.bills_for_provider_uuid(
                    self._provider.uuid, start_date)
                bill_ids = [str(bill.id) for bill in bills]
                # Id of the first bill, or None when no bill was found.
                current_bill_id = bills.first().id if bills else None

            if current_bill_id is None:
                msg = f"No bill was found for {start_date}. Skipping summarization"
                LOG.info(msg)
                return start_date, end_date

            # Process the range window by window: clear the window, then refill it.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating GCP report summary tables from parquet: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                )
                accessor.delete_line_item_daily_summary_entries_for_date_range(
                    self._provider.uuid, start, end)
                accessor.populate_line_item_daily_summary_table_presto(
                    start, end, self._provider.uuid, current_bill_id,
                    markup_value)
                accessor.populate_enabled_tag_keys(start, end, bill_ids)
            # Tag summaries are refreshed once, over the full range, after all windows.
            accessor.populate_tags_summary_table(bill_ids)
            accessor.update_line_item_daily_summary_with_enabled_tags(
                start_date, end_date, bill_ids)
            for bill in bills:
                # The creation time is only set the first time a bill is summarised.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = self._date_accessor.today_with_timezone(
                        "UTC")
                bill.summary_data_updated_datetime = self._date_accessor.today_with_timezone(
                    "UTC")
                bill.save()

        return start_date, end_date
Esempio n. 4
0
    def test_update_daily_summary_tables(self, mock_presto, mock_tag_update, mock_summary_update, mock_delete):
        """Verify the summary update drives the mocked Presto, tag and delete calls."""
        start, end = self.updater._get_sql_inputs(
            self.dh.this_month_start.isoformat(), self.dh.this_month_end.isoformat()
        )

        # Keep only the final window; assert_called_with checks the most recent call.
        for window in date_range_pair(start, end, step=settings.TRINO_DATE_STEP):
            expected_start, expected_end = window

        with GCPReportDBAccessor(self.schema) as accessor, schema_context(self.schema):
            provider_bills = accessor.bills_for_provider_uuid(self.gcp_provider.uuid, start)
            bill_ids = [str(bill.id) for bill in provider_bills]
            current_bill_id = provider_bills.first().id if provider_bills else None

        with CostModelDBAccessor(self.schema, self.gcp_provider.uuid) as cost_model_accessor:
            markup_value = float(cost_model_accessor.markup.get("value", 0)) / 100

        returned_dates = self.updater.update_summary_tables(start, end)
        start_return, end_return = returned_dates

        mock_delete.assert_called_with(self.gcp_provider.uuid, expected_start, expected_end)
        mock_presto.assert_called_with(
            expected_start, expected_end, self.gcp_provider.uuid, current_bill_id, markup_value
        )
        mock_tag_update.assert_called_with(bill_ids, start, end)
        mock_summary_update.assert_called_with(start, end, bill_ids)

        # The updater must hand back the same range it was given.
        self.assertEqual(start_return, start)
        self.assertEqual(end_return, end)
Esempio n. 5
0
    def test_get_bill_ids_from_provider_with_start_and_end_date(self):
        """Check the utility returns the same bill IDs as a direct query bounded by both dates."""
        clock = DateAccessor()

        with ProviderDBAccessor(
                provider_uuid=self.gcp_provider_uuid) as provider_accessor:
            provider = provider_accessor.get_provider()

        with GCPReportDBAccessor(schema=self.schema) as accessor:
            # Range runs from the first of last month to the first of this month.
            end_date = clock.today_with_timezone("utc").replace(day=1)
            start_date = end_date - relativedelta(months=0) - relativedelta(months=1)

            provider_bills = accessor.get_cost_entry_bills_query_by_provider(
                provider.uuid)
            with schema_context(self.schema):
                in_range = provider_bills.filter(
                    billing_period_start__gte=start_date.date())
                in_range = in_range.filter(
                    billing_period_start__lte=end_date.date())
                expected_bill_ids = [str(bill.id) for bill in in_range.all()]

        returned_bills = utils.get_bills_from_provider(self.gcp_provider_uuid,
                                                       self.schema,
                                                       start_date=start_date,
                                                       end_date=end_date)
        with schema_context(self.schema):
            bill_ids = [str(bill.id) for bill in returned_bills]

        self.assertEqual(bill_ids, expected_bill_ids)
Esempio n. 6
0
    def _delete_line_items_in_range(self, bill_id):
        """Delete the bill's line items whose partition date falls in the scan range.

        The range covers ``scan_start`` up to and including ``scan_end`` (the
        upper bound is the day after ``scan_end``, exclusive).

        Returns:
            (bool) False when there is no manifest id or the manifest already
            has processed files; True once the delete has been attempted.

        """
        range_start = ciso8601.parse_datetime(self.scan_start).date()
        day_after_end = ciso8601.parse_datetime(self.scan_end) + relativedelta(days=1)
        range_end = day_after_end.date()

        if not self._manifest_id:
            return False
        with ReportManifestDBAccessor() as manifest_accessor:
            # Only purge before any file of this manifest has been processed.
            if manifest_accessor.number_of_files_processed(self._manifest_id) != 0:
                return False

        with GCPReportDBAccessor(self._schema) as accessor:
            stale_items = accessor.get_lineitem_query_for_billid(bill_id).filter(
                partition_date__gte=range_start,
                partition_date__lt=range_end,
            )
            delete_count = stale_items.delete()
            deleted_rows = delete_count[0]
            if deleted_rows > 0:
                LOG.info(f"items delted ({deleted_rows}) for:\n"
                         f" schema_name: {self._schema}\n"
                         f" provider_uuid: {self._provider_uuid}\n"
                         f" bill ID: {bill_id}\n"
                         f" on or after {range_start}\n"
                         f" before {range_end}\n")
        return True
    def _get_sql_inputs(self, start_date, end_date):
        """Work out the date range to use for the summary SQL.

        When a manifest is present and its bill needs a full summary update,
        the range is widened to the whole billing month. String dates are
        parsed into ``date`` objects before being returned.
        """
        with GCPReportDBAccessor(self._schema) as accessor:
            # A manifest is present on the normal processing route.
            if self._manifest:
                # The manifest's billing period decides which bill is inspected.
                bill_date = self._manifest.billing_period_start_datetime.date()
                provider_bills = accessor.get_cost_entry_bills_query_by_provider(
                    self._provider.uuid)
                month_bills = provider_bills.filter(
                    billing_period_start=bill_date).all()
                first_bill = month_bills.filter(
                    billing_period_start=bill_date).first()
                with schema_context(self._schema):
                    needs_full_month = (
                        determine_if_full_summary_update_needed(first_bill)
                        if first_bill else False
                    )
                if needs_full_month:
                    _, days_in_month = calendar.monthrange(
                        bill_date.year, bill_date.month)
                    start_date = bill_date
                    end_date = bill_date.replace(day=days_in_month)
                    LOG.info(
                        "Overriding start and end date to process full month.")

        # Callers may pass ISO strings; everything else is returned untouched.
        start_date, end_date = (
            ciso8601.parse_datetime(value).date() if isinstance(value, str) else value
            for value in (start_date, end_date)
        )
        return start_date, end_date
Esempio n. 8
0
    def purge_expired_report_data(self, expired_date=None, provider_uuid=None, simulate=False):
        """Remove report data selected either by cutoff date or by provider.

        Exactly one of ``expired_date`` and ``provider_uuid`` must be given.

        Args:
            expired_date (datetime.datetime): The cutoff date for removing data.
            provider_uuid (uuid): The DB id of the provider to purge data for.
            simulate (bool): When True nothing is deleted; the matching bills are only reported.

        Returns:
            ([{}]) List of dictionaries containing 'removed_provider_uuid' and 'billing_period_start'

        Raises:
            GCPReportDBCleanerError: if both or neither selector is supplied.

        """
        LOG.info("Calling purge_expired_report_data for gcp")

        with GCPReportDBAccessor(self._schema) as accessor:
            # Both given or both missing is an error.
            if (expired_date is None) == (provider_uuid is None):
                raise GCPReportDBCleanerError(
                    "This method must be called with either expired_date or provider_uuid"
                )
            removed_items = []

            bill_objects = (
                accessor.get_bill_query_before_date(expired_date)
                if expired_date is not None
                else accessor.get_cost_entry_bills_query_by_provider(provider_uuid)
            )
            with schema_context(self._schema):
                for bill in bill_objects.all():
                    bill_id = bill.id
                    provider_id = bill.provider_id
                    period_start = bill.billing_period_start

                    if not simulate:
                        # Delete child rows in order: line items, daily, then summary.
                        purge_steps = (
                            (
                                accessor.get_lineitem_query_for_billid,
                                "Removing %s cost entry line items for bill id %s",
                            ),
                            (
                                accessor.get_daily_query_for_billid,
                                "Removing %s cost entry daily items for bill id %s",
                            ),
                            (
                                accessor.get_summary_query_for_billid,
                                "Removing %s cost entry summary items for bill id %s",
                            ),
                        )
                        for build_query, message in purge_steps:
                            del_count = accessor.execute_delete_sql(build_query(bill_id))
                            LOG.info(message, del_count, bill_id)

                    LOG.info(
                        "Report data removed for Provider ID: %s with billing period: %s",
                        provider_id,
                        period_start,
                    )
                    removed_items.append(
                        {"removed_provider_uuid": provider_id, "billing_period_start": str(period_start)}
                    )

                # The bills themselves go last, once their child rows are gone.
                if not simulate:
                    bill_objects.delete()

        return removed_items
 def db_accessor(self):
     """Build the report DB accessor matching this provider type, or None if unsupported."""
     accessor_by_type = (
         (Provider.PROVIDER_AWS, AWSReportDBAccessor),
         (Provider.PROVIDER_AZURE, AzureReportDBAccessor),
         (Provider.PROVIDER_GCP, GCPReportDBAccessor),
     )
     for known_type, accessor_class in accessor_by_type:
         if self.provider_type == known_type:
             return accessor_class(self.schema_name)
     return None
    def update_summary_tables(self, start_date, end_date):
        """Populate the daily, UI and tag summary tables for the date range.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str) A start date and end date.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        with schema_context(self._schema):
            self._handle_partitions(self._schema, UI_SUMMARY_TABLES,
                                    start_date, end_date)

        date_format = "%Y-%m-%d"
        provider_bills = get_bills_from_provider(
            self._provider.uuid,
            self._schema,
            datetime.datetime.strptime(start_date, date_format),
            datetime.datetime.strptime(end_date, date_format),
        )
        with schema_context(self._schema):
            bill_ids = [str(provider_bill.id) for provider_bill in provider_bills]

        with GCPReportDBAccessor(self._schema) as accessor:
            # These bills are fetched through the accessor so they can be saved below.
            bills = accessor.bills_for_provider_uuid(self._provider.uuid,
                                                     start_date)
            for window_start, window_end in date_range_pair(start_date, end_date):
                LOG.info(
                    "Updating GCP report summary tables: \n\tSchema: %s"
                    "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s",
                    self._schema,
                    self._provider.uuid,
                    window_start,
                    window_end,
                    str(bill_ids),
                )
                accessor.populate_line_item_daily_summary_table(
                    window_start, window_end, bill_ids)
                accessor.populate_ui_summary_tables(window_start, window_end,
                                                    self._provider.uuid)
            accessor.populate_tags_summary_table(bill_ids, start_date,
                                                 end_date)
            for bill in bills:
                # The creation time is only set the first time a bill is summarised.
                if bill.summary_data_creation_datetime is None:
                    bill.summary_data_creation_datetime = (
                        self._date_accessor.today_with_timezone("UTC"))
                bill.summary_data_updated_datetime = (
                    self._date_accessor.today_with_timezone("UTC"))
                bill.save()

        return start_date, end_date
Esempio n. 11
0
    def setUpClass(cls):
        """Create the shared accessor, table list and helper objects for the test class."""
        super().setUpClass()

        report_accessor = GCPReportDBAccessor(cls.schema)
        cls.accessor = report_accessor
        cls.report_schema = report_accessor.report_schema
        cls.all_tables = [*GCP_REPORT_TABLE_MAP.values()]
        cls.creator = ReportObjectCreator(cls.schema)
        cls.date_accessor = DateAccessor()
        cls.manifest_accessor = ReportManifestDBAccessor()
Esempio n. 12
0
 def setUpClass(cls):
     """Create the shared accessor, object creator and table lists for the test class."""
     super().setUpClass()
     report_accessor = GCPReportDBAccessor(schema=cls.schema)
     cls.accessor = report_accessor
     cls.report_schema = report_accessor.report_schema
     cls.creator = ReportObjectCreator(cls.schema)
     cls.all_tables = [*GCP_REPORT_TABLE_MAP.values()]
     cls.foreign_key_tables = [
         GCP_REPORT_TABLE_MAP[table_key] for table_key in ("bill", "product")
     ]
Esempio n. 13
0
    def purge_expired_line_item(self,
                                expired_date,
                                provider_uuid=None,
                                simulate=False):
        """Remove raw line items for bills selected by the cutoff date.

        Args:
            expired_date (datetime.datetime): The cutoff date for removing data.
            provider_uuid (uuid): The DB id of the provider to purge data for.
            simulate (bool): When True nothing is deleted; the matching bills are only reported.

        Returns:
            ([{}]) List of dictionaries containing 'removed_provider_uuid' and 'billing_period_start'

        Raises:
            GCPReportDBCleanerError: if expired_date is not a datetime.

        """
        LOG.info("Calling purge_expired_line_item for gcp")
        if not isinstance(expired_date, datetime):
            raise GCPReportDBCleanerError(
                "Parameter expired_date must be a datetime.datetime object.")

        # Only forward provider_uuid when the caller supplied one.
        if provider_uuid is None:
            query_args = (expired_date,)
        else:
            query_args = (expired_date, provider_uuid)

        with GCPReportDBAccessor(self._schema) as accessor:
            removed_items = []
            expired_bills = accessor.get_bill_query_before_date(*query_args)
            with schema_context(self._schema):
                for bill in expired_bills.all():
                    bill_id = bill.id
                    provider_id = bill.provider_id
                    period_start = bill.billing_period_start

                    if not simulate:
                        # The second element of the result is not used here.
                        del_count, _ = mini_transaction_delete(
                            accessor.get_lineitem_query_for_billid(bill_id))
                        LOG.info(
                            "Removing %s cost entry line items for bill id %s",
                            del_count, bill_id)

                    LOG.info(
                        "Line item data removed for Provider ID: %s with billing period: %s",
                        provider_id,
                        period_start,
                    )
                    removed_items.append({
                        "removed_provider_uuid": provider_id,
                        "billing_period_start": str(period_start),
                    })
        return removed_items
Esempio n. 14
0
    def purge_expired_report_data(self,
                                  expired_date=None,
                                  provider_uuid=None,
                                  simulate=False):
        """Remove report data selected either by cutoff date or by provider.

        Exactly one of ``expired_date`` and ``provider_uuid`` must be given.
        A cutoff date is delegated to ``purge_expired_report_data_by_date``;
        a provider has all of its bills cascade-deleted.

        Args:
            expired_date (datetime.datetime): The cutoff date for removing data.
            provider_uuid (uuid): The DB id of the provider to purge data for.
            simulate (bool): When True nothing is deleted; the matching bills are only reported.

        Returns:
            ([{}]) List of dictionaries containing 'removed_provider_uuid' and 'billing_period_start'

        Raises:
            GCPReportDBCleanerError: if both or neither selector is supplied.

        """
        LOG.info("Calling purge_expired_report_data for gcp")

        with GCPReportDBAccessor(self._schema) as accessor:
            # Both given or both missing is an error.
            if (expired_date is None) == (provider_uuid is None):
                raise GCPReportDBCleanerError(
                    "This method must be called with either expired_date or provider_uuid"
                )

            if expired_date is not None:
                return self.purge_expired_report_data_by_date(
                    expired_date, simulate=simulate)

            bill_objects = accessor.get_cost_entry_bills_query_by_provider(
                provider_uuid)

        removed_items = []
        all_providers = set()
        all_period_starts = set()

        with schema_context(self._schema):
            for bill in bill_objects.all():
                period_start = str(bill.billing_period_start)
                removed_items.append({
                    "removed_provider_uuid": bill.provider_id,
                    "billing_period_start": period_start,
                })
                all_providers.add(bill.provider_id)
                all_period_starts.add(period_start)

            LOG.info(
                f"Deleting data for providers {all_providers} and periods {all_period_starts}"
            )

            if not simulate:
                cascade_delete(bill_objects.query.model, bill_objects)

        return removed_items
Esempio n. 15
0
    def test_update_summary_cost_model_costs(self, mock_markup):
        """Check the cost model update stamps the current bill with a derived cost time."""
        mock_markup.return_value = {"value": 10, "unit": "percent"}
        today = self.date_accessor.today_with_timezone("UTC")
        current_bill_date = today.replace(day=1).date()

        self.updater.update_summary_cost_model_costs()

        with GCPReportDBAccessor("acct10001") as accessor:
            current_bill = accessor.get_cost_entry_bills_by_date(current_bill_date)[0]
            self.assertIsNotNone(current_bill.derived_cost_datetime)
Esempio n. 16
0
    def _get_sql_inputs(self, start_date, end_date):
        """Return the date range for summary SQL, preferring the manifest's scan range.

        Without a manifest the arguments are returned unchanged. With one, the
        "start" and "end" entries of its scan range replace the arguments
        wherever they are present.
        """
        with GCPReportDBAccessor(self._schema) as accessor:
            if not self._manifest:
                return start_date, end_date

            # A manifest is present on the normal processing route.
            scan_range = accessor.get_gcp_scan_range_from_report_name(
                manifest_id=self._manifest.id)
            start_date = scan_range.get("start", start_date)
            end_date = scan_range.get("end", end_date)
            return start_date, end_date
Esempio n. 17
0
    def process(self):
        """Process GCP billing file.

        The CSV at ``self._report_path`` is read in chunks of
        ``self._batch_size`` rows. Within each chunk, rows are grouped by
        "Start Time" and "Project ID"; a bill and a project are looked up or
        created per group and a line item is created per row. After each chunk
        the collected line items are written to a temp table and merged into
        the line item table. When ``settings.DEVELOPMENT`` is falsy the report
        file is removed once processing completes.
        """
        row_count = 0

        # Read the csv in batched chunks.
        report_csv = pandas.read_csv(self._report_path, chunksize=self._batch_size, compression="infer")

        with GCPReportDBAccessor(self._schema, self.column_map) as report_db:

            for chunk in report_csv:

                # Group the information in the csv by the start time and the project id
                report_groups = chunk.groupby(by=["Start Time", "Project ID"])
                for group, rows in report_groups:

                    # Each row in the group contains information that we'll need to create the bill
                    # and the project. Just get the first row to pull this information.
                    first_row = OrderedDict(zip(rows.columns.tolist(), rows.iloc[0].tolist()))

                    bill_id = self._get_or_create_cost_entry_bill(first_row, report_db)

                    project_id = self._get_or_create_gcp_project(first_row, report_db)

                    # Turn every row into a column-name -> value mapping before creating its line item.
                    for row in rows.values:
                        processed_row = OrderedDict(zip(rows.columns.tolist(), row.tolist()))
                        self._create_cost_entry_line_item(processed_row, bill_id, project_id, report_db)

                # row_count is the running total of rows saved by earlier chunks.
                LOG.info(
                    "Saving report rows %d to %d for %s",
                    row_count,
                    row_count + len(self.processed_report.unique_line_items),
                    self._report_name,
                )

                # Create a temp table with all the line items, and merge the temp table to the line item table.
                # This is faster than django's bulk_create.
                temp_table = report_db.create_temp_table(self.line_item_table_name, drop_column="id")

                # Have to put values into line_items because the parent class needs it to _save_to_db
                self.processed_report.line_items = list(self.processed_report.unique_line_items.values())
                self._save_to_db(temp_table, report_db)
                report_db.merge_temp_table(
                    self.line_item_table_name, temp_table, self.line_item_columns, self.line_item_conflict_columns
                )

                # Count what was saved, then reset the per-chunk buffer before the next chunk.
                row_count += len(self.processed_report.line_items)
                self.processed_report.remove_processed_rows()

            LOG.info("Completed report processing for file: %s and schema: %s", self._report_name, self._schema)

            # The source file is kept in development and removed otherwise.
            if not settings.DEVELOPMENT:
                LOG.info("Removing processed file: %s", self._report_path)
                remove(self._report_path)
Esempio n. 18
0
    def purge_expired_report_data_by_date(self, expired_date, simulate=False):
        """Remove report data for bills selected by the cutoff date.

        Partitions of the summary tables whose lower bound is on or before the
        first day of ``expired_date``'s month are deleted first; the selected
        bills are then cascade-deleted, skipping the partitioned summary
        models.

        Args:
            expired_date (datetime.datetime): The cutoff date for removing data.
            simulate (bool): When True nothing is deleted; the matching bills are only reported.

        Returns:
            ([{}]) List of dictionaries containing 'removed_provider_uuid' and 'billing_period_start'

        """
        # First day of the cutoff month, as a string, used as the partition bound.
        partition_from = str(date(expired_date.year, expired_date.month, 1))
        with GCPReportDBAccessor(self._schema) as accessor:
            all_bill_objects = accessor.get_bill_query_before_date(
                expired_date).all()
            # Partitioned tables handled via partition deletion rather than cascade.
            table_names = [
                accessor._table_map["ocp_on_gcp_daily_summary"],
                accessor._table_map["ocp_on_gcp_project_daily_summary"],
                accessor.line_item_daily_summary_table._meta.db_table,
            ]
            table_names.extend(UI_SUMMARY_TABLES)
            table_models = [get_model(tn) for tn in table_names]

        with schema_context(self._schema):
            removed_items = []
            all_providers = set()
            all_period_starts = set()

            if not simulate:
                # Will call trigger to detach, truncate, and drop partitions
                LOG.info(
                    "Deleting table partitions total for the following tables: "
                    + f"{table_names} with partitions <= {partition_from}")
                del_count = execute_delete_sql(
                    PartitionedTable.objects.filter(
                        schema_name=self._schema,
                        partition_of_table_name__in=table_names,
                        partition_parameters__default=False,
                        partition_parameters__from__lte=partition_from,
                    ))
                LOG.info(f"Deleted {del_count} table partitions")

            # Iterate over the remainder as they could involve much larger amounts of data
            for bill in all_bill_objects:
                removed_items.append({
                    "removed_provider_uuid":
                    bill.provider_id,
                    "billing_period_start":
                    str(bill.billing_period_start)
                })
                all_providers.add(bill.provider_id)
                all_period_starts.add(str(bill.billing_period_start))

            LOG.info(
                f"Deleting data for providers {all_providers} and periods {all_period_starts}"
            )

            # The partitioned models were handled above, so the cascade skips them.
            if not simulate:
                cascade_delete(all_bill_objects.query.model,
                               all_bill_objects,
                               skip_relations=table_models)

        return removed_items
Esempio n. 19
0
    def setUp(self):
        """Create a temp report copy, a GCP provider, a manifest, and the processor under test."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f"{self.temp_dir}/202011_30c31bca571d9b7f3b2c8459dd8bc34a_2020-11-08:2020-11-11.csv"

        shutil.copy2(self.test_report_path, self.test_report)

        auth = ProviderAuthentication.objects.create(
            credentials={"project-id": fake.word()})
        billing_source = ProviderBillingSource.objects.create(
            data_source={"bucket": fake.word()})
        # check_report_updates is patched out while the provider is created.
        with patch("masu.celery.tasks.check_report_updates"):
            self.gcp_provider = Provider.objects.create(
                uuid=uuid.uuid4(),
                name="Test Provider",
                type=Provider.PROVIDER_GCP,
                authentication=auth,
                billing_source=billing_source,
                customer=self.customer,
                setup_complete=True,
            )

        month_range = utils.month_date_range(
            parser.parse("2020-11-08 23:00:00+00:00"))
        range_start, range_end = month_range.split("-")

        # Both range ends are set to hour 0, minute 0 with a UTC tzinfo.
        midnight_utc = {"hour": 0, "minute": 0, "tzinfo": pytz.UTC}
        self.start_date_utc = parser.parse(range_start).replace(**midnight_utc)
        self.end_date_utc = parser.parse(range_end).replace(**midnight_utc)

        self.assembly_id = "1234"
        self.manifest_dict = {
            "assembly_id": self.assembly_id,
            "billing_period_start_datetime": self.start_date_utc,
            "num_total_files": 1,
            "provider_uuid": self.gcp_provider.uuid,
        }
        self.manifest = ReportManifestDBAccessor().add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema)
Esempio n. 20
0
    def setUp(self):
        """Create a temp report copy, a GCP provider, a manifest, and the processor under test."""
        super().setUp()
        self.temp_dir = tempfile.mkdtemp()
        self.test_report = f'{self.temp_dir}/evidence-2019-06-03.csv'

        shutil.copy2(self.test_report_path, self.test_report)

        auth = ProviderAuthentication.objects.create(
            credentials={'project-id': fake.word()})
        billing_source = ProviderBillingSource.objects.create(
            data_source={'bucket': fake.word()})
        self.gcp_provider = Provider.objects.create(
            uuid=uuid.uuid4(),
            name='Test Provider',
            type=Provider.PROVIDER_GCP,
            authentication=auth,
            billing_source=billing_source,
            customer=self.customer,
            setup_complete=True,
        )

        month_range = utils.month_date_range(
            parser.parse('2019-09-17T00:00:00-07:00'))
        range_start, range_end = month_range.split('-')

        # Both range ends are set to hour 0, minute 0 with a UTC tzinfo.
        midnight_utc = {'hour': 0, 'minute': 0, 'tzinfo': pytz.UTC}
        self.start_date_utc = parser.parse(range_start).replace(**midnight_utc)
        self.end_date_utc = parser.parse(range_end).replace(**midnight_utc)

        self.assembly_id = '1234'
        self.manifest_dict = {
            'assembly_id': self.assembly_id,
            'billing_period_start_datetime': self.start_date_utc,
            'num_total_files': 1,
            'provider_uuid': self.gcp_provider.uuid,
        }
        self.manifest = ReportManifestDBAccessor().add(**self.manifest_dict)

        self.processor = GCPReportProcessor(
            schema_name=self.schema,
            report_path=self.test_report,
            compression=UNCOMPRESSED,
            provider_uuid=self.gcp_provider.uuid,
            manifest_id=self.manifest.id,
        )
        self.accessor = GCPReportDBAccessor(self.schema, self.column_map)
Esempio n. 21
0
def get_bills_from_provider(provider_uuid,
                            schema,
                            start_date=None,
                            end_date=None):
    """
    Look up the GCP cost entry bills that belong to a provider.

    ``datetime``/``date`` arguments are rendered as ``YYYY-MM-DD`` strings
    before being used as filters; the start date is first moved to day 1 of
    its month. Arguments of any other type are used as given.

    Args:
        provider_uuid (str): Provider UUID.
        schema (str): Tenant schema.
        start_date (datetime, date, str): Lower bound on ``billing_period_start``.
        end_date (datetime, date, str): Upper bound on ``billing_period_start``.

    Returns:
        The filtered bills query result, or an empty list when no provider
        is found or the provider is not a GCP / GCP-local provider.

    """
    date_types = (datetime.datetime, datetime.date)
    if isinstance(start_date, date_types):
        # Snap to the first of the month before comparing with billing_period_start.
        start_date = start_date.replace(day=1).strftime("%Y-%m-%d")
    if isinstance(end_date, date_types):
        end_date = end_date.strftime("%Y-%m-%d")

    with ProviderDBAccessor(provider_uuid) as provider_accessor:
        provider = provider_accessor.get_provider()

    if not provider:
        LOG.warning("Provider UUID is not associated with a given provider.")
        return []

    gcp_types = (Provider.PROVIDER_GCP, Provider.PROVIDER_GCP_LOCAL)
    if provider.type not in gcp_types:
        LOG.warning(f"Provider UUID is not an GCP type.  It is {provider.type}")
        return []

    # Each bound is applied only when a truthy value was supplied.
    period_bounds = (
        ("billing_period_start__gte", start_date),
        ("billing_period_start__lte", end_date),
    )
    with GCPReportDBAccessor(schema) as report_accessor:
        with schema_context(schema):
            bills = report_accessor.get_cost_entry_bills_query_by_provider(
                provider.uuid)
            for lookup, bound in period_bounds:
                if bound:
                    bills = bills.filter(**{lookup: bound})
            bills = bills.all()

    return bills
Esempio n. 22
0
 def load_gcp_data(self, linked_openshift_provider=None):
     """Load GCP data for tests.

     Creates a GCP-local provider, then for every entry in ``self.dates``
     creates a manifest and a bill and bakes daily summary rows for three
     generated projects on each day of the range. Finally populates the
     tags summary and UI summary tables.

     Args:
         linked_openshift_provider: Passed through to ``create_provider``.

     Returns:
         (list): The bills created, one per entry in ``self.dates``.

     """
     provider_type = Provider.PROVIDER_GCP_LOCAL
     account_id = "123456789"
     provider = self.create_provider(
         provider_type,
         {"project_id": "test_project_id"},
         {"table_id": "test_table_id", "dataset": "test_dataset"},
         "test-gcp",
         linked_openshift_provider=linked_openshift_provider,
     )
     # (project id, project name) pairs shared by every billing period.
     projects = [(self.faker.slug(), self.faker.slug()) for _ in range(3)]

     bills = []
     for start_date, end_date, bill_date in self.dates:
         LOG.info(f"load gcp data for start: {start_date}, end: {end_date}")
         self.create_manifest(provider, bill_date)
         bill = self.create_bill(provider_type, provider, bill_date)
         bills.append(bill)
         with schema_context(self.schema):
             day_count = (end_date - start_date).days + 1
             for day_offset in range(day_count):
                 usage_day = start_date + timedelta(day_offset)
                 for project_id, project_name in projects:
                     # One row per tag set for this project and day.
                     baker.make_recipe(
                         "api.report.test.util.gcp_daily_summary",
                         cost_entry_bill=bill,
                         invoice_month=bill_date.strftime("%Y%m"),
                         account_id=account_id,
                         project_id=project_id,
                         project_name=project_name,
                         usage_start=usage_day,
                         usage_end=usage_day,
                         tags=cycle(self.tags),
                         currency=self.currency,
                         source_uuid=provider.uuid,
                         _quantity=len(self.tags),
                     )

     bill_ids = [created_bill.id for created_bill in bills]
     with GCPReportDBAccessor(self.schema) as accessor:
         accessor.populate_tags_summary_table(bill_ids,
                                              self.first_start_date,
                                              self.last_end_date)
         accessor.populate_ui_summary_tables(self.first_start_date,
                                             self.last_end_date,
                                             provider.uuid)
     return bills
Esempio n. 23
0
    def __init__(self,
                 schema_name,
                 report_path,
                 compression,
                 provider_uuid,
                 manifest_id=None):
        """Set up the processor for one GCP report file.

        Args:
            schema_name (str): The name of the customer schema to process into
            report_path (str): Where the report file lives in the file system
            compression (CONST): How the report file is compressed.
                Accepted values: UNCOMPRESSED, GZIP_COMPRESSED
            provider_uuid: Provider identifier; forwarded to the base class
                and kept on the instance.
            manifest_id: Manifest identifier; forwarded to the base class
                and kept on the instance. Defaults to None.

        Raises:
            ProcessedGCPReportError: When the scan range looked up from the
                report name lacks a start or an end value.

        """
        base_kwargs = {
            "schema_name": schema_name,
            "report_path": report_path,
            "compression": compression,
            "provider_uuid": provider_uuid,
            "manifest_id": manifest_id,
            "processed_report": ProcessedGCPReport(),
        }
        super().__init__(**base_kwargs)

        self._schema = schema_name
        self._provider_uuid = provider_uuid
        self._manifest_id = manifest_id
        self._batch_size = Config.REPORT_PROCESSING_BATCH_SIZE
        self._report_name = path.basename(report_path)
        self.table_name = GCPCostEntryLineItem()

        # Cache the lookups needed during processing while the accessor is open.
        with GCPReportDBAccessor(self._schema) as report_db:
            self.report_schema = report_db.report_schema
            self.existing_bill_map = report_db.get_cost_entry_bills()
            self.existing_product_map = report_db.get_products()
            self.existing_projects_map = report_db.get_projects()
            self.report_scan_range = report_db.get_gcp_scan_range_from_report_name(
                report_name=self._report_name)

        scan_range = self.report_scan_range
        self.scan_start = scan_range.get("start")
        self.scan_end = scan_range.get("end")
        if not (self.scan_start and self.scan_end):
            raise ProcessedGCPReportError(
                f"Error recovering start and end date from csv report ({self._report_name})."
            )

        LOG.info("Initialized report processor for file: %s and schema: %s",
                 report_path, self._schema)

        # Left unset here; process() checks it before merging the temp table.
        self.line_item_columns = None
Esempio n. 24
0
    def _update_markup_cost(self, start_date, end_date):
        """Store markup costs for the provider's bills in the date range.

        Reads the markup percentage from the provider's cost model, converts
        it to a fraction, and hands it to the report accessor together with
        the ids of the matching bills. A ``GCPCostModelCostUpdaterError`` is
        logged rather than propagated.

        Args:
            start_date: Start of the range, passed to the bill lookup and the accessor.
            end_date: End of the range, passed to the bill lookup and the accessor.

        """
        try:
            schema = self._schema
            provider_uuid = self._provider.uuid
            bills = get_bills_from_provider(provider_uuid, schema,
                                            start_date, end_date)

            with CostModelDBAccessor(schema, provider_uuid) as cost_model_accessor:
                markup_percent = float(cost_model_accessor.markup.get("value", 0))
            # The cost model stores a percentage; the accessor receives a fraction.
            markup_value = markup_percent / 100

            with GCPReportDBAccessor(schema) as report_accessor:
                with schema_context(schema):
                    bill_ids = [str(bill.id) for bill in bills]
                report_accessor.populate_markup_cost(markup_value, start_date,
                                                     end_date, bill_ids)
        except GCPCostModelCostUpdaterError as error:
            LOG.error("Unable to update markup costs. Error: %s", str(error))
Esempio n. 25
0
    def __init__(self,
                 schema_name,
                 report_path,
                 compression,
                 provider_uuid,
                 manifest_id=None):
        """Set up the processor for one GCP report file.

        Args:
            schema_name (str): The name of the customer schema to process into
            report_path (str): Where the report file lives in the file system
            compression (CONST): How the report file is compressed.
                Accepted values: UNCOMPRESSED, GZIP_COMPRESSED
            provider_uuid: Provider identifier; forwarded to the base class
                and kept on the instance.
            manifest_id: Manifest identifier; forwarded to the base class
                and kept on the instance. Defaults to None.

        """
        base_kwargs = {
            "schema_name": schema_name,
            "report_path": report_path,
            "compression": compression,
            "provider_uuid": provider_uuid,
            "manifest_id": manifest_id,
            "processed_report": ProcessedGCPReport(),
        }
        super().__init__(**base_kwargs)

        # Keep both the model instance and its database table name.
        line_item_table = GCPCostEntryLineItem()
        self.line_item_table = line_item_table
        self.line_item_table_name = line_item_table._meta.db_table

        self._schema = schema_name
        self._provider_uuid = provider_uuid
        self._manifest_id = manifest_id
        self._batch_size = Config.REPORT_PROCESSING_BATCH_SIZE
        self._report_name = path.basename(report_path)

        # Cache the lookups needed during processing while the accessor is open.
        with GCPReportDBAccessor(self._schema) as report_db:
            self.report_schema = report_db.report_schema
            self.existing_bill_map = report_db.get_cost_entry_bills()
            self.existing_product_map = report_db.get_products()
            self.existing_projects_map = report_db.get_projects()

        LOG.info("Initialized report processor for file: %s and schema: %s",
                 report_path, self._schema)

        self.line_item_columns = None
Esempio n. 26
0
    def _delete_line_items_in_range(self, bill_id, scan_start):
        """Delete a bill's stale line items dated on or after ``scan_start``.

        Nothing is deleted unless this processor has a manifest id and the
        manifest accessor reports that zero files have been processed for
        that manifest.

        Args:
            bill_id: Id of the bill whose line items are queried.
            scan_start: Lower bound (inclusive) applied to ``usage_start``.

        Returns:
            (bool): False when the deletion was skipped, True when the delete
                query was issued (whether or not any rows matched).

        """
        if not self._manifest_id:
            return False
        with ReportManifestDBAccessor() as manifest_accessor:
            num_processed_files = manifest_accessor.number_of_files_processed(
                self._manifest_id)
            if num_processed_files != 0:
                return False

        gcp_date_filter = {"usage_start__gte": scan_start}
        with GCPReportDBAccessor(self._schema) as accessor:
            line_item_query = accessor.get_lineitem_query_for_billid(bill_id)
            line_item_query = line_item_query.filter(**gcp_date_filter)
            delete_count = line_item_query.delete()
            # Django's QuerySet.delete() returns (total, per_model_counts).
            # That tuple is always truthy, so test the total itself to avoid
            # logging a deletion when no rows were removed.
            if isinstance(delete_count, tuple):
                delete_count = delete_count[0]
            if delete_count:
                log_statement = (f"Deleting data for:\n"
                                 f" schema_name: {self._schema}\n"
                                 f" provider_uuid: {self._provider_uuid}\n"
                                 f" bill ID: {bill_id}\n"
                                 f" on or after {scan_start}")
                LOG.info(log_statement)
        return True
Esempio n. 27
0
    def update_daily_tables(self, start_date, end_date):
        """Populate the daily tables for reporting.

        The dates are first normalised by ``_get_sql_inputs``; the daily
        line item table is then populated one date-range pair at a time for
        the provider's bills in that period.

        Args:
            start_date (str) The date to start populating the table.
            end_date   (str) The date to end on.

        Returns
            (str, str): The start date and end date produced by ``_get_sql_inputs``.

        """
        start_date, end_date = self._get_sql_inputs(start_date, end_date)

        date_format = "%Y-%m-%d"
        period_start = datetime.datetime.strptime(start_date, date_format)
        period_end = datetime.datetime.strptime(end_date, date_format)
        bills = get_bills_from_provider(self._provider.uuid, self._schema,
                                        period_start, period_end)

        with schema_context(self._schema):
            bill_ids = [str(bill.id) for bill in bills]

        log_template = ("Updating GCP report daily tables for \n\tSchema: %s"
                        "\n\tProvider: %s \n\tDates: %s - %s\n\tBills: %s")
        with GCPReportDBAccessor(self._schema) as accessor:
            for range_start, range_end in date_range_pair(start_date, end_date):
                LOG.info(
                    log_template,
                    self._schema,
                    self._provider.uuid,
                    range_start,
                    range_end,
                    str(bill_ids),
                )
                accessor.populate_line_item_daily_table(range_start, range_end,
                                                        bill_ids)

        return start_date, end_date
Esempio n. 28
0
 def setUpClass(cls):
     """Run the parent class setup and attach a GCP report accessor to the class."""
     super().setUpClass()
     schema_name = cls.schema
     cls.accessor = GCPReportDBAccessor(schema=schema_name)
Esempio n. 29
0
    def process(self):
        """Process GCP billing file.

        Reads the report CSV in chunks of ``self._batch_size`` rows, groups
        each chunk by invoice month and project id, and for every group
        resolves the bill and project before creating a line item per row.
        Rows are staged in a temp table that is merged into the line item
        table once all chunks are read.

        Returns:
            (bool): False when the report file is not on disk, True otherwise.

        """
        row_count = 0

        if not path.exists(self._report_path):
            LOG.info(
                "Skip processing for file: %s and schema: %s as it was not found on disk.",
                self._report_name,
                self._schema,
            )
            return False

        # Read the csv in batched chunks.
        report_csv = pandas.read_csv(self._report_path,
                                     chunksize=self._batch_size,
                                     compression="infer")

        purged_bill_ids = []
        with GCPReportDBAccessor(self._schema) as report_db:
            line_item_db_table = self.table_name._meta.db_table
            temp_table = report_db.create_temp_table(line_item_db_table,
                                                     drop_column="id")
            for chunk in report_csv:
                # Group the information in the csv by the invoice month and the project id
                grouped = chunk.groupby(by=["invoice.month", "project.id"])
                for _group_key, group_rows in grouped:
                    column_names = group_rows.columns.tolist()

                    # Every row of a group carries the bill and project details,
                    # so the first row is enough to resolve both.
                    first_row = OrderedDict(
                        zip(column_names, group_rows.iloc[0].tolist()))

                    bill_id = self._get_or_create_cost_entry_bill(
                        first_row, report_db)
                    # Purge each bill's existing line items at most once per run.
                    if bill_id not in purged_bill_ids:
                        self._delete_line_items_in_range(bill_id)
                        purged_bill_ids.append(bill_id)

                    project_id = self._get_or_create_gcp_project(
                        first_row, report_db)

                    for raw_row in group_rows.values:
                        processed_row = OrderedDict(
                            zip(column_names, raw_row.tolist()))
                        service_product_id = self._get_or_create_gcp_service_product(
                            processed_row, report_db)
                        self._create_cost_entry_line_item(
                            processed_row, bill_id, project_id, report_db,
                            service_product_id)

                if not self.processed_report.line_items:
                    continue

                LOG.info(
                    "Saving report rows %d to %d for %s",
                    row_count,
                    row_count + len(self.processed_report.line_items),
                    self._report_name,
                )
                self._save_to_db(temp_table, report_db)
                row_count += len(self.processed_report.line_items)
                self._update_mappings()

            if self.line_item_columns:
                report_db.merge_temp_table(line_item_db_table, temp_table,
                                           self.line_item_columns)

            LOG.info("Completed report processing for file: %s and schema: %s",
                     self._report_name, self._schema)

            if not settings.DEVELOPMENT:
                LOG.info("Removing processed file: %s", self._report_path)
                remove(self._report_path)

        return True
    def update_gcp_summary_tables(self, openshift_provider_uuid,
                                  gcp_provider_uuid, start_date, end_date):
        """Update operations specifically for OpenShift on GCP.

        Steps, in order:
          1. Look up the OpenShift report period for ``start_date``; return
             early when there is none.
          2. Delete infrastructure raw cost from the OCP daily summary for
             that report period and date range.
          3. Fetch the GCP bills, handle partitions for the OCP-on-GCP and
             OCP-on-all tables, and read markup / distribution from the cost
             models.
          4. For each date-range pair, delete and repopulate the OCP-on-GCP
             daily summary, then back-populate and refresh the UI and tags
             summary tables.
          5. Populate the OCP-on-all tables and the OCP UI summary tables.
          6. Stamp ``ocp_on_cloud_updated_datetime`` on the report period.

        Args:
            openshift_provider_uuid: UUID of the OpenShift provider.
            gcp_provider_uuid: UUID of the GCP provider.
            start_date (str or date): Start of the range; strings are parsed
                and reduced to a date.
            end_date (str or date): End of the range; strings are parsed and
                reduced to a date.

        Returns:
            None

        """
        if isinstance(start_date, str):
            start_date = parser.parse(start_date).date()
        if isinstance(end_date, str):
            end_date = parser.parse(end_date).date()

        cluster_id = get_cluster_id_from_provider(openshift_provider_uuid)
        cluster_alias = get_cluster_alias_from_cluster_id(cluster_id)

        with OCPReportDBAccessor(self._schema) as accessor:
            report_period = accessor.report_periods_for_provider_uuid(
                openshift_provider_uuid, start_date)
            if not report_period:
                # NOTE(review): the message says "GCP provider" but the value
                # logged is the OpenShift provider UUID — confirm wording.
                LOG.info(
                    f"No report period for GCP provider {openshift_provider_uuid} with start date {start_date}"
                )
                return
            accessor.delete_infrastructure_raw_cost_from_daily_summary(
                openshift_provider_uuid, report_period.id, start_date,
                end_date)
        gcp_bills = gcp_get_bills_from_provider(gcp_provider_uuid,
                                                self._schema, start_date,
                                                end_date)
        with schema_context(self._schema):
            self._handle_partitions(
                self._schema,
                ((
                    "reporting_ocpgcpcostlineitem_daily_summary_p",
                    "reporting_ocpgcpcostlineitem_project_daily_summary_p",
                    "reporting_ocpallcostlineitem_daily_summary_p",
                    "reporting_ocpallcostlineitem_project_daily_summary_p",
                    "reporting_ocpall_compute_summary_pt",
                    "reporting_ocpall_cost_summary_pt",
                ) + OCPGCP_UI_SUMMARY_TABLES),
                start_date,
                end_date,
            )

            # All bill ids go to the tags summary; only the first bill's id
            # (or None when there are no bills) is used for the daily summary.
            gcp_bill_ids = [str(bill.id) for bill in gcp_bills]
            current_gcp_bill_id = gcp_bills.first().id if gcp_bills else None
            current_ocp_report_period_id = report_period.id

        # Markup comes from the GCP provider's cost model, stored as a
        # percentage and converted to a fraction here.
        with CostModelDBAccessor(self._schema,
                                 gcp_provider_uuid) as cost_model_accessor:
            markup = cost_model_accessor.markup
            markup_value = Decimal(markup.get("value", 0)) / 100

        # Distribution comes from the OpenShift provider's cost model.
        with CostModelDBAccessor(
                self._schema, openshift_provider_uuid) as cost_model_accessor:
            distribution = cost_model_accessor.distribution

        # OpenShift on GCP
        sql_params = {
            "schema_name": self._schema,
            "start_date": start_date,
            "end_date": end_date,
            "source_uuid": gcp_provider_uuid,
            "cluster_id": cluster_id,
            "cluster_alias": cluster_alias,
        }
        with GCPReportDBAccessor(self._schema) as accessor:
            # Work through the range in steps of settings.TRINO_DATE_STEP,
            # clearing each window before repopulating it.
            for start, end in date_range_pair(start_date,
                                              end_date,
                                              step=settings.TRINO_DATE_STEP):
                LOG.info(
                    "Updating OpenShift on GCP summary table for "
                    "\n\tSchema: %s \n\tProvider: %s \n\tDates: %s - %s"
                    "\n\tCluster ID: %s, GCP Bill ID: %s",
                    self._schema,
                    self._provider.uuid,
                    start,
                    end,
                    cluster_id,
                    current_gcp_bill_id,
                )
                filters = {
                    "report_period_id": current_ocp_report_period_id
                }  # Use report_period_id to leverage DB index on DELETE
                accessor.delete_line_item_daily_summary_entries_for_date_range_raw(
                    self._provider.uuid,
                    start,
                    end,
                    filters,
                    table=OCPGCPCostLineItemProjectDailySummaryP)
                accessor.populate_ocp_on_gcp_cost_daily_summary_presto(
                    start,
                    end,
                    openshift_provider_uuid,
                    cluster_id,
                    gcp_provider_uuid,
                    current_ocp_report_period_id,
                    current_gcp_bill_id,
                    markup_value,
                    distribution,
                )
            # These run once over the full requested range.
            accessor.back_populate_ocp_on_gcp_daily_summary_trino(
                start_date, end_date, current_ocp_report_period_id)
            accessor.populate_ocp_on_gcp_ui_summary_tables(sql_params)
            accessor.populate_ocp_on_gcp_tags_summary_table(
                gcp_bill_ids, start_date, end_date)

            with OCPReportDBAccessor(self._schema) as ocp_accessor:
                # The OCP-on-all queries reuse sql_params with the source type added.
                sql_params["source_type"] = "GCP"
                LOG.info(
                    f"Processing OCP-ALL for GCP (T)  (s={start_date} e={end_date})"
                )
                ocp_accessor.populate_ocp_on_all_project_daily_summary(
                    "gcp", sql_params)
                ocp_accessor.populate_ocp_on_all_daily_summary(
                    "gcp", sql_params)
                ocp_accessor.populate_ocp_on_all_ui_summary_tables(sql_params)

                # NOTE(review): `start` and `end` here are the loop variables
                # left over from the last date_range_pair iteration (and are
                # unbound if the loop never ran), not start_date / end_date —
                # confirm this narrower window is intended.
                ocp_accessor.populate_ui_summary_tables(
                    start, end, openshift_provider_uuid,
                    UI_SUMMARY_TABLES_MARKUP_SUBSET)
        LOG.info(
            "Updating ocp_on_cloud_updated_datetime on OpenShift report periods"
        )
        with schema_context(self._schema):
            report_period.ocp_on_cloud_updated_datetime = self._date_accessor.today_with_timezone(
                "UTC")
            report_period.save()