Example #1
0
    def start_manifest_processing(self, customer_name, credentials,
                                  data_source, provider_type, schema_name,
                                  provider_uuid, report_month):
        """
        Start processing an account's manifest for the specified report_month.

        Args:
            (String) customer_name - customer name
            (String) credentials - credentials object
            (String) data_source - report storage location
            (String) provider_type - provider type for the downloader
            (String) schema_name - db tenant
            (String) provider_uuid - provider unique identifier
            (Date)   report_month - month to get latest manifest

        Returns:
            ({}) Dictionary containing the following keys:
                manifest_id - (String): Manifest ID for ReportManifestDBAccessor
                assembly_id - (String): UUID identifying report file
                compression - (String): Report compression format
                files       - ([{"key": full_file_path "local_file": "local file name"}]): List of report files.
        """
        downloader = ReportDownloader(
            customer_name=customer_name,
            credentials=credentials,
            data_source=data_source,
            provider_type=provider_type,
            provider_uuid=provider_uuid,
            report_name=None,
        )
        manifest = downloader.download_manifest(report_month)

        if manifest:
            LOG.info("Saving all manifest file names.")
            # Record every expected file up front so later status checks
            # have a row for each report file in the manifest.
            record_all_manifest_files(manifest["manifest_id"], [
                report.get("local_file")
                for report in manifest.get("files", [])
            ])

        LOG.info(f"Found Manifests: {str(manifest)}")
        # Guard against a falsy manifest (e.g. failed download returning
        # None) so the .get() below cannot raise AttributeError; the raw
        # manifest value is still returned unchanged.
        report_files = manifest.get("files", []) if manifest else []
        report_tasks = []
        for report_file_dict in report_files:
            local_file = report_file_dict.get("local_file")
            report_file = report_file_dict.get("key")

            # Skip files that were already fully processed.
            if record_report_status(manifest["manifest_id"], local_file,
                                    "no_request"):
                LOG.info(f"{local_file} was already processed")
                continue

            # Skip files another worker is currently handling.
            cache_key = f"{provider_uuid}:{report_file}"
            if self.worker_cache.task_is_running(cache_key):
                LOG.info(f"{local_file} process is in progress")
                continue

            # Each task gets its own copy of the manifest annotated with
            # the specific file it should download.
            report_context = manifest.copy()
            report_context["current_file"] = report_file
            report_context["local_file"] = local_file
            report_context["key"] = report_file

            report_tasks.append(
                get_report_files.s(
                    customer_name,
                    credentials,
                    data_source,
                    provider_type,
                    schema_name,
                    provider_uuid,
                    report_month,
                    report_context,
                ))
            LOG.info("Download queued - schema_name: %s.", schema_name)

        if report_tasks:
            # Fan out the downloads in parallel; summarize once all finish.
            async_id = chord(report_tasks, summarize_reports.s())()
            LOG.info(f"Manifest Processing Async ID: {async_id}")
        return manifest
Example #2
0
    def start_manifest_processing(self, customer_name, credentials,
                                  data_source, provider_type, schema_name,
                                  provider_uuid, report_month):
        """
        Start processing an account's manifest for the specified report_month.

        Args:
            (String) customer_name - customer name
            (String) credentials - credentials object
            (String) data_source - report storage location
            (String) provider_type - provider type for the downloader
            (String) schema_name - db tenant
            (String) provider_uuid - provider unique identifier
            (Date)   report_month - month to get latest manifest

        Returns:
            ({}) Dictionary containing the following keys:
                manifest_id - (String): Manifest ID for ReportManifestDBAccessor
                assembly_id - (String): UUID identifying report file
                compression - (String): Report compression format
                files       - ([{"key": full_file_path "local_file": "local file name"}]): List of report files.
            (Boolean) - Whether we are processing this manifest
        """
        # Switching initial ingest to use priority queue for QE tests based on QE_SCHEMA flag
        if self.queue_name is not None and self.provider_uuid is not None:
            summary_queue = self.queue_name
            report_queue = self.queue_name
        else:
            summary_queue = SUMMARIZE_REPORTS_QUEUE
            report_queue = GET_REPORT_FILES_QUEUE
        reports_tasks_queued = False
        downloader = ReportDownloader(
            customer_name=customer_name,
            credentials=credentials,
            data_source=data_source,
            provider_type=provider_type,
            provider_uuid=provider_uuid,
            report_name=None,
        )
        manifest = downloader.download_manifest(report_month)
        # Use an empty dict for lookups when the download produced no
        # manifest; the original (possibly falsy) manifest value is still
        # the one returned to the caller.
        manifest_data = manifest or {}
        tracing_id = manifest_data.get("assembly_id",
                                       manifest_data.get("request_id",
                                                         "no-request-id"))
        # Fetch the file list once; it is reused for logging, recording,
        # and task fan-out below.
        report_files = manifest_data.get("files", [])
        filenames = [file.get("local_file") for file in report_files]
        LOG.info(
            log_json(
                tracing_id,
                f"Report with manifest {tracing_id} contains the files: {filenames}"
            ))

        if manifest:
            LOG.debug("Saving all manifest file names.")
            # Record every expected file up front so later status checks
            # have a row for each report file in the manifest.
            record_all_manifest_files(manifest["manifest_id"], filenames,
                                      tracing_id)

        LOG.info(log_json(tracing_id, f"Found Manifests: {str(manifest)}"))
        report_tasks = []
        last_report_index = len(report_files) - 1
        for i, report_file_dict in enumerate(report_files):
            local_file = report_file_dict.get("local_file")
            report_file = report_file_dict.get("key")

            # Check if report file is complete or in progress.
            if record_report_status(manifest["manifest_id"], local_file,
                                    "no_request"):
                LOG.info(
                    log_json(tracing_id,
                             f"{local_file} was already processed"))
                continue

            cache_key = f"{provider_uuid}:{report_file}"
            if self.worker_cache.task_is_running(cache_key):
                LOG.info(
                    log_json(tracing_id,
                             f"{local_file} process is in progress"))
                continue

            # Each task gets its own copy of the manifest annotated with
            # the specific file it should download.
            report_context = manifest.copy()
            report_context["current_file"] = report_file
            report_context["local_file"] = local_file
            report_context["key"] = report_file
            # Propagate the tracing id so downstream tasks log consistently.
            report_context["request_id"] = tracing_id

            if provider_type in [Provider.PROVIDER_OCP, Provider.PROVIDER_GCP
                                 ] or i == last_report_index:
                # This create_table flag is used by the ParquetReportProcessor
                # to create a Hive/Trino table.
                # To reduce the number of times we check Trino/Hive tables, we just do this
                # on the final file of the set.
                report_context["create_table"] = True

            # This defaults to the celery queue
            report_tasks.append(
                get_report_files.s(
                    customer_name,
                    credentials,
                    data_source,
                    provider_type,
                    schema_name,
                    provider_uuid,
                    report_month,
                    report_context,
                ).set(queue=report_queue))
            LOG.info(
                log_json(tracing_id,
                         f"Download queued - schema_name: {schema_name}."))

        if report_tasks:
            reports_tasks_queued = True
            # Fan out the downloads in parallel; summarize once all finish.
            async_id = chord(report_tasks,
                             summarize_reports.s().set(queue=summary_queue))()
            LOG.debug(
                log_json(tracing_id,
                         f"Manifest Processing Async ID: {async_id}"))
        return manifest, reports_tasks_queued
Example #3
0
def extract_payload(url, request_id, context=None):  # noqa: C901
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            files - names of .csv usage reports for the manifest
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.  Format is:
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Once the files are extracted:
    1. Provider account is retrieved for the cluster id.  If no account is found we return.
    2. Manifest database record is created which will establish the assembly_id and number of files
    3. Report stats database record is created and is used as a filter to determine if the file
       has already been processed.
    4. All report files that have not been processed will have the local path to that report file
       added to the report_meta context dictionary for that file.
    5. Report file context dictionaries that require processing is added to a list which will be
       passed to the report processor.  All context from report_meta is used by the processor.

    Args:
        url (String): URL path to payload in the Insights upload service.
        request_id (String): Identifier associated with the payload
        context (Dict): Context for logging (account, etc). Defaults to a
            fresh dict per call.

    Returns:
        [dict]: keys: value
                files: [String],
                date: DateTime,
                cluster_id: String
                manifest_path: String,
                provider_uuid: String,
                provider_type: String
                schema_name: String
                manifest_id: Integer
                current_file: String

    """
    # Avoid a mutable default argument: the previous `context={}` default was
    # shared across calls and is mutated below (cluster_id/account keys), so
    # state from one payload leaked into subsequent calls.
    if context is None:
        context = {}
    temp_dir, temp_file_path, temp_file = download_payload(
        request_id, url, context)
    manifest_path = extract_payload_contents(request_id, temp_dir,
                                             temp_file_path, temp_file,
                                             context)

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = f"{temp_dir}/{manifest_path[0]}"
    report_meta = utils.get_report_details(os.path.dirname(full_manifest_path))

    # Filter and get account from payload's cluster-id
    cluster_id = report_meta.get("cluster_id")
    if context:
        context["cluster_id"] = cluster_id
    account = get_account_from_cluster_id(cluster_id, request_id, context)
    if not account:
        msg = f"Received unexpected OCP report from {cluster_id}"
        LOG.error(log_json(request_id, msg, context))
        # Clean up the extracted payload before bailing out.
        shutil.rmtree(temp_dir)
        return None
    schema_name = account.get("schema_name")
    provider_type = account.get("provider_type")
    # Schema names carry a 4-character prefix; strip it to recover the
    # account identifier. Presumably the "acct" prefix — TODO confirm.
    context["account"] = schema_name[4:]
    context["provider_type"] = provider_type
    report_meta["provider_uuid"] = account.get("provider_uuid")
    report_meta["provider_type"] = provider_type
    report_meta["schema_name"] = schema_name
    report_meta["account"] = schema_name[4:]
    report_meta["request_id"] = request_id

    # Create directory tree for report.
    usage_month = utils.month_date_range(report_meta.get("date"))
    destination_dir = f"{Config.INSIGHTS_LOCAL_REPORT_DIR}/{report_meta.get('cluster_id')}/{usage_month}"
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = f"{destination_dir}/{os.path.basename(report_meta.get('manifest_path'))}"
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Save Manifest
    report_meta["manifest_id"] = create_manifest_entries(
        report_meta, request_id, context)

    # Copy report payload
    report_metas = []
    # Loop-invariant: all report files live next to the manifest.
    subdirectory = os.path.dirname(full_manifest_path)
    # Guard against a manifest with no "files" entry.
    for report_file in report_meta.get("files") or []:
        current_meta = report_meta.copy()
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
            current_meta["current_file"] = payload_destination_path
            record_all_manifest_files(report_meta["manifest_id"],
                                      report_meta.get("files"))
            if not record_report_status(report_meta["manifest_id"],
                                        report_file, request_id, context):
                msg = f"Successfully extracted OCP for {report_meta.get('cluster_id')}/{usage_month}"
                LOG.info(log_json(request_id, msg, context))
                construct_parquet_reports(request_id, context, report_meta,
                                          payload_destination_path,
                                          report_file)
                report_metas.append(current_meta)
            # else: report already processed; nothing to queue.
        except FileNotFoundError:
            msg = f"File {str(report_file)} has not downloaded yet."
            LOG.debug(log_json(request_id, msg, context))

    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
    return report_metas
Example #4
0
    def start_manifest_processing(self, customer_name, credentials,
                                  data_source, provider_type, schema_name,
                                  provider_uuid, report_month):
        """
        Start processing an account's manifest for the specified report_month.

        Args:
            (String) customer_name - customer name
            (String) credentials - credentials object
            (String) data_source - report storage location
            (String) provider_type - provider type for the downloader
            (String) schema_name - db tenant
            (String) provider_uuid - provider unique identifier
            (Date)   report_month - month to get latest manifest

        Returns:
            ({}) Dictionary containing the following keys:
                manifest_id - (String): Manifest ID for ReportManifestDBAccessor
                assembly_id - (String): UUID identifying report file
                compression - (String): Report compression format
                files       - ([{"key": full_file_path "local_file": "local file name"}]): List of report files.
            (Boolean) - Whether we are processing this manifest
        """
        reports_tasks_queued = False
        downloader = ReportDownloader(
            customer_name=customer_name,
            credentials=credentials,
            data_source=data_source,
            provider_type=provider_type,
            provider_uuid=provider_uuid,
            report_name=None,
        )
        manifest = downloader.download_manifest(report_month)

        if manifest:
            LOG.info("Saving all manifest file names.")
            # Record every expected file up front so later status checks
            # have a row for each report file in the manifest.
            record_all_manifest_files(manifest["manifest_id"], [
                report.get("local_file")
                for report in manifest.get("files", [])
            ])

        LOG.info(f"Found Manifests: {str(manifest)}")
        # Guard against a falsy manifest (e.g. failed download returning
        # None) so the .get() below cannot raise AttributeError; the raw
        # manifest value is still returned unchanged.
        report_files = manifest.get("files", []) if manifest else []
        report_tasks = []
        last_report_index = len(report_files) - 1
        for i, report_file_dict in enumerate(report_files):
            local_file = report_file_dict.get("local_file")
            report_file = report_file_dict.get("key")

            # Skip files that were already fully processed.
            if record_report_status(manifest["manifest_id"], local_file,
                                    "no_request"):
                LOG.info(f"{local_file} was already processed")
                continue

            # Skip files another worker is currently handling.
            cache_key = f"{provider_uuid}:{report_file}"
            if self.worker_cache.task_is_running(cache_key):
                LOG.info(f"{local_file} process is in progress")
                continue

            # Each task gets its own copy of the manifest annotated with
            # the specific file it should download.
            report_context = manifest.copy()
            report_context["current_file"] = report_file
            report_context["local_file"] = local_file
            report_context["key"] = report_file

            if provider_type == Provider.PROVIDER_OCP or i == last_report_index:
                # To reduce the number of times we check Trino/Hive tables, we just do this
                # on the final file of the set.
                report_context["create_table"] = True

            # This defaults to the celery queue
            report_tasks.append(
                get_report_files.s(
                    customer_name,
                    credentials,
                    data_source,
                    provider_type,
                    schema_name,
                    provider_uuid,
                    report_month,
                    report_context,
                ).set(queue=GET_REPORT_FILES_QUEUE))
            LOG.info("Download queued - schema_name: %s.", schema_name)

        if report_tasks:
            reports_tasks_queued = True
            # Fan out the downloads in parallel; refresh materialized views
            # via the summarize task once every download completes.
            async_id = chord(
                report_tasks,
                summarize_reports.s().set(
                    queue=REFRESH_MATERIALIZED_VIEWS_QUEUE))()
            LOG.info(f"Manifest Processing Async ID: {async_id}")
        return manifest, reports_tasks_queued