Example #1
    def get_report_for(self, date_time):
        """
        Get OCP usage report files corresponding to a date.

        Args:
            date_time (DateTime): Start date of the usage report.

        Returns:
            ([]) List of file paths for a particular report.

        """
        dates = utils.month_date_range(date_time)
        LOG.debug('Looking for cluster %s report for date %s', self.cluster_id, str(dates))
        directory = '{}/{}/{}'.format(REPORTS_DIR, self.cluster_id, dates)

        manifest = self._get_manifest(date_time)
        LOG.info('manifest found: %s', str(manifest))
        latest_uuid = manifest.get('uuid')

        reports = []
        try:
            if latest_uuid:
                for file in os.listdir(directory):
                    if file.startswith(latest_uuid):
                        report_full_path = os.path.join(directory, file)
                        LOG.info('Found file %s', report_full_path)
                        reports.append(report_full_path)
            else:
                LOG.error('Current UUID for report could not be found.')
        except OSError as error:
            LOG.error('Unable to get report. Error: %s', str(error))
        return reports
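
Every snippet on this page derives a report directory or billing period from utils.month_date_range, whose implementation is not included here. A minimal sketch of its presumed behavior, inferred from the "%Y%m%d" parsing and the 20181001-20181101 directory names that appear in later examples (the real helper also appears to accept the manifest's date string; this sketch assumes a datetime):

# Hypothetical sketch of utils.month_date_range -- not the project's actual code.
# Assumed behavior: return "<first day of month>-<first day of next month>" as
# YYYYMMDD strings, e.g. "20181001-20181101".
import datetime


def month_date_range(for_date):
    start = for_date.replace(day=1)
    if start.month == 12:
        end = start.replace(year=start.year + 1, month=1)
    else:
        end = start.replace(month=start.month + 1)
    return f"{start.strftime('%Y%m%d')}-{end.strftime('%Y%m%d')}"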
Example #2
    def get_report_for(self, date_time):
        """
        Get OCP usage report files corresponding to a date.

        Args:
            date_time (DateTime): Start date of the usage report.

        Returns:
            ([]) List of file paths for a particular report.

        """
        dates = utils.month_date_range(date_time)
        msg = f"Looking for cluster {self.cluster_id} report for date {str(dates)}"
        LOG.debug(log_json(self.request_id, msg, self.context))
        directory = f"{REPORTS_DIR}/{self.cluster_id}/{dates}"

        manifest = self._get_manifest(date_time)
        msg = f"manifest found: {str(manifest)}"
        LOG.info(log_json(self.request_id, msg, self.context))

        reports = []
        for file in manifest.get("files", []):
            report_full_path = os.path.join(directory, file)
            reports.append(report_full_path)

        return reports
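
This version and several snippets below log through a log_json helper that is not shown on this page. A minimal sketch, assuming it simply packages the message, request id, and logging context into one JSON-encoded log line:

# Hypothetical sketch of log_json -- assumed to bundle the message, request id, and
# context into a single JSON string for structured log output.
import json


def log_json(request_id, message, context=None):
    entry = {"message": message, "request_id": request_id}
    if context:
        entry["context"] = context
    return json.dumps(entry)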
Example #3
    def remove_temp_cur_files(self, report_path):
        """Remove temporary report files."""
        LOG.info('Cleaning up temporary report files for %s', report_path)

        manifest_path = '{}/{}'.format(report_path, 'manifest.json')
        current_assembly_id = None
        cluster_id = None
        payload_date = None
        month_range = None
        with open(manifest_path, 'r') as manifest_file_handle:
            manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('uuid')
            cluster_id = manifest_json.get('cluster_id')
            payload_date = manifest_json.get('date')
            if payload_date:
                month_range = month_date_range(parser.parse(payload_date))

        removed_files = []
        if current_assembly_id:
            removed_files = clear_temp_directory(report_path,
                                                 current_assembly_id)

        if current_assembly_id and cluster_id and month_range:
            report_prefix = '{}_'.format(month_range)
            insights_local_path = '{}/{}/{}'.format(
                Config.INSIGHTS_LOCAL_REPORT_DIR, cluster_id, month_range)
            clear_temp_directory(insights_local_path, current_assembly_id,
                                 report_prefix)

        return removed_files
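
remove_temp_cur_files delegates the actual deletion to clear_temp_directory, which is not shown on this page. A simplified, hypothetical sketch of its contract (directory, current assembly id, optional filename prefix; returns the removed paths); per the test in Example #12 below, the real helper also checks each file's processing status before deleting:

# Hypothetical, simplified sketch of clear_temp_directory -- the real helper also skips
# files whose processing has not completed (see the test in Example #12).
import os


def clear_temp_directory(directory, current_assembly_id, prefix=""):
    """Remove stale files that do not belong to the current assembly; return removed paths."""
    removed = []
    if not os.path.isdir(directory):
        return removed
    for file_name in os.listdir(directory):
        if prefix and not file_name.startswith(prefix):
            continue
        if file_name.startswith(current_assembly_id) or file_name == "manifest.json":
            continue  # keep the active assembly's files and the manifest itself
        full_path = os.path.join(directory, file_name)
        os.remove(full_path)
        removed.append(full_path)
    return removed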
Example #4
    def _get_manifest(self, date_time):
        dates = utils.month_date_range(date_time)
        directory = f"{REPORTS_DIR}/{self.cluster_id}/{dates}"
        msg = f"Looking for manifest at {directory}"
        LOG.info(log_json(self.request_id, msg, self.context))
        report_meta = utils.get_report_details(directory)
        return report_meta
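
_get_manifest in turn relies on utils.get_report_details, also not shown here. A minimal sketch, assuming it loads manifest.json from the given directory and returns its contents plus the path it was read from (an empty dict when no manifest is present, which is consistent with the manifest.get(...) lookups in the examples above):

# Hypothetical sketch of utils.get_report_details -- assumed to load manifest.json from
# the report directory and return its contents along with the manifest's path.
import json
import os


def get_report_details(report_directory):
    manifest_path = os.path.join(report_directory, "manifest.json")
    if not os.path.exists(manifest_path):
        return {}
    with open(manifest_path, "r") as manifest_file:
        payload_dict = json.load(manifest_file)
    payload_dict["manifest_path"] = manifest_path
    return payload_dict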
Example #5
    def _prepare_db_manifest_record(self, manifest):
        """Prepare to insert or update the manifest DB record."""
        assembly_id = manifest.get('uuid')

        date_range = utils.month_date_range(manifest.get('date'))
        billing_str = date_range.split('-')[0]
        billing_start = datetime.datetime.strptime(billing_str, '%Y%m%d')

        num_of_files = len(manifest.get('files', []))
        return self._process_manifest_db_record(assembly_id, billing_start, num_of_files)
Example #6
    def _prepare_db_manifest_record(self, manifest):
        """Prepare to insert or update the manifest DB record."""
        assembly_id = manifest.get("uuid")

        date_range = utils.month_date_range(manifest.get("date"))
        billing_str = date_range.split("-")[0]
        billing_start = datetime.datetime.strptime(billing_str, "%Y%m%d")
        manifest_timestamp = manifest.get("date")
        num_of_files = len(manifest.get("files", []))

        return self._process_manifest_db_record(assembly_id, billing_start, num_of_files, manifest_timestamp)
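
The billing-start logic in Examples #5 and #6 assumes month_date_range returns a "YYYYMMDD-YYYYMMDD" string; a quick worked example with an illustrative value:

# Worked example of the billing-start parsing above (value is illustrative).
import datetime

date_range = "20181001-20181101"           # shape returned by utils.month_date_range
billing_str = date_range.split("-")[0]     # "20181001"
billing_start = datetime.datetime.strptime(billing_str, "%Y%m%d")
print(billing_start)                       # 2018-10-01 00:00:00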
Example #7
    def _remove_manifest_file(self, date_time):
        """Clean up the manifest file after extracting information."""
        dates = utils.month_date_range(date_time)
        directory = '{}/{}/{}'.format(REPORTS_DIR, self.cluster_id, dates)

        manifest_path = '{}/{}'.format(directory, 'manifest.json')
        try:
            os.remove(manifest_path)
            LOG.info('Deleted manifest file at %s', directory)
        except OSError:
            LOG.info('Could not delete manifest file at %s', directory)

        return None
Example #8
    def _remove_manifest_file(self, date_time):
        """Clean up the manifest file after extracting information."""
        dates = utils.month_date_range(date_time)
        directory = f"{REPORTS_DIR}/{self.cluster_id}/{dates}"

        manifest_path = "{}/{}".format(directory, "manifest.json")
        try:
            os.remove(manifest_path)
            LOG.info("Deleted manifest file at %s", directory)
        except OSError:
            LOG.info("Could not delete manifest file at %s", directory)

        return None
Example #9
    def _remove_manifest_file(self, date_time):
        """Clean up the manifest file after extracting information."""
        dates = utils.month_date_range(date_time)
        directory = f"{REPORTS_DIR}/{self.cluster_id}/{dates}"

        manifest_path = "{}/{}".format(directory, "manifest.json")
        try:
            os.remove(manifest_path)
            msg = f"Deleted manifest file at {directory}"
            LOG.debug(log_json(self.request_id, msg, self.context))
        except OSError:
            msg = f"Could not delete manifest file at {directory}"
            LOG.info(log_json(self.request_id, msg, self.context))

        return None
Example #10
def extract_payload(url):
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            file - .csv usage report file name
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Args:
        url (String): URL path to payload in the Insights upload service.

    Returns:
        (Dict): keys: value
            file: String,
            cluster_id: String,
            payload_date: DateTime,
            manifest_path: String,
            uuid: String

    """
    # Create temporary directory for initial file staging and verification in the
    # OpenShift PVC directory so that any failures can be triaged in the event
    # the pod goes down.
    os.makedirs(Config.TMP_DIR, exist_ok=True)
    temp_dir = tempfile.mkdtemp(dir=Config.TMP_DIR)

    # Download file from quarantine bucket as tar.gz
    try:
        download_response = requests.get(url)
        download_response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError('Unable to download file. Error: ', str(err))

    temp_file = '{}/{}'.format(temp_dir, 'usage.tar.gz')
    try:
        temp_file_hdl = open('{}/{}'.format(temp_dir, 'usage.tar.gz'), 'wb')
        temp_file_hdl.write(download_response.content)
        temp_file_hdl.close()
    except (OSError, IOError) as error:
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError('Unable to write file. Error: ', str(error))

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(temp_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if 'manifest.json' in manifest]
    except ReadError as error:
        LOG.error('Unable to untar file. Reason: %s', str(error))
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError('Extraction failure.')

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = '{}/{}'.format(temp_dir, manifest_path[0])
    report_meta = utils.get_report_details(os.path.dirname(full_manifest_path))

    # Create directory tree for report.
    usage_month = utils.month_date_range(report_meta.get('date'))
    destination_dir = '{}/{}/{}'.format(Config.INSIGHTS_LOCAL_REPORT_DIR,
                                        report_meta.get('cluster_id'),
                                        usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = '{}/{}'.format(destination_dir,
                                               os.path.basename(report_meta.get('manifest_path')))
    shutil.copy(report_meta.get('manifest_path'), manifest_destination_path)

    # Copy report payload
    for report_file in report_meta.get('files'):
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = '{}/{}'.format(subdirectory, report_file)
        payload_destination_path = '{}/{}'.format(destination_dir, report_file)
        try:
            shutil.copy(payload_source_path, payload_destination_path)
        except FileNotFoundError as error:
            LOG.error('Unable to find file in payload. %s', str(error))
            raise KafkaMsgHandlerError('Missing file in payload')
    LOG.info('Successfully extracted OCP for %s/%s', report_meta.get('cluster_id'), usage_month)
    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
    return report_meta
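
For reference, a manifest.json for this payload format might look roughly like the following; the values are made up, and the files list follows the loop at the end of the function (the docstring above describes a single file key):

# Illustrative manifest.json contents for the payload described above (values are made up).
sample_manifest = {
    "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
    "cluster_id": "my-ocp-cluster-1",
    "date": "2018-10-01",
    "files": ["6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5_report_name.csv"],
}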
Example #11
    def _get_manifest(self, date_time):
        dates = utils.month_date_range(date_time)
        directory = '{}/{}/{}'.format(REPORTS_DIR, self.cluster_id, dates)
        LOG.info('Looking for manifest at %s', directory)
        report_meta = utils.get_report_details(directory)
        return report_meta
Example #12
    def test_remove_temp_cur_files(self):
        """Test to remove temporary usage report files."""
        insights_local_dir = tempfile.mkdtemp()
        cluster_id = 'my-ocp-cluster'
        manifest_date = "2018-05-01"
        manifest_data = {
            "uuid": "6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5",
            "cluster_id": cluster_id,
            "date": manifest_date
        }
        manifest = '{}/{}'.format(insights_local_dir, 'manifest.json')
        with open(manifest, 'w') as outfile:
            json.dump(manifest_data, outfile)

        file_list = [
            {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-1.csv.gz',
             'processed_date': datetime.datetime(year=2018, month=5, day=3)},
            {'file': '6e019de5-a41d-4cdb-b9a0-99bfba9a9cb5-ocp-2.csv.gz',
             'processed_date': datetime.datetime(year=2018, month=5, day=3)},
            {'file': '2aeb9169-2526-441c-9eca-d7ed015d52bd-ocp-1.csv.gz',
             'processed_date': datetime.datetime(year=2018, month=5, day=2)},
            {'file': '6c8487e8-c590-4e6a-b2c2-91a2375c0bad-ocp-1.csv.gz',
             'processed_date': datetime.datetime(year=2018, month=5, day=1)},
            {'file': '6c8487e8-c590-4e6a-b2c2-91a2375d0bed-ocp-1.csv.gz',
             'processed_date': None},
        ]
        expected_delete_list = []
        for item in file_list:
            path = '{}/{}'.format(insights_local_dir, item['file'])
            f = open(path, 'w')
            obj = self.manifest_accessor.get_manifest(self.assembly_id,
                                                      self.provider_id)
            stats = ReportStatsDBAccessor(item['file'], obj.id)
            stats.update(last_completed_datetime=item['processed_date'])
            stats.commit()
            stats.close_session()
            f.close()
            if not item['file'].startswith(
                    manifest_data.get('uuid')) and item['processed_date']:
                expected_delete_list.append(path)
        fake_dir = tempfile.mkdtemp()
        with patch.object(Config, 'INSIGHTS_LOCAL_REPORT_DIR', fake_dir):
            destination_dir = '{}/{}/{}'.format(
                fake_dir, cluster_id,
                month_date_range(parser.parse(manifest_date)))
            os.makedirs(destination_dir, exist_ok=True)
            removed_files = self.ocp_processor.remove_temp_cur_files(
                insights_local_dir)
            self.assertEqual(sorted(removed_files),
                             sorted(expected_delete_list))
            shutil.rmtree(insights_local_dir)
            shutil.rmtree(fake_dir)
Example #13
def extract_payload(url, request_id, context={}):  # noqa: C901
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            files - names of .csv usage reports for the manifest
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Once the files are extracted:
    1. The provider account is retrieved for the cluster id.  If no account is found, we return.
    2. A manifest database record is created, which establishes the assembly_id and number of files.
    3. A report stats database record is created and is used as a filter to determine whether the
       file has already been processed.
    4. Each report file that has not yet been processed has its local path added to the
       report_meta context dictionary for that file.
    5. Report file context dictionaries that require processing are added to a list that is
       passed to the report processor.  All context from report_meta is used by the processor.

    Args:
        url (String): URL path to payload in the Insights upload service.
        request_id (String): Identifier associated with the payload
        context (Dict): Context for logging (account, etc)

    Returns:
        [dict]: keys: value
                files: [String],
                date: DateTime,
                cluster_id: String
                manifest_path: String,
                provider_uuid: String,
                provider_type: String
                schema_name: String
                manifest_id: Integer
                current_file: String

    """
    temp_dir, temp_file_path, temp_file = download_payload(request_id, url, context)
    manifest_path = extract_payload_contents(request_id, temp_dir, temp_file_path, temp_file, context)

    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = f"{temp_dir}/{manifest_path[0]}"
    report_meta = utils.get_report_details(os.path.dirname(full_manifest_path))

    # Filter and get account from payload's cluster-id
    cluster_id = report_meta.get("cluster_id")
    if context:
        context["cluster_id"] = cluster_id
    account = get_account_from_cluster_id(cluster_id, request_id, context)
    if not account:
        msg = f"Recieved unexpected OCP report from {cluster_id}"
        LOG.error(log_json(request_id, msg, context))
        shutil.rmtree(temp_dir)
        return None
    schema_name = account.get("schema_name")
    provider_type = account.get("provider_type")
    context["account"] = schema_name[4:]
    context["provider_type"] = provider_type
    report_meta["provider_uuid"] = account.get("provider_uuid")
    report_meta["provider_type"] = provider_type
    report_meta["schema_name"] = schema_name
    report_meta["account"] = schema_name[4:]
    report_meta["request_id"] = request_id

    # Create directory tree for report.
    usage_month = utils.month_date_range(report_meta.get("date"))
    destination_dir = f"{Config.INSIGHTS_LOCAL_REPORT_DIR}/{report_meta.get('cluster_id')}/{usage_month}"
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = f"{destination_dir}/{os.path.basename(report_meta.get('manifest_path'))}"
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Save Manifest
    report_meta["manifest_id"] = create_manifest_entries(report_meta, request_id, context)

    # Copy report payload
    report_metas = []
    for report_file in report_meta.get("files"):
        current_meta = report_meta.copy()
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
            current_meta["current_file"] = payload_destination_path
            if not record_report_status(report_meta["manifest_id"], report_file, request_id, context):
                msg = f"Successfully extracted OCP for {report_meta.get('cluster_id')}/{usage_month}"
                LOG.info(log_json(request_id, msg, context))
                create_daily_archives(
                    request_id,
                    report_meta["account"],
                    report_meta["provider_uuid"],
                    report_file,
                    payload_destination_path,
                    report_meta["manifest_id"],
                    report_meta["date"],
                    context,
                )
                report_metas.append(current_meta)
            else:
                # Report already processed
                pass
        except FileNotFoundError:
            msg = f"File {str(report_file)} has not downloaded yet."
            LOG.debug(log_json(request_id, msg, context))

    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
    return report_metas
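
The returned report_metas list is what step 5 of the docstring hands to the report processor. A hypothetical caller (process_report is a stand-in name, not the project's actual function) might look like this:

# Hypothetical consumer of extract_payload's return value -- process_report is a stand-in.
def handle_payload(url, request_id, context):
    report_metas = extract_payload(url, request_id, context)
    if not report_metas:
        return  # unknown cluster or nothing new to process
    for report_meta in report_metas:
        # Each entry carries schema_name, provider_uuid, manifest_id and current_file,
        # which is all the context the report processor needs.
        process_report(report_meta)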
Example #14
def extract_payload(url):  # noqa: C901
    """
    Extract OCP usage report payload into local directory structure.

    Payload is expected to be a .tar.gz file that contains:
    1. manifest.json - dictionary containing usage report details needed
        for report processing.
        Dictionary Contains:
            files - names of .csv usage reports for the manifest
            date - DateTime that the payload was created
            uuid - uuid for payload
            cluster_id  - OCP cluster ID.
    2. *.csv - Actual usage report for the cluster.
        Format is: <uuid>_report_name.csv

    On successful completion the report and manifest will be in a directory
    structure that the OCPReportDownloader is expecting.

    Ex: /var/tmp/insights_local/my-ocp-cluster-1/20181001-20181101

    Once the files are extracted:
    1. The provider account is retrieved for the cluster id.  If no account is found, we return.
    2. A manifest database record is created, which establishes the assembly_id and number of files.
    3. A report stats database record is created and is used as a filter to determine whether the
       file has already been processed.
    4. Each report file that has not yet been processed has its local path added to the
       report_meta context dictionary for that file.
    5. Report file context dictionaries that require processing are added to a list that is
       passed to the report processor.  All context from report_meta is used by the processor.

    Args:
        url (String): URL path to payload in the Insights upload service.

    Returns:
        [dict]: keys: value
                files: [String],
                date: DateTime,
                cluster_id: String
                manifest_path: String,
                provider_uuid: String,
                provider_type: String
                schema_name: String
                manifest_id: Integer
                current_file: String

    """
    # Create temporary directory for initial file staging and verification in the
    # OpenShift PVC directory so that any failures can be triaged in the event
    # the pod goes down.
    os.makedirs(Config.PVC_DIR, exist_ok=True)
    temp_dir = tempfile.mkdtemp(dir=Config.PVC_DIR)

    # Download file from quarantine bucket as tar.gz
    try:
        download_response = requests.get(url)
        download_response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError("Unable to download file. Error: ", str(err))

    temp_file = "{}/{}".format(temp_dir, "usage.tar.gz")
    try:
        temp_file_hdl = open("{}/{}".format(temp_dir, "usage.tar.gz"), "wb")
        temp_file_hdl.write(download_response.content)
        temp_file_hdl.close()
    except (OSError, IOError) as error:
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError("Unable to write file. Error: ", str(error))

    # Extract tarball into temp directory
    try:
        mytar = TarFile.open(temp_file)
        mytar.extractall(path=temp_dir)
        files = mytar.getnames()
        manifest_path = [manifest for manifest in files if "manifest.json" in manifest]
    except (ReadError, EOFError, OSError) as error:
        LOG.warning("Unable to untar file. Reason: %s", str(error))
        shutil.rmtree(temp_dir)
        raise KafkaMsgHandlerError("Extraction failure.")

    if not manifest_path:
        raise KafkaMsgHandlerError("No manifest found in payload.")
    # Open manifest.json file and build the payload dictionary.
    full_manifest_path = "{}/{}".format(temp_dir, manifest_path[0])
    report_meta = utils.get_report_details(os.path.dirname(full_manifest_path))

    # Filter and get account from payload's cluster-id
    account = get_account_from_cluster_id(report_meta.get("cluster_id"))
    if not account:
        LOG.error(f"Recieved unexpected OCP report from {report_meta.get('cluster_id')}")
        shutil.rmtree(temp_dir)
        return None

    report_meta["provider_uuid"] = account.get("provider_uuid")
    report_meta["provider_type"] = account.get("provider_type")
    report_meta["schema_name"] = account.get("schema_name")

    # Create directory tree for report.
    usage_month = utils.month_date_range(report_meta.get("date"))
    destination_dir = "{}/{}/{}".format(Config.INSIGHTS_LOCAL_REPORT_DIR, report_meta.get("cluster_id"), usage_month)
    os.makedirs(destination_dir, exist_ok=True)

    # Copy manifest
    manifest_destination_path = "{}/{}".format(destination_dir, os.path.basename(report_meta.get("manifest_path")))
    shutil.copy(report_meta.get("manifest_path"), manifest_destination_path)

    # Save Manifest
    report_meta["manifest_id"] = create_manifest_entries(report_meta)

    # Copy report payload
    report_metas = []
    for report_file in report_meta.get("files"):
        current_meta = report_meta.copy()
        subdirectory = os.path.dirname(full_manifest_path)
        payload_source_path = f"{subdirectory}/{report_file}"
        payload_destination_path = f"{destination_dir}/{report_file}"
        try:
            shutil.copy(payload_source_path, payload_destination_path)
            current_meta["current_file"] = payload_destination_path
            if not record_report_status(report_meta["manifest_id"], report_file):
                LOG.info("Successfully extracted OCP for %s/%s", report_meta.get("cluster_id"), usage_month)
                report_metas.append(current_meta)
            else:
                # Report already processed
                pass
        except FileNotFoundError:
            LOG.debug("File %s has not downloaded yet.", str(report_file))

    # Remove temporary directory and files
    shutil.rmtree(temp_dir)
    return report_metas
Example #15
    def _get_manifest(self, date_time):
        dates = utils.month_date_range(date_time)
        directory = f"{REPORTS_DIR}/{self.cluster_id}/{dates}"
        LOG.info("Looking for manifest at %s", directory)
        report_meta = utils.get_report_details(directory)
        return report_meta