def remove_temp_cur_files(path):
    """Remove temporary cost usage report files.

    Reads the current assembly ID from the top-level Manifest.json in
    ``path`` and deletes every fully-processed report file whose assembly
    ID no longer matches it.

    Args:
        path (String): Directory containing the downloaded report files.

    Returns:
        (List[String]): Paths of the files that were removed.

    """
    files = os.listdir(path)
    victim_list = []
    current_assembly_id = None
    for file in files:
        file_path = '{}/{}'.format(path, file)
        if file.endswith('Manifest.json'):
            with open(file_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
                current_assembly_id = manifest_json.get('assemblyId')
        else:
            stats = ReportStatsDBAccessor(file)
            completed_date = stats.get_last_completed_datetime()
            if completed_date:
                # Only fully-processed files are candidates for removal.
                assembly_id = utils.extract_uuids_from_string(file).pop()
                victim_list.append({'file': file_path,
                                    'completed_date': completed_date,
                                    'assemblyId': assembly_id})

    removed_files = []
    for victim in victim_list:
        if victim['assemblyId'] != current_assembly_id:
            try:
                LOG.info('Removing %s, completed processing on date %s',
                         victim['file'], victim['completed_date'])
                os.remove(victim['file'])
                removed_files.append(victim['file'])
            except FileNotFoundError:
                # The file may already be gone (e.g. removed by a
                # concurrent worker); treat it as already cleaned up.
                LOG.warning('Unable to locate file: %s', victim['file'])
    return removed_files
def test_extract_uuids_from_string_capitals(self):
    """Test that a uuid is extracted from a string with capital letters."""
    expected_uuid = "882083B7-EA62-4AAB-aA6a-f0d08d65Ee2b"
    cur_key = f"/koku/20180701-20180801/{expected_uuid}/koku-1.csv.gz"
    extracted = common_utils.extract_uuids_from_string(cur_key)
    # Exactly one UUID should be found, preserving its original casing.
    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], expected_uuid)
def test_extract_uuids_from_string(self):
    """Test that a uuid is extracted from a string."""
    expected_uuid = "882083b7-ea62-4aab-aa6a-f0d08d65ee2b"
    cur_key = f"/koku/20180701-20180801/{expected_uuid}/koku-1.csv.gz"
    extracted = common_utils.extract_uuids_from_string(cur_key)
    # Exactly one UUID should be found in the report key.
    self.assertEqual(len(extracted), 1)
    self.assertEqual(extracted[0], expected_uuid)
def get_billing_source(self):
    """
    Retrieve the name of the S3 bucket from the SNS notification.

    FIXME: Diving through the response body to extract values such as
    object_key and s3_bucket_name could be improved upon.  A concerted
    effort to determine whether or not there will always be one item in
    "Records" should be done to know that this method will be reliable.

    FIXME: The filtering for only top-level manifest files was done as a
    quick way to limit the number of download/processing requests.  If we
    can find a better way to ensure we are only responding to one
    notification when multiple files have changed in the bucket then the
    filtering could be removed.

    Args:
        None

    Returns:
        (String) Name of the billing source from the notification

    Raises:
        (NotificationInterfaceFilter): Is raised when the notification
            is not for a top-level manifest file.
        (AWSNotificationHandlerError): Is raised when parsing the
            notification body message fails.

    """
    s3_bucket_name = None
    if self._msg_type == 'Notification' and self._body:
        body_dict = json.loads(self._body)
        message_dict = json.loads(body_dict['Message'])
        # There must be a more reliable way of getting bucket name..
        try:
            s3_record = message_dict['Records'][0]['s3']
            object_key = s3_record['object']['key']
            s3_bucket_name = s3_record['bucket']['name']
        except KeyError:
            raise AWSNotificationHandlerError(
                'Unexpected "Message" element in body.')

        if object_key.endswith('Manifest.json'):
            # A manifest key containing a UUID is nested below the
            # top level and is intentionally ignored.
            if utils.extract_uuids_from_string(object_key):
                msg = 'Ignoring non-toplevel manifest file: {}'.format(
                    object_key)
                raise NotificationInterfaceFilter(msg)
        else:
            msg = 'Ignoring non-manifest file: {}'.format(object_key)
            raise NotificationInterfaceFilter(msg)
    return s3_bucket_name or None
def _get_manifest(self, date_time):
    """
    Download and return the CUR manifest for the given date.

    Builds a manifest dict from the most recently modified report file
    found under local storage, then writes it out as Manifest.json.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        (Dict): A dict-like object serialized from JSON data.

    Raises:
        (AzureReportDownloaderError): When no assembly ID can be
            extracted from the report file name.

    """
    report_path = self._get_report_path(date_time)
    manifest = {}
    local_path = '{}/{}/{}'.format(self.local_storage,
                                   self.container_name, report_path)
    if not os.path.exists(local_path):
        LOG.error('Unable to find manifest.')
        return manifest
    report_names = os.listdir(local_path)
    # Sort newest-first so index 0 really is the most recent report.
    sorted_by_modified_date = sorted(
        report_names,
        key=lambda file: os.path.getmtime(f'{local_path}/{file}'),
        reverse=True)
    if sorted_by_modified_date:
        # Bug fix: previously read report_names[0], an arbitrary
        # listdir() entry, instead of the sorted most-recent file.
        report_name = sorted_by_modified_date[0]
        try:
            manifest['assemblyId'] = extract_uuids_from_string(
                report_name).pop()
        except IndexError:
            # Bug fix: was '%s'.format(report_name), which never
            # interpolated and left a literal '%s' in the message.
            message = f'Unable to extract assemblyID from {report_name}'
            raise AzureReportDownloaderError(message)
        billing_period = {
            'start': (report_path.split('/')[-1]).split('-')[0],
            'end': (report_path.split('/')[-1]).split('-')[1]
        }
        manifest['billingPeriod'] = billing_period
        manifest['reportKeys'] = [f'{local_path}/{report_name}']
        manifest['Compression'] = UNCOMPRESSED
        manifest_file = '{}/{}'.format(self._get_exports_data_directory(),
                                       'Manifest.json')
        with open(manifest_file, 'w') as manifest_hdl:
            manifest_hdl.write(json.dumps(manifest))
    return manifest
def _get_manifest(self, date_time):
    """
    Download and return the CUR manifest for the given date.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        (Dict, DateTime): A dict-like object serialized from JSON data,
            and the modification timestamp of the selected report file
            (None when no manifest could be built).

    Raises:
        (AzureReportDownloaderError): When no assembly ID can be
            extracted from the report file name.

    """
    report_path = self._get_report_path(date_time)
    manifest = {}
    local_path = f"{self.local_storage}/{self.container_name}/{report_path}"
    if not os.path.exists(local_path):
        msg = f"Unable to find manifest: {local_path}."
        LOG.error(log_json(self.request_id, msg, self.context))
        # Bug fix: returned a bare dict here while the success path
        # returns a 2-tuple, breaking callers that unpack the result.
        return manifest, None
    manifest_modified_timestamp = None
    report_names = os.listdir(local_path)
    # Sort newest-first so index 0 really is the most recent report.
    sorted_by_modified_date = sorted(
        report_names,
        key=lambda file: os.path.getmtime(f"{local_path}/{file}"),
        reverse=True)
    if sorted_by_modified_date:
        # Bug fix: previously read report_names[0], an arbitrary
        # listdir() entry, instead of the sorted most-recent file.
        report_name = sorted_by_modified_date[0]
        full_file_path = f"{local_path}/{report_name}"
        manifest_modified_timestamp = datetime.datetime.fromtimestamp(
            os.path.getmtime(full_file_path))
        try:
            manifest["assemblyId"] = extract_uuids_from_string(
                report_name).pop()
        except IndexError:
            message = f"Unable to extract assemblyID from {report_name}"
            raise AzureReportDownloaderError(message)
        billing_period = {
            "start": (report_path.split("/")[-1]).split("-")[0],
            "end": (report_path.split("/")[-1]).split("-")[1],
        }
        manifest["billingPeriod"] = billing_period
        manifest["reportKeys"] = [f"{local_path}/{report_name}"]
        manifest["Compression"] = UNCOMPRESSED
    return manifest, manifest_modified_timestamp
def get_assembly_id_from_cur_key(key):
    """
    Get the assembly ID from a cost and usage report key.

    Args:
        key (String): Full key for a cost and usage report location.
        example: /koku/20180701-20180801/882083b7-ea62-4aab-aa6a-f0d08d65ee2b/koku-1.csv.gz

    Returns:
        (String): "Assembly ID UUID"
        example: "882083b7-ea62-4aab-aa6a-f0d08d65ee2b"

    """
    found_uuids = utils.extract_uuids_from_string(key)
    if not found_uuids:
        # No UUID present anywhere in the key.
        return None
    return found_uuids.pop()
def _get_manifest(self, date_time):
    """
    Download and return the CUR manifest for the given date.

    Looks up the latest cost export blob for the report path, builds a
    manifest dict from it, and writes the dict out as Manifest.json.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        (Dict): A dict-like object serialized from JSON data.

    Raises:
        (AzureReportDownloaderError): When no assembly ID can be
            extracted from the blob name.

    """
    report_path = self._get_report_path(date_time)
    manifest = {}
    try:
        blob = self._azure_client.get_latest_cost_export_for_path(
            report_path, self.container_name)
    except AzureCostReportNotFound as ex:
        LOG.error('Unable to find manifest. Error: %s', str(ex))
        return manifest
    report_name = blob.name
    try:
        manifest['assemblyId'] = extract_uuids_from_string(
            report_name).pop()
    except IndexError:
        # Bug fix: was '%s'.format(report_name), which never
        # interpolated and left a literal '%s' in the message.
        message = 'Unable to extract assemblyID from {}'.format(
            report_name)
        raise AzureReportDownloaderError(message)
    billing_period = {
        'start': (report_path.split('/')[-1]).split('-')[0],
        'end': (report_path.split('/')[-1]).split('-')[1]
    }
    manifest['billingPeriod'] = billing_period
    manifest['reportKeys'] = [report_name]
    manifest['Compression'] = UNCOMPRESSED
    manifest_file = '{}/{}'.format(self._get_exports_data_directory(),
                                   'Manifest.json')
    with open(manifest_file, 'w') as manifest_hdl:
        manifest_hdl.write(json.dumps(manifest))
    return manifest
def _get_manifest(self, date_time):
    """
    Download and return the CUR manifest for the given date.

    Args:
        date_time (DateTime): The starting datetime object

    Returns:
        (Dict): A dict-like object serialized from JSON data.

    """
    report_path = self._get_report_path(date_time)
    manifest = {}
    try:
        blob = self._azure_client.get_latest_cost_export_for_path(
            report_path, self.container_name)
    except AzureCostReportNotFound as ex:
        msg = f"Unable to find manifest. Error: {str(ex)}"
        LOG.info(log_json(self.request_id, msg, self.context))
        return manifest, None
    export_name = blob.name
    try:
        manifest["assemblyId"] = extract_uuids_from_string(export_name).pop()
    except IndexError:
        message = f"Unable to extract assemblyID from {export_name}"
        raise AzureReportDownloaderError(message)
    # The final path segment encodes the billing period as START-END.
    date_range = report_path.split("/")[-1].split("-")
    manifest["billingPeriod"] = {"start": date_range[0], "end": date_range[1]}
    manifest["reportKeys"] = [export_name]
    manifest["Compression"] = UNCOMPRESSED
    return manifest, blob.last_modified
def remove_temp_cur_files(self, report_path, manifest_id):
    """Remove temporary cost usage report files."""
    entries = listdir(report_path)
    LOG.info('Cleaning up temporary report files for %s', self._report_name)
    current_assembly_id = None
    candidates = []
    for entry in entries:
        entry_path = '{}/{}'.format(report_path, entry)
        if entry.endswith('Manifest.json'):
            # The manifest names the assembly whose files must be kept.
            with open(entry_path, 'r') as manifest_file_handle:
                manifest_json = json.load(manifest_file_handle)
            current_assembly_id = manifest_json.get('assemblyId')
            continue
        with ReportStatsDBAccessor(entry, manifest_id) as stats:
            completed_date = stats.get_last_completed_datetime()
        if not completed_date:
            continue
        candidates.append({
            'file': entry_path,
            'completed_date': completed_date,
            'assemblyId': extract_uuids_from_string(entry).pop(),
        })

    removed_files = []
    for candidate in candidates:
        if candidate['assemblyId'] == current_assembly_id:
            continue
        try:
            LOG.info('Removing %s, completed processing on date %s',
                     candidate['file'], candidate['completed_date'])
            remove(candidate['file'])
            removed_files.append(candidate['file'])
        except FileNotFoundError:
            LOG.warning('Unable to locate file: %s', candidate['file'])
    return removed_files