def test_error_message(self):
    event, context = create_event(), LambdaContextMock()
    logger = CumulusLogger()
    logger.setMetadata(event, context)
    try:
        1 / 0
    except ZeroDivisionError as ex:
        # exc_info=False: the message is formatted but no traceback is appended
        msg = logger.createMessage("test exc_info", exc_info=False)
        self.assertIn("test exc_info", msg["message"])
        self.assertNotIn("ZeroDivisionError", msg["message"])
        logger.error("test exc_info", exc_info=False)

        # exc_info=True: the active exception's traceback is appended
        msg = logger.createMessage(
            "test formatted {} exc_info ", "bar", exc_info=True)
        self.assertIn("test formatted bar exc_info", msg["message"])
        self.assertIn("ZeroDivisionError", msg["message"])
        logger.warn("test formatted {} exc_info ", "bar", exc_info=True)

        # exc_info may also be a (type, value, traceback) tuple...
        msg = logger.createMessage(
            "test exc_info", exc_info=sys.exc_info())
        self.assertIn("test exc_info", msg["message"])
        self.assertIn("ZeroDivisionError", msg["message"])
        logger.fatal("test exc_info", exc_info=sys.exc_info())

        # ...or an exception instance
        msg = logger.createMessage("test exc_info", exc_info=ex)
        self.assertIn("test exc_info", msg["message"])
        self.assertIn("ZeroDivisionError", msg["message"])
        logger.trace("test exc_info", exc_info=ex)
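
# --- Usage sketch (illustrative, not part of the test above) ---
# A minimal sketch of the behavior the test exercises, assuming the
# cumulus_logger package: CumulusLogger formats positional arguments into "{}"
# placeholders and, when exc_info is truthy (True, sys.exc_info(), or an
# exception instance), appends the traceback to the generated message.
def _exc_info_demo():
    from cumulus_logger import CumulusLogger

    demo_logger = CumulusLogger(name="exc-info-demo")
    try:
        1 / 0
    except ZeroDivisionError:
        # The logged message contains both the formatted text and the
        # ZeroDivisionError traceback.
        demo_logger.error("demo failed in {}", "division step", exc_info=True)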
import logging
import os
import subprocess
from re import search

from cumulus_logger import CumulusLogger
from cumulus_process import Process, s3

# Note: DMRppOptions and __version__ come from elsewhere in this package;
# their imports are omitted in this excerpt.


class DMRPPGenerator(Process):
    """
    Class to generate dmrpp files from HDF and NetCDF files
    The input will be *.nc, *.nc4, *.hdf
    The output will be *.nc.dmrpp, *.nc4.dmrpp, *.hdf.dmrpp
    """

    def __init__(self, **kwargs):
        self.processing_regex = kwargs.get('config', {}) \
            .get('collection', {}) \
            .get('meta', {}) \
            .get('dmrpp_processing_regex',
                 '.*\\.(((?i:(h|hdf)))(e)?5|nc(4)?)(\\.bz2|\\.gz|\\.Z)?')
        super(DMRPPGenerator, self).__init__(**kwargs)
        self.path = self.path.rstrip('/') + "/"
        # Enable CloudWatch logging; defaults to True, and the comparison is
        # case-insensitive so "True", "true", "t", and "1" all enable it
        enable_logging = str(os.getenv('ENABLE_CW_LOGGING', True)).lower() in ['true', 't', '1']
        self.dmrpp_version = f"DMRPP {__version__}"
        if enable_logging:
            self.logger = CumulusLogger(name="DMRPP-Generator")

    @property
    def input_keys(self):
        return {
            'input_files': f"{self.processing_regex}(\\.cmr\\.xml|\\.json)?$"
        }

    @staticmethod
    def get_file_type(filename, files):
        """
        Get custom file type, default to metadata
        :param filename: Granule file name
        :param files: list of collection files
        :return: file type if defined
        """
        for collection_file in files:
            if search(collection_file.get('regex', '.*'), filename):
                return collection_file.get('type', 'metadata')
        return 'metadata'

    @staticmethod
    def get_bucket(filename, files, buckets):
        """
        Extract the bucket from the files
        :param filename: Granule file name
        :param files: list of collection files
        :param buckets: Object holding buckets info
        :return: Bucket object
        """
        bucket_type = "public"
        for file in files:
            if search(file.get('regex', '.*'), filename):
                bucket_type = file['bucket']
                break
        return buckets[bucket_type]

    def upload_file_to_s3(self, filename, uri):
        """ Upload a local file to s3 if collection payload provided """
        try:
            return s3.upload(filename, uri, extra={})
        except Exception as e:
            self.logger.error(
                f"{self.dmrpp_version}: Error uploading file "
                f"{os.path.basename(filename)}: {str(e)}")

    def process(self):
        """
        Override the processing wrapper
        :return:
        """
        collection = self.config.get('collection')
        collection_files = collection.get('files', [])
        collection_meta = collection.get('meta', {})
        dmrpp_meta = collection_meta.get('dmrpp', self.config.get('dmrpp', {}))
        buckets = self.config.get('buckets')
        granules = self.input['granules']
        self.processing_regex = dmrpp_meta.get('dmrpp_regex', self.processing_regex)
        for granule in granules:
            dmrpp_files = []
            for file_ in granule['files']:
                if not search(f"{self.processing_regex}$", file_['filename']):
                    self.logger.debug(
                        f"{self.dmrpp_version}: regex {self.processing_regex}"
                        f" does not match filename {file_['filename']}")
                    continue
                self.logger.debug(
                    f"{self.dmrpp_version}: regex {self.processing_regex}"
                    f" matches filename to process {file_['filename']}")
                output_file_paths = self.dmrpp_generate(
                    input_file=file_['filename'], dmrpp_meta=dmrpp_meta)
                for output_file_path in output_file_paths:
                    output_file_basename = os.path.basename(output_file_path)
                    url_path = file_.get('url_path', self.config.get('fileStagingDir'))
                    filepath = os.path.dirname(file_.get('filepath', url_path))
                    if output_file_path:
                        dmrpp_file = {
                            "name": output_file_basename,
                            "path": self.config.get('fileStagingDir'),
                            "url_path": url_path,
                            "bucket": self.get_bucket(
                                output_file_basename, collection_files,
                                buckets)['name'],
                            "size": os.path.getsize(output_file_path),
                            "type": self.get_file_type(
                                output_file_basename, collection_files)
                        }
                        dmrpp_file['filepath'] = \
                            f"{filepath}/{dmrpp_file['name']}".lstrip('/')
                        dmrpp_file['filename'] = \
                            f"s3://{dmrpp_file['bucket']}/{dmrpp_file['filepath']}"
                        dmrpp_files.append(dmrpp_file)
                        self.upload_file_to_s3(
                            output_file_path, dmrpp_file['filename'])
            granule['files'] += dmrpp_files
        return self.input

    def get_dmrpp_command(self, dmrpp_meta, input_path, output_filename, local=False):
        """
        Getting the command line to create DMRPP files
        """
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        dmrpp_options = DMRppOptions(self.path)
        options = dmrpp_options.get_dmrpp_option(dmrpp_meta=dmrpp_meta)
        local_option = f"-u file://{output_filename}" if local else ""
        dmrpp_cmd = (f"get_dmrpp {options} {input_path} -o {output_filename}.dmrpp"
                     f" {local_option} {os.path.basename(output_filename)}")
        return " ".join(dmrpp_cmd.split())

    def add_missing_files(self, dmrpp_meta, file_name):
        """
        Return the .missing sidecar file when it was generated and the -M flag
        was requested in the dmrpp options
        """
        # If the missing file was not generated
        if not os.path.isfile(file_name):
            return []
        # If it was generated and the flag was set
        options = dmrpp_meta.get('options', [])
        if {'flag': '-M'} in options:
            return [file_name]
        return []

    @staticmethod
    def run_command(cmd):
        """
        Run cmd as a system command
        """
        out = subprocess.run(cmd.split(), stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        return out

    def dmrpp_generate(self, input_file, local=False, dmrpp_meta=None):
        """
        Generate DMRPP from S3 file
        """
        # Force dmrpp_meta to be an object
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        # If not running locally use Cumulus logger
        logger = logging if local else self.logger
        cmd_output = None
        try:
            file_name = input_file if local else s3.download(input_file, path=self.path)
            cmd = self.get_dmrpp_command(dmrpp_meta, self.path, file_name, local)
            cmd_output = self.run_command(cmd)
            if cmd_output.stderr:
                logger.error(
                    f"{self.dmrpp_version}: command {cmd} returned {cmd_output.stderr}")
            return [f"{file_name}.dmrpp"] + self.add_missing_files(
                dmrpp_meta, f"{file_name}.dmrpp.missing")
        except Exception as ex:
            # cmd_output is still None when the failure happened before run_command
            stdout = getattr(cmd_output, 'stdout', '')
            stderr = getattr(cmd_output, 'stderr', '')
            logger.error(f"{self.dmrpp_version}: error {ex}: {stdout} {stderr}")
            return []
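
# --- Local usage sketch (hypothetical payload; the constructor kwargs are
# assumed to follow cumulus_process.Process and the input path is illustrative) ---
# dmrpp_generate() with local=True skips the S3 download, runs get_dmrpp against
# a file already on disk, and logs through the stdlib logging module, so the
# class can be exercised without AWS credentials.
def _local_dmrpp_demo():
    generator = DMRPPGenerator(input={"granules": []}, config={})
    outputs = generator.dmrpp_generate(
        input_file="/tmp/sample.nc", local=True, dmrpp_meta={})
    print(outputs)  # e.g. ["/tmp/sample.nc.dmrpp"] when get_dmrpp succeeds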
import os

from cumulus_logger import CumulusLogger

# Note: set_sys_path(), which puts the message adapter directory on sys.path,
# is defined elsewhere in this module and omitted in this excerpt.


def run_cumulus_task(task_function, cumulus_message, context=None, schemas=None,
                     **taskargs):
    """
    Interprets incoming messages, passes them to an inner handler, gets the
    response and transforms it into an outgoing message, returned by Lambda.

    Arguments:
    task_function -- Required. The function containing the business logic of
        the cumulus task
    cumulus_message -- Required. Either a full Cumulus Message or a Cumulus
        Remote Message
    context -- AWS Lambda context object
    schemas -- Optional. A dict with filepaths of `input`, `config`, and
        `output` schemas that are relative to the task root directory. All
        three properties of this dict are optional. If omitted, the message
        adapter will look in `/<task_root>/schemas/<schema_type>.json`, and
        if not found there, will be ignored.
    taskargs -- Optional. Additional keyword arguments for the task_function
    """
    set_sys_path()
    from message_adapter.message_adapter import MessageAdapter

    context_dict = vars(context) if context else {}
    logger = CumulusLogger()
    logger.setMetadata(cumulus_message, context)

    message_adapter_disabled = str(
        os.environ.get('CUMULUS_MESSAGE_ADAPTER_DISABLED')).lower()

    if message_adapter_disabled == 'true':
        try:
            return task_function(cumulus_message, context, **taskargs)
        except Exception as exception:
            name = exception.args[0]
            if isinstance(name, str) and 'WorkflowError' in name:
                cumulus_message['payload'] = None
                cumulus_message['exception'] = name
                logger.error('WorkflowError')
                return cumulus_message
            logger.error(exception)
            raise

    adapter = MessageAdapter(schemas)
    full_event = adapter.load_and_update_remote_event(cumulus_message, context_dict)
    nested_event = adapter.load_nested_event(full_event, context_dict)
    message_config = nested_event.get('messageConfig', {})

    try:
        task_response = task_function(nested_event, context, **taskargs)
    except Exception as exception:
        name = exception.args[0]
        if isinstance(name, str) and 'WorkflowError' in name:
            cumulus_message['payload'] = None
            cumulus_message['exception'] = name
            logger.error('WorkflowError')
            return cumulus_message
        logger.error(exception)
        raise

    return adapter.create_next_event(task_response, full_event, message_config)
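
# --- Handler usage sketch (follows the documented cumulus-message-adapter-python
# pattern; the task body itself is illustrative) ---
# A Lambda handler passes its business-logic function to run_cumulus_task, which
# unwraps the incoming Cumulus message, validates it against any schemas, and
# wraps the task's return value into the outgoing message.
def example_task(event, context):
    # Business logic sees the nested event (input + config), not the raw message.
    return {"example": "output"}


def handler(event, context):
    return run_cumulus_task(example_task, event, context)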