def test_empty_event_and_context(self):
    event, context = {}, {}
    logger = CumulusLogger()
    logger.setMetadata(event, context)
    msg = logger.createMessage("empty event and context")
    self.assertEqual(set(msg.keys()),
                     {"version", "sender", "message", "timestamp"})

def test_logger_name_loglevel(self):
    event, context = create_event(), LambdaContextMock()
    logger = CumulusLogger('logger_test', logging.INFO)
    logger.setMetadata(event, context)
    self.assertEqual(logger.logger.getEffectiveLevel(), logging.INFO)
    logger.debug("test logging level debug")
    logger.info("test logging level info")
    logger.warning("test logging level warning")
def test_formatted_message(self):
    event, context = create_event(), LambdaContextMock()
    logger = CumulusLogger()
    logger.setMetadata(event, context)
    msg = logger.createMessage("test formatted {} {}", "foo", "bar")
    self.assertEqual(msg["message"], "test formatted foo bar")
    logger.debug("test formatted {} {}", "foo", "bar")

def test_parameter_configured_message(self):
    event, context = create_parameter_event(), LambdaContextMock()
    logger = CumulusLogger()
    logger.setMetadata(event, context)
    msg = logger.createMessage("test parameter event")
    self.assertEqual(msg["sender"], context.function_name)
    self.assertEqual(msg["version"], context.function_version)
    self.assertEqual(
        msg["executions"],
        event["cma"]["event"]["cumulus_meta"]["execution_name"])
    self.assertEqual(
        msg["asyncOperationId"],
        event["cma"]["event"]["cumulus_meta"]["asyncOperationId"])
    self.assertEqual(
        msg["granules"],
        json.dumps([granule["granuleId"]
                    for granule in event["cma"]["event"]["payload"]["granules"]]))
    self.assertEqual(
        msg["parentArn"],
        event["cma"]["event"]["cumulus_meta"]["parentExecutionArn"])
    self.assertEqual(
        msg["stackName"],
        event["cma"]["event"]["meta"]["stack"])
    self.assertEqual(msg["message"], "test parameter event")
    logger.info("test parameter configured message")
class DMRPPGenerator(Process):
    """
    Class to generate .dmrpp files from HDF and netCDF files.
    Input: *.nc, *.nc4, *.hdf
    Output: *.nc.dmrpp, *.nc4.dmrpp, *.hdf.dmrpp
    (See the regex sketch after this class for how input files are matched.)
    """
    def __init__(self, **kwargs):
        self.processing_regex = kwargs.get('config', {}) \
            .get('collection', {}) \
            .get('meta', {}) \
            .get('dmrpp_processing_regex', '.*\\.(((?i:(h|hdf)))(e)?5|nc(4)?)(\\.bz2|\\.gz|\\.Z)?')

        super(DMRPPGenerator, self).__init__(**kwargs)
        self.path = self.path.rstrip('/') + "/"
        # Enable CloudWatch logging; the default is True. Environment variables
        # are strings, so normalize before comparing.
        enable_logging = str(os.getenv('ENABLE_CW_LOGGING', True)).lower() in ["true", "t", "1"]
        self.dmrpp_version = f"DMRPP {__version__}"
        if enable_logging:
            self.logger = CumulusLogger(name="DMRPP-Generator")

    @property
    def input_keys(self):
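        """Map of input categories to regexes: data files plus optional .cmr.xml/.json sidecar metadata files."""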

        return {
            'input_files': f"{self.processing_regex}(\\.cmr\\.xml|\\.json)?$"
        }

    @staticmethod
    def get_file_type(filename, files):
        """
        Get custom file type, default to metadata
        :param filename: Granule file name
        :param files: list of collection files
        :return: file type if defined
        """

        for collection_file in files:
            if search(collection_file.get('regex', '.*'), filename):
                return collection_file.get('type', 'metadata')
        return 'metadata'
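
    # Hypothetical usage sketch (file definitions are illustrative):
    #   files = [{"regex": r".*\.nc$", "type": "data"}]
    #   DMRPPGenerator.get_file_type("g.nc", files)   # -> "data"
    #   DMRPPGenerator.get_file_type("g.txt", files)  # -> "metadata" (default)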

    @staticmethod
    def get_bucket(filename, files, buckets):
        """
        Extract the bucket from the files
        :param filename: Granule file name
        :param files: list of collection files
        :param buckets: Object holding buckets info
        :return: Bucket object
        """
        bucket_type = "public"
        for file in files:
            if search(file.get('regex', '.*'), filename):
                bucket_type = file['bucket']
                break
        return buckets[bucket_type]
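
    # Hypothetical usage sketch (bucket layout is illustrative):
    #   files = [{"regex": r".*\.dmrpp$", "bucket": "protected"}]
    #   buckets = {"public": {"name": "pub-bucket"},
    #              "protected": {"name": "prot-bucket"}}
    #   DMRPPGenerator.get_bucket("g.nc.dmrpp", files, buckets)  # -> {"name": "prot-bucket"}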

    def upload_file_to_s3(self, filename, uri):
        """ Upload a local file to s3 if collection payload provided """
        try:
            return s3.upload(filename, uri, extra={})
        except Exception as e:
            self.logger.error(
                f"{self.dmrpp_version}: Error uploading file "
                f"{os.path.basename(filename)}: {str(e)}")

    def process(self):
        """
        Override the processing wrapper
        :return:
        """
        collection = self.config.get('collection')
        collection_files = collection.get('files', [])
        collection_meta = collection.get('meta', {})
        dmrpp_meta = collection_meta.get('dmrpp', self.config.get('dmrpp', {}))
        buckets = self.config.get('buckets')
        granules = self.input['granules']
        self.processing_regex = dmrpp_meta.get('dmrpp_regex',
                                               self.processing_regex)
        for granule in granules:
            dmrpp_files = []
            for file_ in granule['files']:
                if not search(f"{self.processing_regex}$", file_['filename']):
                    self.logger.debug(
                        f"{self.dmrpp_version}: regex {self.processing_regex} does not match filename {file_['filename']}"
                    )
                    continue
                self.logger.debug(
                    f"{self.dmrpp_version}: reges {self.processing_regex} matches filename to process {file_['filename']}"
                )
                output_file_paths = self.dmrpp_generate(
                    input_file=file_['filename'], dmrpp_meta=dmrpp_meta)
                for output_file_path in output_file_paths:
                    output_file_basename = os.path.basename(output_file_path)
                    url_path = file_.get('url_path',
                                         self.config.get('fileStagingDir'))
                    filepath = os.path.dirname(file_.get('filepath', url_path))
                    if output_file_path:
                        dmrpp_file = {
                            "name": output_file_basename,
                            "path": self.config.get('fileStagingDir'),
                            "url_path": url_path,
                            "bucket": self.get_bucket(output_file_basename,
                                                      collection_files,
                                                      buckets)['name'],
                            "size": os.path.getsize(output_file_path),
                            "type": self.get_file_type(output_file_basename,
                                                       collection_files)
                        }
                        dmrpp_file['filepath'] = f"{filepath}/{dmrpp_file['name']}".lstrip('/')
                        dmrpp_file['filename'] = f"s3://{dmrpp_file['bucket']}/{dmrpp_file['filepath']}"
                        dmrpp_files.append(dmrpp_file)
                        self.upload_file_to_s3(output_file_path,
                                               dmrpp_file['filename'])
            granule['files'] += dmrpp_files

        return self.input

    def get_dmrpp_command(self,
                          dmrpp_meta,
                          input_path,
                          output_filename,
                          local=False):
        """
        Getting the command line to create DMRPP files
        """
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        dmrpp_options = DMRppOptions(self.path)
        options = dmrpp_options.get_dmrpp_option(dmrpp_meta=dmrpp_meta)
        local_option = f"-u file://{output_filename}" if local else ""
        dmrpp_cmd = f"get_dmrpp {options} {input_path} -o {output_filename}.dmrpp {local_option} {os.path.basename(output_filename)}"
        return " ".join(dmrpp_cmd.split())

    def add_missing_files(self, dmrpp_meta, file_name):
        """
        Return the generated .missing sidecar file when the -M flag
        was included in the dmrpp options
        """
        # If the missing file was not generated
        if not os.path.isfile(file_name):
            return []
        # If it was generated and the -M flag was set
        options = dmrpp_meta.get('options', [])
        if {'flag': '-M'} in options:
            return [file_name]
        return []

    @staticmethod
    def run_command(cmd):
        """ Run cmd as a system command """
        out = subprocess.run(cmd.split(),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        return out

    def dmrpp_generate(self, input_file, local=False, dmrpp_meta=None):
        """
        Generate DMRPP from S3 file
        """
        # Force dmrpp_meta to be an object
        dmrpp_meta = dmrpp_meta if isinstance(dmrpp_meta, dict) else {}
        # If not running locally use Cumulus logger
        logger = logging if local else self.logger
        cmd_output = ""
        try:
            file_name = input_file if local else s3.download(input_file,
                                                             path=self.path)
            cmd = self.get_dmrpp_command(dmrpp_meta, self.path, file_name,
                                         local)
            cmd_output = self.run_command(cmd)
            if cmd_output.stderr:
                logger.error(
                    f"{self.dmrpp_version}: command {cmd} returned {cmd_output.stderr}"
                )
            out_files = [f"{file_name}.dmrpp"] + self.add_missing_files(
                dmrpp_meta, f'{file_name}.dmrpp.missing')
            return out_files

        except Exception as ex:
            # cmd_output is still the initial empty string if the command never ran
            stdout = getattr(cmd_output, 'stdout', '')
            stderr = getattr(cmd_output, 'stderr', '')
            logger.error(f"{self.dmrpp_version}: error {ex}: {stdout} {stderr}")
            return []
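
# A minimal standalone check of the default dmrpp_processing_regex used above.
# The pattern is copied from DMRPPGenerator.__init__; the filenames are illustrative.
import re

DEFAULT_REGEX = r'.*\.(((?i:(h|hdf)))(e)?5|nc(4)?)(\.bz2|\.gz|\.Z)?'

for name in ["granule.nc", "granule.nc4", "granule.HDF5", "granule.h5.gz", "readme.txt"]:
    print(name, bool(re.search(f"{DEFAULT_REGEX}$", name)))
# -> the first four match; readme.txt does not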
Example 7
def run_cumulus_task(task_function,
                     cumulus_message,
                     context=None,
                     schemas=None,
                     **taskargs):
    """
    Interprets incoming messages, passes them to an inner handler, gets the
    response and transforms it into an outgoing message, returned by Lambda.

    Arguments:
        task_function -- Required. The function containing the business logic
            of the cumulus task
        cumulus_message -- Required. Either a full Cumulus Message or a Cumulus
            Remote Message
        context -- AWS Lambda context object
        schemas -- Optional. A dict with filepaths of `input`, `config`, and
            `output` schemas that are relative to the task root directory. All
            three properties of this dict are optional. If omitted, the message
            adapter will look in `/<task_root>/schemas/<schema_type>.json`, and
            if not found there, will be ignored.
        taskargs -- Optional. Additional keyword arguments for the
            task_function
    """

    set_sys_path()
    from message_adapter.message_adapter import MessageAdapter

    context_dict = vars(context) if context else {}
    logger = CumulusLogger()
    logger.setMetadata(cumulus_message, context)
    message_adapter_disabled = str(
        os.environ.get('CUMULUS_MESSAGE_ADAPTER_DISABLED')).lower()

    if message_adapter_disabled == 'true':
        try:
            return task_function(cumulus_message, context, **taskargs)
        except Exception as exception:
            name = exception.args[0]
            if isinstance(name, str) and 'WorkflowError' in name:
                cumulus_message['payload'] = None
                cumulus_message['exception'] = name
                logger.error('WorkflowError')
                return cumulus_message
            logger.error(exception)
            raise

    adapter = MessageAdapter(schemas)
    full_event = adapter.load_and_update_remote_event(cumulus_message,
                                                      context_dict)
    nested_event = adapter.load_nested_event(full_event, context_dict)
    message_config = nested_event.get('messageConfig', {})

    try:
        task_response = task_function(nested_event, context, **taskargs)
    except Exception as exception:
        name = exception.args[0]
        if isinstance(name, str) and 'WorkflowError' in name:
            cumulus_message['payload'] = None
            cumulus_message['exception'] = name
            logger.error('WorkflowError')
            return cumulus_message
        logger.error(exception)
        raise

    return adapter.create_next_event(task_response, full_event, message_config)
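
Example 8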
import os
import sys

from run_cumulus_task import run_cumulus_task
from cumulus_logger import CumulusLogger

logger = CumulusLogger()

schemas = {
    "input": "schemas/input.json",
    "config": "schemas/config.json",
    "output": "schemas/output.json"
}

def task(event, context):
    """simple task that returns the updated event"""
    # example logging inside of a task using CumulusLogger
    logger.info('task executed')

    # log error when an exception is caught
    logger.error("task formatted message {} exc_info ", "bar", exc_info=True)

    # return the output of the task
    return { "goodbye": event["input"]["hello"] }

def handler(event, context):
    """handler that is provided to aws lambda"""
    # make sure event & context metadata is set in the logger
    logger.setMetadata(event, context)
    return run_cumulus_task(task, event, context, schemas)
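
# A minimal local-invocation sketch. As Example 7 shows, setting
# CUMULUS_MESSAGE_ADAPTER_DISABLED=true makes run_cumulus_task call the task
# directly; this still assumes the cumulus-message-adapter package is importable.
if __name__ == "__main__":
    os.environ["CUMULUS_MESSAGE_ADAPTER_DISABLED"] = "true"
    print(handler({"input": {"hello": "world"}}, None))
    # expected: {'goodbye': 'world'}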
Example 9
"""
Name: extract_filepaths_for_granule.py

Description:  Extracts the keys (filepaths) for a granule's files from a Cumulus Message.
"""

import re
import os

from cumulus_logger import CumulusLogger
from run_cumulus_task import run_cumulus_task

LOGGER = CumulusLogger()


class ExtractFilePathsError(Exception):
    """Exception to be raised if any errors occur"""


def task(event, context):  #pylint: disable-msg=unused-argument
    """
    Task called by the handler to perform the work.

    This task will parse the input, removing the granuleId and file keys for a granule.

        Args:
            event (dict): passed through from the handler
            context (Object): passed through from the handler

        Returns:
            dict: dict containing granuleId and keys. See handler for detail.
    """
Example 10
import logging
import os
import re
import boto3
import paramiko
import requests
import urllib3
from bs4 import BeautifulSoup
from cumulus_logger import CumulusLogger
from dateutil.parser import parse

from task.dgm import *

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

logging_level = logging.INFO if os.getenv('enable_logging', 'false').lower() == 'true' else logging.WARNING
rdg_logger = CumulusLogger(name='Recursive-Discover-Granules', level=logging_level)


class DiscoverGranules:
    """
    This class contains functions that fetch
    The metadata of the granules via a protocol X (HTTP/SFTP/S3)
    Compare the md5 of these granules with the ones in an S3
    It will return the files if they don't exist in S3 or the md5 doesn't match
    """

    def __init__(self, event):
        """
        Default values goes here
        """
        self.input = event.get('input')
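
Example 11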
    def test_error_message(self):
        event, context = create_event(), LambdaContextMock()
        logger = CumulusLogger()
        logger.setMetadata(event, context)
        try:
            1 / 0
        except ZeroDivisionError as ex:
            msg = logger.createMessage("test exc_info", exc_info=False)
            self.assertIn("test exc_info", msg["message"])
            self.assertNotIn("ZeroDivisionError", msg["message"])
            logger.error("test exc_info", exc_info=False)

            msg = logger.createMessage(
                "test formatted {} exc_info ", "bar", exc_info=True)
            self.assertIn("test formatted bar exc_info", msg["message"])
            self.assertIn("ZeroDivisionError", msg["message"])
            logger.warn("test formatted {} exc_info ", "bar", exc_info=True)

            msg = logger.createMessage(
                "test exc_info", exc_info=sys.exc_info())
            self.assertIn("test exc_info", msg["message"])
            self.assertIn("ZeroDivisionError", msg["message"])
            logger.fatal("test exc_info", exc_info=sys.exc_info())

            msg = logger.createMessage("test exc_info", exc_info=ex)
            self.assertIn("test exc_info", msg["message"])
            self.assertIn("ZeroDivisionError", msg["message"])
            logger.trace("test exc_info", exc_info=ex)