Example 1
 def __init__(self, event, context):
     if context:
         get_logger().info('SUPERVISOR: Initializing AWS Lambda supervisor')
         self.lambda_instance = LambdaInstance(event, context)
         self.body = {}
     else:
         raise NoLambdaContextError()
Example 2
 def execute_function(self):
     if SysUtils.is_var_in_env('SCRIPT'):
         script_path = SysUtils.join_paths(
             SysUtils.get_env_var("TMP_INPUT_DIR"), self._SCRIPT_FILE_NAME)
         script_content = StrUtils.base64_to_str(
             SysUtils.get_env_var('SCRIPT'))
         FileUtils.create_file_with_content(script_path, script_content)
         get_logger().info("Script file created in '%s'", script_path)
         FileUtils.set_file_execution_rights(script_path)
         get_logger().info("Executing user defined script: '%s'",
                           script_path)
         try:
             pyinstaller_library_path = SysUtils.get_env_var(
                 'LD_LIBRARY_PATH')
             orig_library_path = SysUtils.get_env_var(
                 'LD_LIBRARY_PATH_ORIG')
             if orig_library_path:
                 SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
             self.output = subprocess.check_output(
                 ['/bin/sh', script_path],
                 stderr=subprocess.STDOUT).decode("latin-1")
             SysUtils.set_env_var('LD_LIBRARY_PATH',
                                  pyinstaller_library_path)
             get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
         except subprocess.CalledProcessError as cpe:
             # Exit with user script return code if an
             # error occurs (Kubernetes handles the error)
             get_logger().error(cpe.output.decode('latin-1'))
             sys.exit(cpe.returncode)
     else:
         get_logger().error('No user script found!')
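Note: the SCRIPT variable is expected to carry the user script base64-encoded. A minimal sketch of the encoding side, assuming only that the supervisor decodes SCRIPT as shown above:

import base64
import os

# Hypothetical setup step: publish a user script through the SCRIPT
# environment variable, base64-encoded as the supervisor expects.
script = "#!/bin/sh\necho 'hello from the function'\n"
os.environ['SCRIPT'] = base64.b64encode(script.encode('utf-8')).decode('ascii')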
Example 3
 def _execute_batch(self):
     batch_ri = Batch(self.lambda_instance).invoke_batch_function()
     batch_logs = (f"Job delegated to batch.\n"
                   f"Check batch logs with:\n"
                   f"  scar log -n {self.lambda_instance.get_function_name()} -ri {batch_ri}")
     get_logger().info(batch_logs)
     self.body["udocker_output"] = batch_logs.encode('utf-8')
Example 4
def parse_event(event):
    """Parses the received event and
    returns the appropriate event class."""
    # Make sure the event is always stored
    parsed_event = None
    if not isinstance(event, dict):
        try:
            event = json.loads(event)
        except ValueError:
            return UnknownEvent(event)
    # Applies the event identification flow
    if _is_api_gateway_event(event):
        get_logger().info("API Gateway event found.")
        parsed_event = ApiGatewayEvent(event)
        # Update event info with API request event body
        # to be further processed (if needed)
        if parsed_event.has_json_body():
            event = parsed_event.body
            if not isinstance(parsed_event.body, dict):
                event = json.loads(parsed_event.body)
    if _is_storage_event(event):
        get_logger().info("Storage event found.")
        parsed_event = _parse_storage_event(event)
        # Store 'object_key' in environment variable
        SysUtils.set_env_var("STORAGE_OBJECT_KEY", parsed_event.object_key)
    return parsed_event if parsed_event else UnknownEvent(event)
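The _is_api_gateway_event and _is_storage_event helpers are not part of this excerpt. Minimal predicates consistent with the flow above could look like the following sketch (an assumption, not the project's actual implementation):

def _is_api_gateway_event(event):
    # Assumption: API Gateway proxy events carry an 'httpMethod' field
    return 'httpMethod' in event

def _is_storage_event(event):
    # Assumption: storage events (e.g. S3 or MinIO) carry a 'Records' list
    return bool(event.get('Records'))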
Example 5
 def _create_container(self):
     if self._is_container_available():
         get_logger().info("Container already available")
     else:
         get_logger().info("Creating container based on image '%s'.",
                           self.cont_img_id)
         SysUtils.execute_cmd(self._create_udocker_container_cmd())
     SysUtils.execute_cmd(self._set_udocker_container_execution_mode_cmd())
Example 6
 def upload_file(self, file_path, file_name, output_path):
     """Uploads the file to the S3 output path."""
     file_key = get_file_key(output_path, file_name)
     bucket_name = get_bucket_name(output_path)
     get_logger().info('Uploading file \'%s\' to bucket \'%s\'', file_key,
                       bucket_name)
     with open(file_path, 'rb') as data:
         self.client.upload_fileobj(data, bucket_name, file_key)
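get_file_key and get_bucket_name are helpers outside this excerpt; a plausible sketch, assuming output_path has the form 'bucket/optional/prefix' (hypothetical implementations):

def get_bucket_name(output_path):
    # First path component is taken as the bucket name (assumption)
    return output_path.split('/', 1)[0]

def get_file_key(output_path, file_name):
    # Anything after the bucket becomes the key prefix (assumption)
    parts = output_path.split('/', 1)
    return f'{parts[1]}/{file_name}' if len(parts) > 1 else file_name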
Example 7
def main(event, context=None):
    """Initializes the generic supervisor
    and launches its execution."""
    configure_logger()
    get_logger().debug("EVENT received: %s", event)
    if context:
        get_logger().debug("CONTEXT received: %s", context)
    supervisor = Supervisor(event, context)
    return supervisor.run()
Example 8
 def _create_image(self):
     if self._is_container_image_downloaded():
         get_logger().info("Container image '%s' already available",
                           self.cont_img_id)
     elif SysUtils.is_var_in_env("IMAGE_FILE"):
         self._load_local_container_image()
     else:
         self._download_container_image()
Example 9
 def upload_file(self, file_path, file_name):
     url = (f'https://{self.oneprovider_host}/{self._CDMI_PATH}/'
            f'{self.oneprovider_space}/{self.stg_path}/{file_name}')
     get_logger().info("Uploading file '%s' to '%s/%s'", file_name,
                       self.oneprovider_space, self.stg_path)
     with open(file_path, 'rb') as data:
         response = requests.put(url, data=data, headers=self.headers)
         if response.status_code not in [201, 202, 204]:
             get_logger().error("Upload failed. Status code: %s",
                                response.status_code)
Example 10
 def _execute_udocker(self):
     try:
         udocker = Udocker(self.lambda_instance)
         udocker.prepare_container()
         self.body["udocker_output"] = udocker.launch_udocker_container()
         get_logger().debug("CONTAINER OUTPUT:\n %s", self.body["udocker_output"])
     except subprocess.TimeoutExpired:
         get_logger().warning("Container execution timed out")
         if _is_lambda_batch_execution():
             self._execute_batch()
Example 11
    def download_input(self, parsed_event, input_dir_path):
        """Receives the event where the file information is and
        the tmp_dir_path where to store the downloaded file.

        Returns the file path where the file is downloaded."""
        auth_data = self._get_input_auth_data(parsed_event)
        stg_provider = create_provider(auth_data)
        get_logger().info('Found \'%s\' input provider',
                          stg_provider.get_type())
        return stg_provider.download_file(parsed_event, input_dir_path)
Example 12
def upload_output(storage_provider, output_dir_path):
    """Receives the tmp_dir_path where the files to upload are stored and
    uploads all the files found there."""

    get_logger().info("Searching for files to upload in folder '%s'", output_dir_path)
    output_files = FileUtils.get_all_files_in_dir(output_dir_path)
    get_logger().info("Found the following files to upload: '%s'", output_files)
    for file_path in output_files:
        file_name = file_path.replace(f"{output_dir_path}/", "")
        storage_provider.upload_file(file_path, file_name)
Example 13
    def upload_file(self, file_path, file_name):
        file_key = self._get_file_key(file_name)
        bucket_name = self._get_bucket_name()
        get_logger().info("Uploading file '%s' to bucket '%s'", file_key,
                          bucket_name)
        with open(file_path, 'rb') as data:
            _get_client().upload_fileobj(data, bucket_name, file_key)

        get_logger().info(
            "Changing ACLs for public-read for object in bucket '%s' with key '%s'",
            bucket_name, file_key)
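The ACL change itself happens outside this excerpt; with a boto3 S3 client it would presumably be a call along these lines (a sketch, not the project's confirmed code):

# Hypothetical follow-up to the log message above
_get_client().put_object_acl(ACL='public-read',
                             Bucket=bucket_name, Key=file_key)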
Example 14
 def _get_script_path(self):
     script_path = None
     if SysUtils.is_var_in_env('SCRIPT'):
         script_path = SysUtils.join_paths(
             SysUtils.get_env_var("TMP_INPUT_DIR"), self._SCRIPT_FILE_NAME)
         script_content = StrUtils.base64_to_str(
             SysUtils.get_env_var('SCRIPT'))
         FileUtils.create_file_with_content(script_path, script_content)
         get_logger().info("Script file created in '%s'", script_path)
     elif FileUtils.is_file(self._OSCAR_SCRIPT_PATH):
         script_path = self._OSCAR_SCRIPT_PATH
     return script_path
Example 15
 def create_error_response(self):
     exception_msg = traceback.format_exc()
     get_logger().error("Exception launched:\n %s", exception_msg)
     return {
         "statusCode": 500,
         "headers": {
             "amz-lambda-request-id": self.lambda_instance.get_request_id(),
             "amz-log-group-name": self.lambda_instance.get_log_group_name(),
             "amz-log-stream-name": self.lambda_instance.get_log_stream_name()
         },
         "body": StrUtils.dict_to_base64str({"exception" : exception_msg}),
         "isBase64Encoded": True,
     }
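Since the response is flagged isBase64Encoded, a caller recovers the exception text by reversing the encoding. A minimal sketch, where response stands for the dict returned above:

import base64
import json

# Decode the base64-encoded JSON body produced by create_error_response()
payload = json.loads(base64.b64decode(response['body']).decode('utf-8'))
print(payload['exception'])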
Example 16
    def _parse_input(self):
        """Download input data from storage provider
        or save data from POST request.

        A function can have information from several storage providers
        but one event always represents only one file (so far), so only
        one provider is going to be used for each event received.
        """
        input_file_path = self.stg_config.download_input(self.parsed_event,
                                                         self.input_tmp_dir.name)
        if input_file_path and FileUtils.is_file(input_file_path):
            SysUtils.set_env_var('INPUT_FILE_PATH', input_file_path)
            get_logger().info('INPUT_FILE_PATH variable set to \'%s\'', input_file_path)
Example 17
 def _execute_udocker(self):
     try:
         udocker = Udocker(self.lambda_instance)
         udocker.prepare_container()
         self.body["udocker_output"] = udocker.launch_udocker_container()
         get_logger().debug(
             "CONTAINER OUTPUT:\n %s",
             self.body["udocker_output"].decode(encoding='utf-8',
                                                errors='ignore'))
     except (subprocess.TimeoutExpired, ContainerTimeoutExpiredWarning):
         get_logger().warning("Container execution timed out")
         if _is_lambda_batch_execution():
             self._execute_batch()
Example 18
 def execute_function(self):
     script_path = self._get_script_path()
     if script_path:
         pyinstaller_library_path = SysUtils.get_env_var(
             'LD_LIBRARY_PATH')
         orig_library_path = SysUtils.get_env_var(
             'LD_LIBRARY_PATH_ORIG')
         if orig_library_path:
             SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
         else:
             SysUtils.delete_env_var('LD_LIBRARY_PATH')
         proc = subprocess.Popen(['/bin/bash', script_path],
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 encoding='utf-8',
                                 errors='ignore')
         SysUtils.set_env_var('LD_LIBRARY_PATH',
                              pyinstaller_library_path)
         # Stream the script output line by line while accumulating it
         for line in proc.stdout:
             get_logger().debug(line.strip())
             self.output = self.output + line
         get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
         # Exit with the user script's return code if an
         # error occurs (Kubernetes handles the error)
         if proc.wait() != 0:
             get_logger().error(self.output)
             sys.exit(proc.returncode)
     else:
         get_logger().error('No user script found!')
Example 19
    def download_file(self, parsed_event, input_dir_path):
        """Downloads a file from a minio bucket."""
        file_download_path = SysUtils.join_paths(input_dir_path,
                                                 parsed_event.file_name)
        get_logger().info("Downloading item from bucket '%s' with key '%s'",
                          parsed_event.bucket_name, parsed_event.file_name)

        with open(file_download_path, 'wb') as data:
            self._get_client().download_fileobj(parsed_event.bucket_name,
                                                parsed_event.file_name, data)
        get_logger().info(
            "Successful download of file '%s' from bucket '%s' in path '%s'",
            parsed_event.file_name, parsed_event.bucket_name,
            file_download_path)
        return file_download_path
Example 20
 def download_file(self, parsed_event, input_dir_path):
     """ Downloads the file from the S3 bucket and
     returns the path were the download is placed. """
     file_download_path = SysUtils.join_paths(input_dir_path,
                                              parsed_event.file_name)
     get_logger().info("Downloading item from bucket '%s' with key '%s'",
                       parsed_event.bucket_name, parsed_event.object_key)
     with open(file_download_path, 'wb') as data:
         _get_client().download_fileobj(parsed_event.bucket_name,
                                        parsed_event.object_key, data)
     get_logger().info(
         "Successful download of file '%s' from bucket '%s' in path '%s'",
         parsed_event.object_key, parsed_event.bucket_name,
         file_download_path)
     return file_download_path
Example 21
 def upload_output(self, output_dir_path):
     """Receives the tmp_dir_path where the files to upload are stored and
     uploads files whose name matches the prefixes and suffixes specified
     in 'output'."""
     get_logger().info('Searching for files to upload in folder \'%s\'',
                       output_dir_path)
     output_files = FileUtils.get_all_files_in_dir(output_dir_path)
     stg_providers = {}
     # Filter files by prefix and suffix
     for output in self.output:
         get_logger().info(
             'Checking files for uploading to \'%s\' on path: \'%s\'',
             output['storage_provider'], output['path'])
         provider_type = StrUtils.get_storage_type(
             output['storage_provider'])
         provider_id = StrUtils.get_storage_id(output['storage_provider'])
         for file_path in output_files:
             file_name = file_path.replace(f'{output_dir_path}/', '')
             prefix_ok = False
             suffix_ok = False
             # Check prefixes
             if ('prefix' not in output or len(output['prefix']) == 0):
                 prefix_ok = True
             else:
                 for pref in output['prefix']:
                     if file_name.startswith(pref):
                         prefix_ok = True
                         break
             if prefix_ok:
                 # Check suffixes
                 if ('suffix' not in output or len(output['suffix']) == 0):
                     suffix_ok = True
                 else:
                     for suff in output['suffix']:
                         if file_name.endswith(suff):
                             suffix_ok = True
                             break
                 # Only upload file if name matches the prefixes and suffixes
                 if suffix_ok:
                     if provider_type not in stg_providers:
                         stg_providers[provider_type] = {}
                     if provider_id not in stg_providers[provider_type]:
                         auth_data = self._get_auth_data(
                             provider_type, provider_id)
                         stg_providers[provider_type][
                             provider_id] = create_provider(auth_data)
                     stg_providers[provider_type][provider_id].upload_file(
                         file_path, file_name, output['path'])
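The prefix/suffix checks above can be read as a single predicate. A standalone sketch of the same logic (matches_filters is a hypothetical name, not part of the original code):

def matches_filters(file_name, output):
    # Empty or missing 'prefix'/'suffix' lists match every file, as above
    prefixes = output.get('prefix') or []
    suffixes = output.get('suffix') or []
    return ((not prefixes or any(file_name.startswith(p) for p in prefixes))
            and (not suffixes or any(file_name.endswith(s) for s in suffixes)))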
Example 22
def get_output_paths():
    """Returns the defined output providers.

    Reads the global variables to create the providers needed.
    Variable schema: STORAGE_PATH_$1_$2
                     $1: INPUT | OUTPUT
                     $2: STORAGE_ID (Specified in the function definition file,
                                     is unique for each storage defined)
    e.g.: STORAGE_PATH_INPUT_12345
    """
    get_logger().info("Reading output path variables")
    env_vars = SysUtils.get_filtered_env_vars("STORAGE_PATH_")
    storage_path = namedtuple('storage_path', ['id', 'path'])
    # Remove the storage type 'OUTPUT_' and store only the id and the path
    # Store a tuple, so the information can't be modified
    return [storage_path(env_key[7:], env_val) for env_key, env_val in env_vars.items()
            if env_key.startswith('OUTPUT_')]
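For instance, assuming get_filtered_env_vars strips the 'STORAGE_PATH_' prefix from the keys it returns, the following environment would yield one output tuple:

import os

os.environ['STORAGE_PATH_OUTPUT_12345'] = '/bucket/folder'
# get_output_paths() would now return (under the assumption above):
#   [storage_path(id='12345', path='/bucket/folder')]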
Example 23
def parse_event(event):
    """Parses the received event and
    returns the appropriate event class."""
    # Make sure the event is always stored
    parsed_event = None
    if not isinstance(event, dict):
        try:
            event = json.loads(event)
        except ValueError:
            return UnknownEvent(event)
    # Applies the event identification flow
    if _is_api_gateway_event(event):
        get_logger().info("API Gateway event found.")
        parsed_event = ApiGatewayEvent(event)
        # Update event info with API request event body
        # to be further processed (if needed)
        if parsed_event.has_json_body():
            event = parsed_event.body
            if not isinstance(parsed_event.body, dict):
                event = json.loads(parsed_event.body)
    if _is_storage_event(event):
        get_logger().info("Storage event found.")
        parsed_event = _parse_storage_event(event)
    return parsed_event if parsed_event else UnknownEvent(event)
Example 24
 def _parse_config(self):
     # Read output list
     output = ConfigUtils.read_cfg_var('output')
     if output != '':
         self.output = output
     else:
         get_logger().warning(
             'There is no output defined for this function.')
     # Read input list ('input_cfg' avoids shadowing the built-in input)
     input_cfg = ConfigUtils.read_cfg_var('input')
     if input_cfg != '':
         self.input = input_cfg
     else:
         get_logger().warning(
             'There is no input defined for this function.')
     # Read storage_providers dict
     storage_providers = ConfigUtils.read_cfg_var('storage_providers')
     if (storage_providers and storage_providers != ''):
         # s3 storage provider auth
         if ('s3' in storage_providers and storage_providers['s3']):
             self._validate_s3_creds(storage_providers['s3'])
         # minio storage provider auth
         if ('minio' in storage_providers and storage_providers['minio']):
             self._validate_minio_creds(storage_providers['minio'])
         # onedata storage provider auth
         if ('onedata' in storage_providers
                 and storage_providers['onedata']):
             self._validate_onedata_creds(storage_providers['onedata'])
     else:
         get_logger().warning(
             'There is no storage provider defined for this function.')
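For reference, an illustrative shape of the 'storage_providers' value that would pass the checks above; every field name here is an assumption, not the documented schema:

# Illustrative only: field names are assumptions
storage_providers = {
    's3': {'access_key': '...', 'secret_key': '...'},
    'minio': {'endpoint': '...', 'access_key': '...', 'secret_key': '...'},
    'onedata': {'oneprovider_host': '...', 'token': '...', 'space': '...'},
}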
Example 25
 def execute_function(self):
     script_path = self._get_script_path()
     if script_path:
         try:
             pyinstaller_library_path = SysUtils.get_env_var(
                 'LD_LIBRARY_PATH')
             orig_library_path = SysUtils.get_env_var(
                 'LD_LIBRARY_PATH_ORIG')
             if orig_library_path:
                 SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
             else:
                 SysUtils.delete_env_var('LD_LIBRARY_PATH')
             self.output = subprocess.check_output(
                 ['/bin/sh', script_path],
                 stderr=subprocess.STDOUT).decode("latin-1")
             SysUtils.set_env_var('LD_LIBRARY_PATH',
                                  pyinstaller_library_path)
             get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
         except subprocess.CalledProcessError as cpe:
             # Exit with user script return code if an
             # error occurs (Kubernetes handles the error)
             get_logger().error(cpe.output.decode('latin-1'))
             sys.exit(cpe.returncode)
     else:
         get_logger().error('No user script found!')
Example 26
 def launch_udocker_container(self):
     """Launches the udocker container.
     If the execution time of the container exceeds the defined execution time,
     the container is killed and a warning is raised."""
     remaining_seconds = self.lambda_instance.get_remaining_time_in_seconds()
     get_logger().info(
         "Executing udocker container. Timeout set to '%d' seconds",
         remaining_seconds)
     get_logger().debug("Udocker command: '%s'", self.cont_cmd)
     with open(self._CONTAINER_OUTPUT_FILE, "wb") as out:
         with subprocess.Popen(self.cont_cmd,
                               stderr=subprocess.STDOUT,
                               stdout=out,
                               start_new_session=True) as process:
             try:
                 process.wait(timeout=remaining_seconds)
             except subprocess.TimeoutExpired:
                 get_logger().info("Stopping process '%s'", process)
                 process.kill()
                 raise ContainerTimeoutExpiredWarning()
     udocker_output = b''
     if FileUtils.is_file(self._CONTAINER_OUTPUT_FILE):
         udocker_output = FileUtils.read_file(self._CONTAINER_OUTPUT_FILE,
                                              file_mode="rb")
     return udocker_output
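The same kill-on-timeout pattern, reduced to plain subprocess calls (a generic sketch, independent of udocker):

import subprocess

with subprocess.Popen(['sleep', '60'], start_new_session=True) as process:
    try:
        process.wait(timeout=5)
    except subprocess.TimeoutExpired:
        # Kill the process and let the caller surface the timeout
        process.kill()
        raise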
Example 27
    def download_file(self, parsed_event, input_dir_path):
        """Downloads the file from the space of Onedata and
        returns the path were the download is placed. """
        file_download_path = ""
        url = f'https://{self.oneprovider_host}{self._CDMI_PATH}{parsed_event.object_key}'
        get_logger().info('Downloading item from host \'%s\' with key \'%s\'',
                          self.oneprovider_host, parsed_event.object_key)
        response = requests.get(url, headers=self.headers)
        if response.status_code == 200:
            file_download_path = SysUtils.join_paths(input_dir_path,
                                                     parsed_event.file_name)
            FileUtils.create_file_with_content(file_download_path,
                                               response.content,
                                               mode='wb')

            get_logger().info(
                'Successful download of file \'%s\' with key \'%s\' in path \'%s\'',
                parsed_event.file_name, parsed_event.object_key,
                file_download_path)
        else:
            raise OnedataDownloadError(file_name=parsed_event.object_key,
                                       status_code=response.status_code)
        return file_download_path
Example 28
 def upload_file(self, file_path, file_name, output_path):
     """Uploads the file to the Onedata output path."""
     file_name = file_name.strip('/')
     output_path = output_path.strip('/')
     upload_path = f'{output_path}/{file_name}'
     upload_folder = FileUtils.get_dir_name(upload_path)
     # Create output folder (and subfolders) if it does not exists
     if not self._folder_exists(upload_folder):
         folders = upload_folder.split('/')
         path = ''
         for folder in folders:
             path = f'{path}/{folder}'
             if not self._folder_exists(path):
                 self._create_folder(path)
     # Upload the file
     url = (f'https://{self.oneprovider_host}{self._CDMI_PATH}/'
            f'{self.oneprovider_space}/{upload_path}')
     get_logger().info('Uploading file \'%s\' to space \'%s\'', upload_path,
                       self.oneprovider_space)
     with open(file_path, 'rb') as data:
         response = requests.put(url, data=data, headers=self.headers)
         if response.status_code not in [201, 202, 204]:
             raise OnedataUploadError(file_name=file_name,
                                      status_code=response.status_code)
Example 29
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)

            except ClientError as cerr:
                print(f"There was an exception in {func.__name__}")
                print(cerr.response['Error']['Message'])
                get_logger().error(cerr)
                sys.exit(1)

            except FaasSupervisorError as fse:
                print(fse.args[0])
                # Warnings don't stop the execution
                if 'Warning' in fse.__class__.__name__:
                    get_logger().warning(fse)
                # Finish the execution if it's an error
                if 'Error' in fse.__class__.__name__:
                    get_logger().error(fse)
                    sys.exit(1)
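This wrapper reads like the body of an exception-handling decorator. A minimal, self-contained sketch of how such a decorator is typically assembled (handle_supervisor_errors is a hypothetical name; the specific except clauses above are collapsed into one):

import functools
import sys

def handle_supervisor_errors(func):
    # Hypothetical decorator wrapping 'func' with error handling
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as exc:  # stand-in for the specific handlers above
            print(f'There was an exception in {func.__name__}: {exc}')
            sys.exit(1)
    return wrapper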
Example 30
 def run(self):
     """Generic method to launch the supervisor execution."""
     try:
         get_logger().info('Executing function')
         SysUtils.set_env_var('INPUT_DATA', str(self.inputs))
         self.supervisor.execute_function()
         get_logger().info('Creating response')
         return self.supervisor.create_response()
     except FaasSupervisorError as fse:
         get_logger().exception(fse)
         get_logger().error('Creating error response')
         return self.supervisor.create_error_response()