def __init__(self, event, context):
    if context:
        get_logger().info('SUPERVISOR: Initializing AWS Lambda supervisor')
        self.lambda_instance = LambdaInstance(event, context)
        self.body = {}
    else:
        raise NoLambdaContextError()
def execute_function(self):
    if SysUtils.is_var_in_env('SCRIPT'):
        script_path = SysUtils.join_paths(
            SysUtils.get_env_var("TMP_INPUT_DIR"),
            self._SCRIPT_FILE_NAME)
        script_content = StrUtils.base64_to_str(SysUtils.get_env_var('SCRIPT'))
        FileUtils.create_file_with_content(script_path, script_content)
        get_logger().info("Script file created in '%s'", script_path)
        FileUtils.set_file_execution_rights(script_path)
        get_logger().info("Executing user defined script: '%s'", script_path)
        try:
            pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
            orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
            if orig_library_path:
                SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
            self.output = subprocess.check_output(
                ['/bin/sh', script_path],
                stderr=subprocess.STDOUT).decode("latin-1")
            SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
            get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
        except subprocess.CalledProcessError as cpe:
            # Exit with user script return code if an
            # error occurs (Kubernetes handles the error)
            get_logger().error(cpe.output.decode('latin-1'))
            sys.exit(cpe.returncode)
    else:
        get_logger().error('No user script found!')
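# The LD_LIBRARY_PATH swap above deals with PyInstaller one-file binaries,
# which prepend their bundled libraries to LD_LIBRARY_PATH and keep the
# original value in LD_LIBRARY_PATH_ORIG. A minimal standalone sketch of the
# same dance, using only the standard library (the helper name and the
# command are illustrative, not part of the project):
import os
import subprocess

def run_outside_bundle(cmd):
    saved = os.environ.get('LD_LIBRARY_PATH')
    orig = os.environ.get('LD_LIBRARY_PATH_ORIG')
    if orig is not None:
        # Restore the caller's library path before spawning the child
        os.environ['LD_LIBRARY_PATH'] = orig
    try:
        return subprocess.check_output(cmd, stderr=subprocess.STDOUT)
    finally:
        if saved is not None:
            os.environ['LD_LIBRARY_PATH'] = saved

# run_outside_bundle(['/bin/sh', '-c', 'echo "$LD_LIBRARY_PATH"'])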
def _execute_batch(self):
    batch_ri = Batch(self.lambda_instance).invoke_batch_function()
    batch_logs = (f"Job delegated to batch.\n"
                  f"Check batch logs with:\n"
                  f" scar log -n {self.lambda_instance.get_function_name()} "
                  f"-ri {batch_ri}")
    get_logger().info(batch_logs)
    self.body["udocker_output"] = batch_logs.encode('utf-8')
def parse_event(event): """Parses the received event and returns the appropriate event class.""" # Make sure the event is always stored parsed_event = None if not isinstance(event, dict): try: event = json.loads(event) except ValueError: return UnknownEvent(event) # Applies the event identification flow if _is_api_gateway_event(event): get_logger().info("API Gateway event found.") parsed_event = ApiGatewayEvent(event) # Update event info with API request event body # to be further processed (if needed) if parsed_event.has_json_body(): event = parsed_event.body if not isinstance(parsed_event.body, dict): event = json.loads(parsed_event.body) if _is_storage_event(event): get_logger().info("Storage event found.") parsed_event = _parse_storage_event(event) # Store 'object_key' in environment variable SysUtils.set_env_var("STORAGE_OBJECT_KEY", parsed_event.object_key) return parsed_event if parsed_event else UnknownEvent(event)
def _create_container(self):
    if self._is_container_available():
        get_logger().info("Container already available")
    else:
        get_logger().info("Creating container based on image '%s'.",
                          self.cont_img_id)
        SysUtils.execute_cmd(self._create_udocker_container_cmd())
    SysUtils.execute_cmd(self._set_udocker_container_execution_mode_cmd())
def upload_file(self, file_path, file_name, output_path): """Uploads the file to the S3 output path.""" file_key = get_file_key(output_path, file_name) bucket_name = get_bucket_name(output_path) get_logger().info('Uploading file \'%s\' to bucket \'%s\'', file_key, bucket_name) with open(file_path, 'rb') as data: self.client.upload_fileobj(data, bucket_name, file_key)
def main(event, context=None):
    """Initializes the generic supervisor and launches its execution."""
    configure_logger()
    get_logger().debug("EVENT received: %s", event)
    if context:
        get_logger().debug("CONTEXT received: %s", context)
    supervisor = Supervisor(event, context)
    return supervisor.run()
def _create_image(self):
    if self._is_container_image_downloaded():
        get_logger().info("Container image '%s' already available",
                          self.cont_img_id)
    elif SysUtils.is_var_in_env("IMAGE_FILE"):
        self._load_local_container_image()
    else:
        self._download_container_image()
def upload_file(self, file_path, file_name):
    url = (f'https://{self.oneprovider_host}/{self._CDMI_PATH}/'
           f'{self.oneprovider_space}/{self.stg_path}/{file_name}')
    get_logger().info("Uploading file '%s' to '%s/%s'",
                      file_name, self.oneprovider_space, self.stg_path)
    with open(file_path, 'rb') as data:
        response = requests.put(url, data=data, headers=self.headers)
    if response.status_code not in [201, 202, 204]:
        get_logger().error("Upload failed. Status code: %s",
                           response.status_code)
def _execute_udocker(self): try: udocker = Udocker(self.lambda_instance) udocker.prepare_container() self.body["udocker_output"] = udocker.launch_udocker_container() get_logger().debug("CONTAINER OUTPUT:\n %s", self.body["udocker_output"]) except subprocess.TimeoutExpired: get_logger().warning("Container execution timed out") if _is_lambda_batch_execution(): self._execute_batch()
def download_input(self, parsed_event, input_dir_path):
    """Receives the event where the file information is and the
    tmp_dir_path where to store the downloaded file.

    Returns the file path where the file is downloaded."""
    auth_data = self._get_input_auth_data(parsed_event)
    stg_provider = create_provider(auth_data)
    get_logger().info("Found '%s' input provider", stg_provider.get_type())
    return stg_provider.download_file(parsed_event, input_dir_path)
def upload_output(storage_provider, output_dir_path):
    """Receives the tmp_dir_path where the files to upload are stored and
    uploads all the files found there."""
    get_logger().info("Searching for files to upload in folder '%s'",
                      output_dir_path)
    output_files = FileUtils.get_all_files_in_dir(output_dir_path)
    get_logger().info("Found the following files to upload: '%s'",
                      output_files)
    for file_path in output_files:
        file_name = file_path.replace(f"{output_dir_path}/", "")
        storage_provider.upload_file(file_path, file_name)
def upload_file(self, file_path, file_name):
    file_key = self._get_file_key(file_name)
    bucket_name = self._get_bucket_name()
    get_logger().info("Uploading file '%s' to bucket '%s'", file_key, bucket_name)
    with open(file_path, 'rb') as data:
        _get_client().upload_fileobj(data, bucket_name, file_key)
    get_logger().info(
        "Changing ACLs for public-read for object in bucket '%s' with key '%s'",
        bucket_name, file_key)
    # Apply the ACL announced in the log line above
    # (boto3's put_object_acl is the assumed call here)
    _get_client().put_object_acl(ACL='public-read',
                                 Bucket=bucket_name,
                                 Key=file_key)
def _get_script_path(self):
    script_path = None
    if SysUtils.is_var_in_env('SCRIPT'):
        script_path = SysUtils.join_paths(
            SysUtils.get_env_var("TMP_INPUT_DIR"),
            self._SCRIPT_FILE_NAME)
        script_content = StrUtils.base64_to_str(SysUtils.get_env_var('SCRIPT'))
        FileUtils.create_file_with_content(script_path, script_content)
        get_logger().info("Script file created in '%s'", script_path)
    elif FileUtils.is_file(self._OSCAR_SCRIPT_PATH):
        script_path = self._OSCAR_SCRIPT_PATH
    return script_path
def create_error_response(self):
    exception_msg = traceback.format_exc()
    get_logger().error("Exception launched:\n %s", exception_msg)
    return {
        "statusCode": 500,
        "headers": {
            "amz-lambda-request-id": self.lambda_instance.get_request_id(),
            "amz-log-group-name": self.lambda_instance.get_log_group_name(),
            "amz-log-stream-name": self.lambda_instance.get_log_stream_name()
        },
        "body": StrUtils.dict_to_base64str({"exception": exception_msg}),
        "isBase64Encoded": True,
    }
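# Reading the error response on the client side. Assuming
# StrUtils.dict_to_base64str produces base64-encoded JSON, the body can be
# recovered with the standard library (the 'response' value below is a
# hand-built stand-in for a real Lambda error response):
import base64
import json

response = {"body": base64.b64encode(
                json.dumps({"exception": "Traceback ..."}).encode()).decode(),
            "isBase64Encoded": True}
if response["isBase64Encoded"]:
    payload = json.loads(base64.b64decode(response["body"]))
    print(payload["exception"])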
def _parse_input(self): """Download input data from storage provider or save data from POST request. A function can have information from several storage providers but one event always represents only one file (so far), so only one provider is going to be used for each event received. """ input_file_path = self.stg_config.download_input(self.parsed_event, self.input_tmp_dir.name) if input_file_path and FileUtils.is_file(input_file_path): SysUtils.set_env_var('INPUT_FILE_PATH', input_file_path) get_logger().info('INPUT_FILE_PATH variable set to \'%s\'', input_file_path)
def _execute_udocker(self): try: udocker = Udocker(self.lambda_instance) udocker.prepare_container() self.body["udocker_output"] = udocker.launch_udocker_container() get_logger().debug( "CONTAINER OUTPUT:\n %s", self.body["udocker_output"].decode(encoding='utf-8', errors='ignore')) except (subprocess.TimeoutExpired, ContainerTimeoutExpiredWarning): get_logger().warning("Container execution timed out") if _is_lambda_batch_execution(): self._execute_batch()
def execute_function(self):
    script_path = self._get_script_path()
    if script_path:
        pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
        orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
        if orig_library_path:
            SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
        else:
            SysUtils.delete_env_var('LD_LIBRARY_PATH')
        proc = subprocess.Popen(['/bin/bash', script_path],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                encoding='utf-8',
                                errors='ignore')
        SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
        # Stream and accumulate the script output line by line
        get_logger().debug("CONTAINER OUTPUT:")
        for line in proc.stdout:
            get_logger().debug(line.strip())
            self.output = self.output + line
        # Exit with user script return code if an
        # error occurs (Kubernetes handles the error)
        proc.wait()
        if proc.returncode != 0:
            get_logger().error(self.output)
            sys.exit(proc.returncode)
    else:
        get_logger().error('No user script found!')
def download_file(self, parsed_event, input_dir_path): """Downloads a file from a minio bucket.""" file_download_path = SysUtils.join_paths(input_dir_path, parsed_event.file_name) get_logger().info("Downloading item from bucket '%s' with key '%s'", parsed_event.bucket_name, parsed_event.file_name) with open(file_download_path, 'wb') as data: self._get_client().download_fileobj(parsed_event.bucket_name, parsed_event.file_name, data) get_logger().info( "Successful download of file '%s' from bucket '%s' in path '%s'", parsed_event.file_name, parsed_event.bucket_name, file_download_path) return file_download_path
def download_file(self, parsed_event, input_dir_path): """ Downloads the file from the S3 bucket and returns the path were the download is placed. """ file_download_path = SysUtils.join_paths(input_dir_path, parsed_event.file_name) get_logger().info("Downloading item from bucket '%s' with key '%s'", parsed_event.bucket_name, parsed_event.object_key) with open(file_download_path, 'wb') as data: _get_client().download_fileobj(parsed_event.bucket_name, parsed_event.object_key, data) get_logger().info( "Successful download of file '%s' from bucket '%s' in path '%s'", parsed_event.object_key, parsed_event.bucket_name, file_download_path) return file_download_path
def upload_output(self, output_dir_path):
    """Receives the tmp_dir_path where the files to upload are stored and
    uploads files whose name matches the prefixes and suffixes specified
    in 'output'."""
    get_logger().info("Searching for files to upload in folder '%s'",
                      output_dir_path)
    output_files = FileUtils.get_all_files_in_dir(output_dir_path)
    stg_providers = {}
    # Filter files by prefix and suffix
    for output in self.output:
        get_logger().info("Checking files for uploading to '%s' on path: '%s'",
                          output['storage_provider'], output['path'])
        provider_type = StrUtils.get_storage_type(output['storage_provider'])
        provider_id = StrUtils.get_storage_id(output['storage_provider'])
        for file_path in output_files:
            file_name = file_path.replace(f'{output_dir_path}/', '')
            prefix_ok = False
            suffix_ok = False
            # Check prefixes
            if 'prefix' not in output or len(output['prefix']) == 0:
                prefix_ok = True
            else:
                for pref in output['prefix']:
                    if file_name.startswith(pref):
                        prefix_ok = True
                        break
            if prefix_ok:
                # Check suffixes
                if 'suffix' not in output or len(output['suffix']) == 0:
                    suffix_ok = True
                else:
                    for suff in output['suffix']:
                        if file_name.endswith(suff):
                            suffix_ok = True
                            break
                # Only upload the file if its name matches
                # the prefixes and suffixes
                if suffix_ok:
                    if provider_type not in stg_providers:
                        stg_providers[provider_type] = {}
                    if provider_id not in stg_providers[provider_type]:
                        auth_data = self._get_auth_data(provider_type,
                                                        provider_id)
                        stg_providers[provider_type][provider_id] = \
                            create_provider(auth_data)
                    stg_providers[provider_type][provider_id].upload_file(
                        file_path, file_name, output['path'])
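# The prefix/suffix filtering above can be condensed. This standalone helper
# mirrors its semantics (an absent or empty list matches everything, and a
# file is uploaded only when both checks pass); the name is illustrative:
def name_matches(file_name, output):
    prefixes = output.get('prefix') or []
    suffixes = output.get('suffix') or []
    prefix_ok = not prefixes or any(file_name.startswith(p) for p in prefixes)
    suffix_ok = not suffixes or any(file_name.endswith(s) for s in suffixes)
    return prefix_ok and suffix_ok

assert name_matches('result-01.png', {'prefix': ['result-'], 'suffix': ['.png']})
assert not name_matches('log.txt', {'suffix': ['.png']})
assert name_matches('anything', {})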
def get_output_paths(): """Returns the defined output providers. Reads the global variables to create the providers needed. Variable schema: STORAGE_PATH_$1_$2 $1: INPUT | OUTPUT $2: STORAGE_ID (Specified in the function definition file, is unique for each storage defined) e.g.: STORAGE_PATH_INPUT_12345 """ get_logger().info("Reading output path variables") env_vars = SysUtils.get_filtered_env_vars("STORAGE_PATH_") storage_path = namedtuple('storage_path', ['id', 'path']) # Remove the storage type 'OUTPUT_' and store only the id and the path # Store a tuple, so the information can't be modified return [storage_path(env_key[7:], env_val) for env_key, env_val in env_vars.items() if env_key.startswith('OUTPUT_')]
def parse_event(event): """Parses the received event and returns the appropriate event class.""" # Make sure the event is always stored parsed_event = None if not isinstance(event, dict): event = json.loads(event) # Applies the event identification flow if _is_api_gateway_event(event): get_logger().info("API Gateway event found.") parsed_event = ApiGatewayEvent(event) # Update event info with API request event body # to be further processed (if needed) if parsed_event.has_json_body(): event = parsed_event.body if not isinstance(parsed_event.body, dict): event = json.loads(parsed_event.body) if _is_storage_event(event): get_logger().info("Storage event found.") parsed_event = _parse_storage_event(event) return parsed_event if parsed_event else UnknownEvent(event)
def _parse_config(self):
    # Read output list
    output = ConfigUtils.read_cfg_var('output')
    if output != '':
        self.output = output
    else:
        get_logger().warning('There is no output defined for this function.')
    # Read input list
    input_value = ConfigUtils.read_cfg_var('input')
    if input_value != '':
        self.input = input_value
    else:
        get_logger().warning('There is no input defined for this function.')
    # Read storage_providers dict
    storage_providers = ConfigUtils.read_cfg_var('storage_providers')
    if storage_providers and storage_providers != '':
        # s3 storage provider auth
        if 's3' in storage_providers and storage_providers['s3']:
            self._validate_s3_creds(storage_providers['s3'])
        # minio storage provider auth
        if 'minio' in storage_providers and storage_providers['minio']:
            self._validate_minio_creds(storage_providers['minio'])
        # onedata storage provider auth
        if 'onedata' in storage_providers and storage_providers['onedata']:
            self._validate_onedata_creds(storage_providers['onedata'])
    else:
        get_logger().warning(
            'There is no storage provider defined for this function.')
def execute_function(self):
    script_path = self._get_script_path()
    if script_path:
        try:
            pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
            orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
            if orig_library_path:
                SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
            else:
                SysUtils.delete_env_var('LD_LIBRARY_PATH')
            self.output = subprocess.check_output(
                ['/bin/sh', script_path],
                stderr=subprocess.STDOUT).decode("latin-1")
            SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
            get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
        except subprocess.CalledProcessError as cpe:
            # Exit with user script return code if an
            # error occurs (Kubernetes handles the error)
            get_logger().error(cpe.output.decode('latin-1'))
            sys.exit(cpe.returncode)
    else:
        get_logger().error('No user script found!')
def launch_udocker_container(self): """Launches the udocker container. If the execution time of the container exceeds the defined execution time, the container is killed and a warning is raised.""" remaining_seconds = self.lambda_instance.get_remaining_time_in_seconds( ) get_logger().info( "Executing udocker container. Timeout set to '%d' seconds", remaining_seconds) get_logger().debug("Udocker command: '%s'", self.cont_cmd) with open(self._CONTAINER_OUTPUT_FILE, "wb") as out: with subprocess.Popen(self.cont_cmd, stderr=subprocess.STDOUT, stdout=out, start_new_session=True) as process: try: process.wait(timeout=remaining_seconds) except subprocess.TimeoutExpired: get_logger().info("Stopping process '%s'", process) process.kill() raise ContainerTimeoutExpiredWarning() udocker_output = b'' if FileUtils.is_file(self._CONTAINER_OUTPUT_FILE): udocker_output = FileUtils.read_file(self._CONTAINER_OUTPUT_FILE, file_mode="rb") return udocker_output
def download_file(self, parsed_event, input_dir_path): """Downloads the file from the space of Onedata and returns the path were the download is placed. """ file_download_path = "" url = f'https://{self.oneprovider_host}{self._CDMI_PATH}{parsed_event.object_key}' get_logger().info('Downloading item from host \'%s\' with key \'%s\'', self.oneprovider_host, parsed_event.object_key) response = requests.get(url, headers=self.headers) if response.status_code == 200: file_download_path = SysUtils.join_paths(input_dir_path, parsed_event.file_name) FileUtils.create_file_with_content(file_download_path, response.content, mode='wb') get_logger().info( 'Successful download of file \'%s\' with key \'%s\' in path \'%s\'', parsed_event.file_name, parsed_event.object_key, file_download_path) else: raise OnedataDownloadError(file_name=parsed_event.object_key, status_code=response.status_code) return file_download_path
def upload_file(self, file_path, file_name, output_path): """Uploads the file to the Onedata output path.""" file_name = file_name.strip('/') output_path = output_path.strip('/') upload_path = f'{output_path}/{file_name}' upload_folder = FileUtils.get_dir_name(upload_path) # Create output folder (and subfolders) if it does not exists if not self._folder_exists(upload_folder): folders = upload_folder.split('/') path = '' for folder in folders: path = f'{path}/{folder}' if not self._folder_exists(path): self._create_folder(path) # Upload the file url = (f'https://{self.oneprovider_host}{self._CDMI_PATH}/' f'{self.oneprovider_space}/{upload_path}') get_logger().info('Uploading file \'%s\' to space \'%s\'', upload_path, self.oneprovider_space) with open(file_path, 'rb') as data: response = requests.put(url, data=data, headers=self.headers) if response.status_code not in [201, 202, 204]: raise OnedataUploadError(file_name=file_name, status_code=response.status_code)
def wrapper(*args, **kwargs):
    try:
        return func(*args, **kwargs)
    except ClientError as cerr:
        print(f"There was an exception in {func.__name__}")
        print(cerr.response['Error']['Message'])
        get_logger().error(cerr)
        sys.exit(1)
    except FaasSupervisorError as fse:
        print(fse.args[0])
        if 'Warning' in fse.__class__.__name__:
            get_logger().warning(fse)
        # Finish the execution if it's an error
        if 'Error' in fse.__class__.__name__:
            get_logger().error(fse)
            sys.exit(1)
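# The wrapper above is the inner function of an exception-handling
# decorator; the outer layers are not shown here, so this sketch of how it
# would be assembled and applied is an assumption, not the project's code:
import functools

def exception():
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as exc:  # the real wrapper matches specific types
                print(f"There was an exception in {func.__name__}")
                raise
        return wrapper
    return decorator

@exception()
def risky_call():
    raise RuntimeError("boom")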
def run(self): """Generic method to launch the supervisor execution.""" try: get_logger().info('Executing function') SysUtils.set_env_var('INPUT_DATA', str(self.inputs)) self.supervisor.execute_function() # if is_batch_execution() and SysUtils.is_lambda_environment(): # # Only delegate to batch # self.supervisor.execute_function() # else: # self._parse_input() # self.supervisor.execute_function() # self._parse_output() get_logger().info('Creating response') return self.supervisor.create_response() except FaasSupervisorError as fse: get_logger().exception(fse) get_logger().error('Creating error response') return self.supervisor.create_error_response()