def _add_container_volumes(self):
    """Append the udocker volume ('-v') flags to the container command.

    Mounts the temporal input/output dirs plus a minimal set of system
    paths, and the permanent folder when an extra payload is present.
    """
    volume_flags = ["-v", SysUtils.get_env_var("TMP_INPUT_DIR"),
                    "-v", SysUtils.get_env_var("TMP_OUTPUT_DIR"),
                    "-v", "/dev",
                    "-v", "/proc",
                    "-v", "/etc/hosts",
                    "--nosysdirs"]
    if SysUtils.is_var_in_env('EXTRA_PAYLOAD'):
        volume_flags.extend(["-v", self.lambda_instance.PERMANENT_FOLDER])
    self.cont_cmd.extend(volume_flags)
def execute_function(self):
    """Decode the base64 'SCRIPT' variable into the temporal input dir,
    make it executable and run it with /bin/sh, capturing its output.

    Exits the process with the script's return code on failure so the
    orchestrator (e.g. Kubernetes) can handle the error.
    """
    if not SysUtils.is_var_in_env('SCRIPT'):
        get_logger().error('No user script found!')
        return
    script_path = SysUtils.join_paths(SysUtils.get_env_var("TMP_INPUT_DIR"),
                                      self._SCRIPT_FILE_NAME)
    decoded_script = StrUtils.base64_to_str(SysUtils.get_env_var('SCRIPT'))
    FileUtils.create_file_with_content(script_path, decoded_script)
    get_logger().info("Script file created in '%s'", script_path)
    FileUtils.set_file_execution_rights(script_path)
    get_logger().info("Executing user defined script: '%s'", script_path)
    try:
        # Temporarily restore the original library path (PyInstaller
        # overrides LD_LIBRARY_PATH for its bundled libraries), then
        # put the PyInstaller path back after the script runs.
        pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
        orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
        if orig_library_path:
            SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
        self.output = subprocess.check_output(
            ['/bin/sh', script_path],
            stderr=subprocess.STDOUT).decode("latin-1")
        SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
        get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
    except subprocess.CalledProcessError as cpe:
        # Exit with user script return code if an
        # error occurs (Kubernetes handles the error)
        get_logger().error(cpe.output.decode('latin-1'))
        sys.exit(cpe.returncode)
def parse_event(event):
    """Parses the received event and returns the appropriate event class."""
    # Non-dict events are expected to be JSON strings; anything that
    # fails to parse is wrapped as an unknown event.
    if not isinstance(event, dict):
        try:
            event = json.loads(event)
        except ValueError:
            return UnknownEvent(event)
    result = None
    # Event identification flow: API Gateway first, then storage.
    if _is_api_gateway_event(event):
        get_logger().info("API Gateway event found.")
        result = ApiGatewayEvent(event)
        # Replace the event with the API request body so it can be
        # further identified as a storage event (if needed)
        if result.has_json_body():
            body = result.body
            event = body if isinstance(body, dict) else json.loads(body)
    if _is_storage_event(event):
        get_logger().info("Storage event found.")
        result = _parse_storage_event(event)
        # Store 'object_key' in environment variable
        SysUtils.set_env_var("STORAGE_OBJECT_KEY", result.object_key)
    if result:
        return result
    return UnknownEvent(event)
def _create_container(self):
    """Create the udocker container and set its execution mode, unless a
    container is already available."""
    if self._is_container_available():
        get_logger().info("Container already available")
        return
    get_logger().info("Creating container based on image '%s'.",
                      self.cont_img_id)
    SysUtils.execute_cmd(self._create_udocker_container_cmd())
    SysUtils.execute_cmd(self._set_udocker_container_execution_mode_cmd())
def test_get_env_var(self):
    """get_env_var returns the value when set and '' when missing."""
    env = {"K1": "V1", "K2": "V2"}
    with mock.patch.dict('os.environ', env, clear=True):
        self.assertEqual(SysUtils.get_env_var("K2"), "V2")
        # Undefined variables default to an empty string
        self.assertEqual(SysUtils.get_env_var("K3"), "")
def _add_aws_access_keys(self):
    """Forward the Lambda environment AWS credentials to the container."""
    for var_name in ("AWS_ACCESS_KEY_ID",
                     "AWS_SECRET_ACCESS_KEY",
                     "AWS_SESSION_TOKEN"):
        self.cont_cmd.extend(
            _parse_cont_env_var(var_name, SysUtils.get_env_var(var_name)))
def _add_iam_credentials(self):
    """Forward IAM credentials defined through 'CONT_VAR_*' variables to
    the container environment (only those actually set)."""
    for cred in ('AWS_ACCESS_KEY_ID',
                 'AWS_SECRET_ACCESS_KEY',
                 'AWS_SESSION_TOKEN'):
        env_key = f'CONT_VAR_{cred}'
        if SysUtils.is_var_in_env(env_key):
            self.cont_cmd.extend(
                _parse_cont_env_var(cred, SysUtils.get_env_var(env_key)))
def __init__(self, lambda_instance): self.lambda_instance = lambda_instance # Create required udocker folder FileUtils.create_folder(SysUtils.get_env_var("UDOCKER_DIR")) # Init the udocker command that will be executed self.udocker_exec = [SysUtils.get_env_var("UDOCKER_EXEC")] self.cont_cmd = self.udocker_exec + ["--quiet", "run"] self.cont_img_id = ConfigUtils.read_cfg_var('container').get('image') if not self.cont_img_id: raise ContainerImageNotFoundError()
def _create_tmp_dirs(self):
    """Creates the temporal directories where the input/output data is
    going to be stored.

    The folders are deleted automatically when the execution finishes.
    """
    self.input_tmp_dir, self.output_tmp_dir = (FileUtils.create_tmp_dir(),
                                               FileUtils.create_tmp_dir())
    # Publish the paths so other components can locate the folders
    SysUtils.set_env_var("TMP_INPUT_DIR", self.input_tmp_dir.name)
    SysUtils.set_env_var("TMP_OUTPUT_DIR", self.output_tmp_dir.name)
def _get_script_path(self):
    """Return the path of the user script to execute, or None.

    An inline base64 'SCRIPT' variable takes precedence; it is decoded
    and written into the temporal input dir. Otherwise the pre-deployed
    OSCAR script file is used if present.
    """
    if SysUtils.is_var_in_env('SCRIPT'):
        path = SysUtils.join_paths(SysUtils.get_env_var("TMP_INPUT_DIR"),
                                   self._SCRIPT_FILE_NAME)
        content = StrUtils.base64_to_str(SysUtils.get_env_var('SCRIPT'))
        FileUtils.create_file_with_content(path, content)
        get_logger().info("Script file created in '%s'", path)
        return path
    if FileUtils.is_file(self._OSCAR_SCRIPT_PATH):
        return self._OSCAR_SCRIPT_PATH
    return None
def _parse_input(self):
    """Download input data from storage provider
    or save data from POST request.

    A function can have information from several storage providers
    but one event always represents only one file (so far), so only
    one provider is going to be used for each event received.
    """
    downloaded = self.stg_config.download_input(self.parsed_event,
                                                self.input_tmp_dir.name)
    if not (downloaded and FileUtils.is_file(downloaded)):
        return
    SysUtils.set_env_var('INPUT_FILE_PATH', downloaded)
    get_logger().info('INPUT_FILE_PATH variable set to \'%s\'', downloaded)
def execute_function(self):
    """Run the user script with /bin/sh and store its output in
    ``self.output``.

    Exits the process with the script's return code on failure so the
    orchestrator (e.g. Kubernetes) can handle the error.
    """
    script_path = self._get_script_path()
    if not script_path:
        get_logger().error('No user script found!')
        return
    try:
        # Restore the original library path while the user script runs
        # (PyInstaller overrides LD_LIBRARY_PATH for its bundled libs),
        # then put the PyInstaller path back afterwards.
        pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
        orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
        if orig_library_path:
            SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
        else:
            SysUtils.delete_env_var('LD_LIBRARY_PATH')
        self.output = subprocess.check_output(
            ['/bin/sh', script_path],
            stderr=subprocess.STDOUT).decode("latin-1")
        SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
        get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
    except subprocess.CalledProcessError as cpe:
        # Exit with user script return code if an
        # error occurs (Kubernetes handles the error)
        get_logger().error(cpe.output.decode('latin-1'))
        sys.exit(cpe.returncode)
def execute_function(self):
    """Run the user script with /bin/bash, streaming each output line to
    the debug log while accumulating it in ``self.output``.

    Exits the process with the script's return code on failure so the
    orchestrator (e.g. Kubernetes) can handle the error.

    Fixes over the previous version:
    - "CONTAINER OUTPUT" was logged BEFORE the read loop populated
      ``self.output``; it is now logged after the output is collected.
    - The ``except subprocess.CalledProcessError`` branch was dead code:
      ``Popen`` never raises it, so a failing script's nonzero exit code
      was silently ignored. The process return code is now checked
      explicitly after ``wait()``.
    """
    script_path = self._get_script_path()
    if not script_path:
        get_logger().error('No user script found!')
        return
    pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
    orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
    # Run the script with the original library path (PyInstaller
    # overrides LD_LIBRARY_PATH for its bundled libraries).
    if orig_library_path:
        SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
    else:
        SysUtils.delete_env_var('LD_LIBRARY_PATH')
    proc = subprocess.Popen(['/bin/bash', script_path],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            encoding='utf-8',
                            errors='ignore')
    SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
    # Stream the script output line by line into the log and accumulate
    # it (keep any previously stored output, as the original code did).
    self.output = getattr(self, 'output', '') or ''
    for line in proc.stdout:
        get_logger().debug(line.strip())
        self.output = self.output + line
    get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
    proc.wait()
    if proc.returncode != 0:
        # Exit with user script return code if an
        # error occurs (Kubernetes handles the error)
        get_logger().error(self.output)
        sys.exit(proc.returncode)
def test_get_cont_env_vars(self):
    """Only 'CONT_VAR_'-prefixed variables are returned, prefix stripped."""
    env = {"K1": "V1", "CONT_VAR_C1": "VC1", "CONT_VAR_C2": "VC2"}
    with mock.patch.dict('os.environ', env, clear=True):
        self.assertEqual(SysUtils.get_cont_env_vars(),
                         {"C1": "VC1", "C2": "VC2"})
    # No prefixed variables present -> empty dict
    with mock.patch.dict('os.environ', {"K1": "V1"}, clear=True):
        self.assertEqual(SysUtils.get_cont_env_vars(), {})
def get_remaining_time_in_seconds(self):
    """Returns the amount of time remaining for the invocation in seconds,
    minus the configured timeout threshold.
    """
    remaining_time = int(self.context.get_remaining_time_in_millis() / 1000)
    timeout_threshold = SysUtils.get_env_var('TIMEOUT_THRESHOLD')
    # Fall back to the config file when the env var is unset.
    # BUG FIX: this used "is ''" (identity comparison with a literal),
    # which only works by CPython string-interning accident and raises
    # SyntaxWarning on Python 3.8+; use equality instead.
    if timeout_threshold == '':
        timeout_threshold = ConfigUtils.read_cfg_var('container')['timeout_threshold']
    return remaining_time - int(timeout_threshold)
def read_storage_providers(self): """Reads the global variables to create the providers needed. Variable schema: STORAGE_AUTH_$1_$2_$3 $1: MINIO | S3 | ONEDATA $2: USER | PASS | TOKEN | SPACE | HOST $3: STORAGE_ID (Specified in the function definition file, is unique for each storage defined) e.g.: STORAGE_AUTH_MINIO_USER_12345 """ # Remove the prefix 'STORAGE_AUTH_' env_vars = SysUtils.get_filtered_env_vars("STORAGE_AUTH_") # type = MINIO | S3 | ONEDATA ... # cred = USER | PASS | TOKEN ... provider_info = namedtuple('provider_info', ['type', 'cred', 'id']) for env_key, env_val in env_vars.items(): # Don't split past the id # MINIO_USER_123_45 -> *[MINIO, USER, 123_45] prov_info = provider_info(*env_key.split("_", 2)) # Link ID with TYPE if prov_info.id not in self.auth_id: self.auth_id[prov_info.id] = prov_info.type if prov_info.type not in self.auth_type: # Link TYPE with AUTH data self.auth_type[prov_info.type] = AuthData( prov_info.id, prov_info.type) self.auth_type[prov_info.type].set_credential( prov_info.cred, env_val)
def _parse_exec_script_and_commands(self):
    """Resolve what the container should execute, in precedence order:
    an inline script in the event, explicit command arguments, or a
    service-wide init script defined via 'INIT_SCRIPT_PATH'."""
    if 'script' in self.raw_event:
        # Check for script in function event
        self.script_path = f"{self.input_folder}/script.sh"
        decoded_script = StrUtils.base64_to_str(self.raw_event['script'])
        FileUtils.create_file_with_content(self.script_path, decoded_script)
    elif 'cmd_args' in self.raw_event:
        # Container invoked with arguments
        self.cmd_args = json.loads(self.raw_event['cmd_args'])
    elif SysUtils.is_var_in_env('INIT_SCRIPT_PATH'):
        # Script to be executed every time (if defined)
        self.init_script_path = f"{self.input_folder}/init_script.sh"
        FileUtils.cp_file(SysUtils.get_env_var("INIT_SCRIPT_PATH"),
                          self.init_script_path)
def _create_image(self):
    """Make the container image available, fetching it only when needed."""
    if self._is_container_image_downloaded():
        get_logger().info("Container image '%s' already available",
                          self.cont_img_id)
        return
    # Prefer a locally provided image file over a registry download
    if SysUtils.is_var_in_env("IMAGE_FILE"):
        self._load_local_container_image()
    else:
        self._download_container_image()
def save_event(self, input_dir_path):
    """Store the event body under 'input_dir_path' and return the file path.

    JSON bodies are written as-is; non-JSON bodies are assumed to be
    base64-encoded and are written decoded in binary mode.
    """
    file_path = SysUtils.join_paths(input_dir_path, self._FILE_NAME)
    if self.has_json_body():
        FileUtils.create_file_with_content(file_path, self.body)
    else:
        FileUtils.create_file_with_content(file_path,
                                           base64.b64decode(self.body),
                                           mode='wb')
    return file_path
def _get_client(self):
    """Return Minio client with user configuration."""
    # Fall back to the default endpoint when MINIO_ENDPOINT is unset/empty
    endpoint = (SysUtils.get_env_var('MINIO_ENDPOINT')
                or self._DEFAULT_MINIO_ENDPOINT)
    return boto3.client(
        's3',
        endpoint_url=endpoint,
        aws_access_key_id=self.stg_auth.get_credential('USER'),
        aws_secret_access_key=self.stg_auth.get_credential('PASS'))
def test_get_all_env_vars(self):
    """get_all_env_vars mirrors the whole process environment."""
    env = {"K1": "V1", "K2": "V2"}
    with mock.patch.dict('os.environ', env, clear=True):
        self.assertEqual(SysUtils.get_all_env_vars(), env)
def run(self):
    """Generic method to launch the supervisor execution."""
    try:
        get_logger().info('Executing function')
        SysUtils.set_env_var('INPUT_DATA', str(self.inputs))
        self.supervisor.execute_function()
        # NOTE(review): input/output parsing is currently disabled;
        # the previous flow was:
        # if is_batch_execution() and SysUtils.is_lambda_environment():
        #     # Only delegate to batch
        #     self.supervisor.execute_function()
        # else:
        #     self._parse_input()
        #     self.supervisor.execute_function()
        #     self._parse_output()
        get_logger().info('Creating response')
        return self.supervisor.create_response()
    except FaasSupervisorError as fse:
        get_logger().exception(fse)
        get_logger().error('Creating error response')
        return self.supervisor.create_error_response()
def _create_supervisor(event, context=None):
    """Returns a new supervisor based on the environment.

    Binary mode by default"""
    if SysUtils.is_lambda_environment():
        return LambdaSupervisor(event, context)
    return BinarySupervisor()
def test_get_filtered_env_vars(self):
    """Variables matching the prefix are returned with the prefix removed."""
    env = {"K1": "V1", "F1_C1": "VC1", "F1_C2": "VC2"}
    with mock.patch.dict('os.environ', env, clear=True):
        self.assertEqual(SysUtils.get_filtered_env_vars("F1_"),
                         {"C1": "VC1", "C2": "VC2"})
def _parse_input(self):
    """Download input data from storage provider or save data from POST request.

    A function can have information from several storage providers
    but one event always represents only one file (so far), so only
    one provider is going to be used for each event received.
    """
    stg_prov = self._get_input_provider()
    # BUG FIX: the provider was dereferenced ('stg_prov.get_type()') in a
    # log call BEFORE the None check, raising AttributeError whenever no
    # input provider matched the event. The log now runs inside the guard.
    if stg_prov:
        get_logger().info("Found '%s' input provider", stg_prov.get_type())
        get_logger().info("Downloading input file using '%s' event",
                          self.parsed_event.get_type())
        input_file_path = storage.download_input(
            stg_prov, self.parsed_event,
            SysUtils.get_env_var("TMP_INPUT_DIR"))
        if input_file_path and FileUtils.is_file(input_file_path):
            SysUtils.set_env_var("INPUT_FILE_PATH", input_file_path)
            get_logger().info("INPUT_FILE_PATH variable set to '%s'",
                              input_file_path)
def download_file(self, parsed_event, input_dir_path):
    """ Downloads the file from the S3 bucket and
    returns the path were the download is placed. """
    bucket = parsed_event.bucket_name
    key = parsed_event.object_key
    file_download_path = SysUtils.join_paths(input_dir_path,
                                             parsed_event.file_name)
    get_logger().info("Downloading item from bucket '%s' with key '%s'",
                      bucket, key)
    # Stream the object straight into the destination file
    with open(file_download_path, 'wb') as data:
        _get_client().download_fileobj(bucket, key, data)
    get_logger().info(
        "Successful download of file '%s' from bucket '%s' in path '%s'",
        key, bucket, file_download_path)
    return file_download_path
def download_file(self, parsed_event, input_dir_path):
    """Downloads a file from a minio bucket."""
    bucket = parsed_event.bucket_name
    # Minio events use the file name as the object key
    key = parsed_event.file_name
    download_path = SysUtils.join_paths(input_dir_path,
                                        parsed_event.file_name)
    get_logger().info("Downloading item from bucket '%s' with key '%s'",
                      bucket, key)
    with open(download_path, 'wb') as out_file:
        self._get_client().download_fileobj(bucket, key, out_file)
    get_logger().info(
        "Successful download of file '%s' from bucket '%s' in path '%s'",
        key, bucket, download_path)
    return download_path
def save_event(self, input_dir_path):
    """Stores the unknown event and returns
    the file path where the file is stored."""
    file_path = SysUtils.join_paths(input_dir_path, self._file_name)
    # Probe the event with json.loads to decide how to persist it:
    #  - ValueError: not valid JSON text -> assumed base64-encoded,
    #    stored decoded in binary mode
    #  - TypeError: not str/bytes (e.g. already a dict) -> stored as-is
    #  - parses cleanly: JSON text -> stored as-is
    try:
        json.loads(self.event)
    except ValueError:
        FileUtils.create_file_with_content(file_path,
                                           base64.b64decode(self.event),
                                           mode='wb')
    except TypeError:
        FileUtils.create_file_with_content(file_path, self.event)
    else:
        FileUtils.create_file_with_content(file_path, self.event)
    return file_path
def get_output_paths():
    """Returns the defined output providers.

    Reads the global variables to create the providers needed.
    Variable schema:  STORAGE_PATH_$1_$2
    $1: INPUT | OUTPUT
    $2: STORAGE_ID (Specified in the function definition file,
                    is unique for each storage defined)
    e.g.: STORAGE_PATH_INPUT_12345
    """
    get_logger().info("Reading output path variables")
    env_vars = SysUtils.get_filtered_env_vars("STORAGE_PATH_")
    storage_path = namedtuple('storage_path', ['id', 'path'])
    # Keep only OUTPUT entries, strip the leading 'OUTPUT_' (7 chars) and
    # store immutable (id, path) tuples so the info can't be modified
    paths = []
    for env_key, env_val in env_vars.items():
        if env_key.startswith('OUTPUT_'):
            paths.append(storage_path(env_key[7:], env_val))
    return paths
def download_file(self, parsed_event, input_dir_path):
    """Downloads the file from the space of Onedata and
    returns the path were the download is placed. """
    object_key = parsed_event.object_key
    url = f'https://{self.oneprovider_host}{self._CDMI_PATH}{object_key}'
    get_logger().info('Downloading item from host \'%s\' with key \'%s\'',
                      self.oneprovider_host, object_key)
    response = requests.get(url, headers=self.headers)
    # Fail fast on any non-OK status
    if response.status_code != 200:
        raise OnedataDownloadError(file_name=object_key,
                                   status_code=response.status_code)
    file_download_path = SysUtils.join_paths(input_dir_path,
                                             parsed_event.file_name)
    FileUtils.create_file_with_content(file_download_path,
                                       response.content,
                                       mode='wb')
    get_logger().info(
        'Successful download of file \'%s\' with key \'%s\' in path \'%s\'',
        parsed_event.file_name, object_key, file_download_path)
    return file_download_path