def execute_function(self):
    """Materializes the base64-encoded 'SCRIPT' environment variable into a
    file, makes it executable and runs it with /bin/sh, storing its output
    in ``self.output``.

    On script failure the process exits with the script's return code
    (Kubernetes handles the error). Logs an error if no script is defined.
    """
    if SysUtils.is_var_in_env('SCRIPT'):
        script_path = SysUtils.join_paths(
            SysUtils.get_env_var("TMP_INPUT_DIR"), self._SCRIPT_FILE_NAME)
        script_content = StrUtils.base64_to_str(
            SysUtils.get_env_var('SCRIPT'))
        FileUtils.create_file_with_content(script_path, script_content)
        get_logger().info("Script file created in '%s'", script_path)
        FileUtils.set_file_execution_rights(script_path)
        get_logger().info("Executing user defined script: '%s'", script_path)
        # PyInstaller overrides LD_LIBRARY_PATH; run the user script with
        # the original value so it links against the system libraries.
        pyinstaller_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH')
        orig_library_path = SysUtils.get_env_var('LD_LIBRARY_PATH_ORIG')
        try:
            if orig_library_path:
                SysUtils.set_env_var('LD_LIBRARY_PATH', orig_library_path)
            self.output = subprocess.check_output(
                ['/bin/sh', script_path],
                stderr=subprocess.STDOUT).decode("latin-1")
            get_logger().debug("CONTAINER OUTPUT:\n %s", self.output)
        except subprocess.CalledProcessError as cpe:
            # Exit with user script return code if an
            # error occurs (Kubernetes handles the error)
            get_logger().error(cpe.output.decode('latin-1'))
            sys.exit(cpe.returncode)
        finally:
            # BUG FIX: the original only restored LD_LIBRARY_PATH on the
            # success path, leaving the environment mutated when the
            # script failed; 'finally' restores it on every path.
            SysUtils.set_env_var('LD_LIBRARY_PATH', pyinstaller_library_path)
    else:
        get_logger().error('No user script found!')
def launch_udocker_container(self):
    """Runs the udocker container, capturing its output in a file.

    If the container outlives the remaining execution time of the
    function it is killed and ContainerTimeoutExpiredWarning is raised.
    Returns the captured container output as bytes (b'' if none).
    """
    timeout = self.lambda_instance.get_remaining_time_in_seconds()
    get_logger().info(
        "Executing udocker container. Timeout set to '%d' seconds", timeout)
    get_logger().debug("Udocker command: '%s'", self.cont_cmd)
    with open(self._CONTAINER_OUTPUT_FILE, "wb") as out, \
            subprocess.Popen(self.cont_cmd,
                             stderr=subprocess.STDOUT,
                             stdout=out,
                             start_new_session=True) as proc:
        try:
            proc.wait(timeout=timeout)
        except subprocess.TimeoutExpired:
            get_logger().info("Stopping process '%s'", proc)
            proc.kill()
            raise ContainerTimeoutExpiredWarning()
    if FileUtils.is_file(self._CONTAINER_OUTPUT_FILE):
        return FileUtils.read_file(self._CONTAINER_OUTPUT_FILE,
                                   file_mode="rb")
    return b''
def save_event(self, input_dir_path):
    """Writes the event body into *input_dir_path* and returns the path
    of the created file.

    JSON bodies are stored as text; any other body is assumed to be
    base64-encoded binary data.
    """
    file_path = SysUtils.join_paths(input_dir_path, self._FILE_NAME)
    if self.has_json_body():
        FileUtils.create_file_with_content(file_path, self.body)
    else:
        decoded_body = base64.b64decode(self.body)
        FileUtils.create_file_with_content(file_path, decoded_body,
                                           mode='wb')
    return file_path
def test_create_file_with_json_content(self):
    """A dict content must be serialized to JSON and written as text."""
    mopen = mock.mock_open()
    content = {"k1": "v1", "k2": "v2"}
    with mock.patch('builtins.open', mopen, create=True):
        FileUtils.create_file_with_content('/tmp/file', content)
    mopen.assert_called_once_with('/tmp/file', 'w')
    mopen().write.assert_called_once_with('{"k1": "v1", "k2": "v2"}')
def _create_tmp_dirs(self):
    """Creates the temporal directories where the input/output data is
    going to be stored.

    The folders are deleted automatically when the execution finishes.
    """
    self.input_tmp_dir = FileUtils.create_tmp_dir()
    self.output_tmp_dir = FileUtils.create_tmp_dir()
    # Expose both paths to the rest of the supervisor via env vars.
    for env_key, tmp_dir in (("TMP_INPUT_DIR", self.input_tmp_dir),
                             ("TMP_OUTPUT_DIR", self.output_tmp_dir)):
        SysUtils.set_env_var(env_key, tmp_dir.name)
def __init__(self, lambda_instance): self.lambda_instance = lambda_instance # Create required udocker folder FileUtils.create_folder(SysUtils.get_env_var("UDOCKER_DIR")) # Init the udocker command that will be executed self.udocker_exec = [SysUtils.get_env_var("UDOCKER_EXEC")] self.cont_cmd = self.udocker_exec + ["--quiet", "run"] self.cont_img_id = ConfigUtils.read_cfg_var('container').get('image') if not self.cont_img_id: raise ContainerImageNotFoundError()
def _get_script_path(self):
    """Returns the path of the script to execute.

    When the base64 'SCRIPT' env var is set, its content is written to a
    file in the temporal input dir and that path is returned; otherwise
    the OSCAR script path is returned when it exists, else None.
    """
    if SysUtils.is_var_in_env('SCRIPT'):
        script_path = SysUtils.join_paths(
            SysUtils.get_env_var("TMP_INPUT_DIR"), self._SCRIPT_FILE_NAME)
        script_content = StrUtils.base64_to_str(
            SysUtils.get_env_var('SCRIPT'))
        FileUtils.create_file_with_content(script_path, script_content)
        get_logger().info("Script file created in '%s'", script_path)
        return script_path
    if FileUtils.is_file(self._OSCAR_SCRIPT_PATH):
        return self._OSCAR_SCRIPT_PATH
    return None
def _parse_exec_script_and_commands(self):
    """Determines what to execute inside the container: a script passed
    in the event, explicit command arguments, or the configured init
    script (in that priority order)."""
    # Check for script in function event
    if 'script' in self.raw_event:
        self.script_path = f"{self.input_folder}/script.sh"
        script_content = StrUtils.base64_to_str(self.raw_event['script'])
        FileUtils.create_file_with_content(self.script_path, script_content)
    # Container invoked with arguments
    elif 'cmd_args' in self.raw_event:
        # Add args
        self.cmd_args = json.loads(self.raw_event['cmd_args'])
    # Script to be executed every time (if defined)
    # BUG FIX: the original used "is not ''", which compares object
    # identity rather than equality (undefined for interned strings and a
    # SyntaxWarning on CPython >= 3.8); '!=' is the correct comparison.
    elif ConfigUtils.read_cfg_var('init_script') != '':
        # Add init script
        self.init_script_path = f"{self.input_folder}/init_script.sh"
        FileUtils.cp_file(ConfigUtils.read_cfg_var('init_script'),
                          self.init_script_path)
def _parse_exec_script_and_commands(self):
    """Determines what to execute inside the container: a script passed
    in the event, explicit command arguments, or the init script defined
    via the 'INIT_SCRIPT_PATH' env var (in that priority order)."""
    if 'script' in self.raw_event:
        # Script provided in the invocation event (base64-encoded).
        self.script_path = f"{self.input_folder}/script.sh"
        FileUtils.create_file_with_content(
            self.script_path,
            StrUtils.base64_to_str(self.raw_event['script']))
    elif 'cmd_args' in self.raw_event:
        # Container invoked with explicit arguments.
        self.cmd_args = json.loads(self.raw_event['cmd_args'])
    elif SysUtils.is_var_in_env('INIT_SCRIPT_PATH'):
        # Script to be executed on every invocation (if defined).
        self.init_script_path = f"{self.input_folder}/init_script.sh"
        FileUtils.cp_file(SysUtils.get_env_var("INIT_SCRIPT_PATH"),
                          self.init_script_path)
def test_read_binary_file(self):
    """read_file with file_mode='rb' must open in binary without encoding."""
    expected = 'fifayfofum'
    mopen = mock.mock_open(read_data=expected)
    with mock.patch('builtins.open', mopen, create=True):
        result = FileUtils.read_file('/tmp/file', file_mode='rb')
    mopen.assert_called_once_with('/tmp/file', mode='rb', encoding=None)
    self.assertEqual(result, expected)
def upload_output(storage_provider, output_dir_path):
    """Receives the tmp_dir_path where the files to upload are stored and
    uploads all the files found there."""
    get_logger().info("Searching for files to upload in folder '%s'",
                      output_dir_path)
    output_files = FileUtils.get_all_files_in_dir(output_dir_path)
    get_logger().info("Found the following files to upload: '%s'",
                      output_files)
    # The upload name is the path relative to the output folder.
    dir_prefix = f"{output_dir_path}/"
    for file_path in output_files:
        storage_provider.upload_file(file_path,
                                     file_path.replace(dir_prefix, ""))
def _parse_input(self):
    """Download input data from storage provider or save data from
    POST request.

    A function can have information from several storage providers, but
    one event always represents only one file (so far), so only one
    provider is used for each event received.
    """
    downloaded_path = self.stg_config.download_input(
        self.parsed_event, self.input_tmp_dir.name)
    if downloaded_path and FileUtils.is_file(downloaded_path):
        SysUtils.set_env_var('INPUT_FILE_PATH', downloaded_path)
        get_logger().info('INPUT_FILE_PATH variable set to \'%s\'',
                          downloaded_path)
def download_file(self, parsed_event, input_dir_path):
    """Downloads the file from the space of Onedata and returns
    the path were the download is placed.

    Raises OnedataDownloadError when the CDMI request does not
    return HTTP 200.
    """
    url = (f'https://{self.oneprovider_host}'
           f'{self._CDMI_PATH}{parsed_event.object_key}')
    get_logger().info('Downloading item from host \'%s\' with key \'%s\'',
                      self.oneprovider_host, parsed_event.object_key)
    response = requests.get(url, headers=self.headers)
    if response.status_code != 200:
        raise OnedataDownloadError(file_name=parsed_event.object_key,
                                   status_code=response.status_code)
    file_download_path = SysUtils.join_paths(input_dir_path,
                                             parsed_event.file_name)
    FileUtils.create_file_with_content(file_download_path,
                                       response.content,
                                       mode='wb')
    get_logger().info(
        'Successful download of file \'%s\' with key \'%s\' in path \'%s\'',
        parsed_event.file_name, parsed_event.object_key, file_download_path)
    return file_download_path
def download_file(self, parsed_event, input_dir_path):
    """Downloads the file from the space of Onedata and returns the path
    were the download is placed (empty string on failure)."""
    url = (f"https://{self.oneprovider_host}/"
           f"{self._CDMI_PATH}{parsed_event.object_key}")
    get_logger().info("Downloading item from host '%s' with key '%s'",
                      self.oneprovider_host, parsed_event.object_key)
    response = requests.get(url, headers=self.headers)
    if response.status_code != 200:
        # Best-effort provider: log the failure and return an empty path.
        get_logger().error(
            "File '%s' download from Onedata host '%s' failed!",
            parsed_event.file_name, self.oneprovider_host)
        return ""
    file_download_path = SysUtils.join_paths(input_dir_path,
                                             parsed_event.file_name)
    FileUtils.create_file_with_content(file_download_path,
                                       response.content,
                                       mode='wb')
    get_logger().info(
        "Successful download of file '%s' with key '%s' in path '%s'",
        parsed_event.file_name, parsed_event.object_key,
        file_download_path)
    return file_download_path
def upload_output(self, output_dir_path):
    """Receives the tmp_dir_path where the files to upload are stored and
    uploads files whose name matches the prefixes and suffixes specified
    in 'output'.

    Storage provider clients are created lazily, one per
    (provider_type, provider_id) pair, and reused across files.
    """
    get_logger().info('Searching for files to upload in folder \'%s\'',
                      output_dir_path)
    output_files = FileUtils.get_all_files_in_dir(output_dir_path)
    stg_providers = {}
    # Filter files by prefix and suffix
    for output in self.output:
        get_logger().info(
            'Checking files for uploading to \'%s\' on path: \'%s\'',
            output['storage_provider'], output['path'])
        provider_type = StrUtils.get_storage_type(
            output['storage_provider'])
        provider_id = StrUtils.get_storage_id(output['storage_provider'])
        # str.startswith/endswith accept a tuple of alternatives, which
        # replaces the original hand-rolled matching loops.
        prefixes = tuple(output.get('prefix', []))
        suffixes = tuple(output.get('suffix', []))
        for file_path in output_files:
            file_name = file_path.replace(f'{output_dir_path}/', '')
            # An empty (or absent) prefix/suffix list matches everything.
            if prefixes and not file_name.startswith(prefixes):
                continue
            if suffixes and not file_name.endswith(suffixes):
                continue
            # Only upload file if name matches the prefixes and suffixes
            if provider_type not in stg_providers:
                stg_providers[provider_type] = {}
            if provider_id not in stg_providers[provider_type]:
                auth_data = self._get_auth_data(provider_type, provider_id)
                stg_providers[provider_type][provider_id] = \
                    create_provider(auth_data)
            stg_providers[provider_type][provider_id].upload_file(
                file_path, file_name, output['path'])
def _parse_input(self):
    """Download input data from storage provider or save data from
    POST request.

    A function can have information from several storage providers, but
    one event always represents only one file (so far), so only one
    provider is used for each event received.
    """
    stg_prov = self._get_input_provider()
    if stg_prov:
        # BUG FIX: the provider type was logged before checking that a
        # provider was actually found, which raised AttributeError when
        # _get_input_provider() returned None; log inside the guard.
        get_logger().info("Found '%s' input provider", stg_prov.get_type())
        get_logger().info("Downloading input file using '%s' event",
                          self.parsed_event.get_type())
        input_file_path = storage.download_input(
            stg_prov, self.parsed_event,
            SysUtils.get_env_var("TMP_INPUT_DIR"))
        if input_file_path and FileUtils.is_file(input_file_path):
            SysUtils.set_env_var("INPUT_FILE_PATH", input_file_path)
            get_logger().info("INPUT_FILE_PATH variable set to '%s'",
                              input_file_path)
def save_event(self, input_dir_path):
    """Stores the unknown event and returns the file path where
    the file is stored.

    Valid JSON text (and non-string objects) are written as-is; a string
    that is not valid JSON is treated as base64-encoded binary data.
    """
    file_path = SysUtils.join_paths(input_dir_path, self._file_name)
    try:
        json.loads(self.event)
    except ValueError:
        # A string that is not JSON: assume base64-encoded binary.
        FileUtils.create_file_with_content(file_path,
                                           base64.b64decode(self.event),
                                           mode='wb')
        return file_path
    except TypeError:
        # Not a str/bytes payload (already-parsed object): store as-is.
        pass
    FileUtils.create_file_with_content(file_path, self.event)
    return file_path
def upload_file(self, file_path, file_name, output_path):
    """Uploads the file to the Onedata output path.

    Missing output folders are created top-down before the upload.
    Raises OnedataUploadError when the CDMI PUT is not accepted.
    """
    file_name = file_name.strip('/')
    output_path = output_path.strip('/')
    upload_path = f'{output_path}/{file_name}'
    upload_folder = FileUtils.get_dir_name(upload_path)
    # Create output folder (and subfolders) if it does not exists
    if not self._folder_exists(upload_folder):
        path = ''
        for folder in upload_folder.split('/'):
            path = f'{path}/{folder}'
            if not self._folder_exists(path):
                self._create_folder(path)
    # Upload the file
    url = (f'https://{self.oneprovider_host}{self._CDMI_PATH}/'
           f'{self.oneprovider_space}/{upload_path}')
    get_logger().info('Uploading file \'%s\' to space \'%s\'',
                      upload_path, self.oneprovider_space)
    with open(file_path, 'rb') as data:
        response = requests.put(url, data=data, headers=self.headers)
    if response.status_code not in (201, 202, 204):
        raise OnedataUploadError(file_name=file_name,
                                 status_code=response.status_code)
def test_cp_file(self, mock_cp):
    """cp_file must forward both paths to the copy primitive."""
    src, dst = '/tmp/src_file', '/tmp/dst_file'
    FileUtils.cp_file(src, dst)
    mock_cp.assert_called_once_with(src, dst)
def _set_event_params(self):
    """Extracts the bucket and object information from the S3 record."""
    s3_info = self.event_records['s3']
    self.bucket_arn = s3_info['bucket']['arn']
    self.bucket_name = s3_info['bucket']['name']
    # The key arrives URL-encoded in S3 events.
    self.object_key = unquote_plus(s3_info['object']['key'])
    self.file_name = FileUtils.get_file_name(self.object_key)
class Udocker():
    """Class in charge of managing the udocker binary."""

    # File where the container stdout/stderr is redirected.
    _CONTAINER_OUTPUT_FILE = SysUtils.join_paths(FileUtils.get_tmp_dir(),
                                                 "container-stdout")
    # Name assigned to the created udocker container.
    _CONTAINER_NAME = "udocker_container"
    # Interpreter used to execute user scripts inside the container.
    _SCRIPT_EXEC = "/bin/bash"

    def __init__(self, lambda_instance):
        """Prepares the udocker folder, the base 'udocker --quiet run'
        command and the container image id.

        Raises:
            ContainerImageNotFoundError: if no image is configured.
        """
        self.lambda_instance = lambda_instance
        # Create required udocker folder
        FileUtils.create_folder(SysUtils.get_env_var("UDOCKER_DIR"))
        # Init the udocker command that will be executed
        self.udocker_exec = [SysUtils.get_env_var("UDOCKER_EXEC")]
        self.cont_cmd = self.udocker_exec + ["--quiet", "run"]
        self.cont_img_id = ConfigUtils.read_cfg_var('container').get('image')
        if not self.cont_img_id:
            raise ContainerImageNotFoundError()

    def _list_udocker_images_cmd(self):
        # 'udocker images' command line.
        return self.udocker_exec + ["images"]

    def _load_udocker_image_cmd(self):
        # 'udocker load -i <image>' command line.
        return self.udocker_exec + ["load", "-i", self.cont_img_id]

    def _download_udocker_image_cmd(self):
        # 'udocker pull <image>' command line.
        return self.udocker_exec + ["pull", self.cont_img_id]

    def _list_udocker_containers_cmd(self):
        # 'udocker ps' command line.
        return self.udocker_exec + ["ps"]

    def _create_udocker_container_cmd(self):
        # 'udocker create --name=<name> <image>' command line.
        return self.udocker_exec + [
            "create", f"--name={self._CONTAINER_NAME}", self.cont_img_id
        ]

    def _set_udocker_container_execution_mode_cmd(self):
        # 'udocker setup --execmode=F1 <name>' command line.
        return self.udocker_exec + [
            "setup", "--execmode=F1", self._CONTAINER_NAME
        ]

    def _is_container_image_downloaded(self):
        """Returns True when the configured image id appears in the
        'udocker images' output."""
        cmd_out = SysUtils.execute_cmd_and_return_output(
            self._list_udocker_images_cmd())
        return self.cont_img_id in cmd_out

    def _load_local_container_image(self):
        """Loads the container image from a local image file."""
        get_logger().info("Loading container image '%s'", self.cont_img_id)
        SysUtils.execute_cmd(self._load_udocker_image_cmd())

    def _download_container_image(self):
        """Pulls the container image from Docker Hub."""
        get_logger().info("Pulling container '%s' from Docker Hub",
                          self.cont_img_id)
        SysUtils.execute_cmd(self._download_udocker_image_cmd())

    def _is_container_available(self):
        """Returns True when the container name appears in the
        'udocker ps' output."""
        cmd_out = SysUtils.execute_cmd_and_return_output(
            self._list_udocker_containers_cmd())
        return self._CONTAINER_NAME in cmd_out

    def _create_image(self):
        """Makes the container image available, loading it from a local
        file when 'IMAGE_FILE' is set, otherwise pulling it."""
        if self._is_container_image_downloaded():
            get_logger().info("Container image '%s' already available",
                              self.cont_img_id)
        else:
            if SysUtils.is_var_in_env("IMAGE_FILE"):
                self._load_local_container_image()
            else:
                self._download_container_image()

    def _create_container(self):
        """Creates the udocker container when missing and sets its
        execution mode."""
        if self._is_container_available():
            get_logger().info("Container already available")
        else:
            get_logger().info("Creating container based on image '%s'.",
                              self.cont_img_id)
            SysUtils.execute_cmd(self._create_udocker_container_cmd())
        SysUtils.execute_cmd(
            self._set_udocker_container_execution_mode_cmd())

    def _create_command(self):
        """Builds the final 'udocker run' command: volumes, environment
        variables and the entrypoint/arguments to execute."""
        self._add_container_volumes()
        self._add_container_environment_variables()
        # Container running script
        if hasattr(self.lambda_instance, 'script_path'):
            # Add script in memory as entrypoint
            self.cont_cmd += [(f"--entrypoint={self._SCRIPT_EXEC} "
                               f"{self.lambda_instance.script_path}"),
                              self._CONTAINER_NAME]
        # Container with args
        elif hasattr(self.lambda_instance, 'cmd_args'):
            # Add args
            self.cont_cmd += [self._CONTAINER_NAME]
            self.cont_cmd += self.lambda_instance.cmd_args
        # Script to be executed every time (if defined)
        elif hasattr(self.lambda_instance, 'init_script_path'):
            # Add init script
            self.cont_cmd += [(f"--entrypoint={self._SCRIPT_EXEC} "
                               f"{self.lambda_instance.init_script_path}"),
                              self._CONTAINER_NAME]
        # Only container
        else:
            self.cont_cmd += [self._CONTAINER_NAME]

    def _add_container_volumes(self):
        """Mounts the temporal input/output dirs plus required system
        paths inside the container."""
        self.cont_cmd.extend(["-v", SysUtils.get_env_var("TMP_INPUT_DIR")])
        self.cont_cmd.extend(["-v", SysUtils.get_env_var("TMP_OUTPUT_DIR")])
        self.cont_cmd.extend(
            ["-v", "/dev", "-v", "/proc", "-v", "/etc/hosts", "--nosysdirs"])
        if SysUtils.is_var_in_env('EXTRA_PAYLOAD'):
            self.cont_cmd.extend(["-v",
                                  self.lambda_instance.PERMANENT_FOLDER])

    def _add_cont_env_vars(self):
        """Forwards the user-defined container environment variables."""
        for key, value in SysUtils.get_cont_env_vars().items():
            self.cont_cmd.extend(_parse_cont_env_var(key, value))

    def _add_input_file(self):
        # Forwards INPUT_FILE_PATH to the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("INPUT_FILE_PATH",
                                SysUtils.get_env_var("INPUT_FILE_PATH")))

    def _add_output_dir(self):
        # Forwards TMP_OUTPUT_DIR to the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("TMP_OUTPUT_DIR",
                                SysUtils.get_env_var("TMP_OUTPUT_DIR")))

    def _add_storage_object_key(self):
        # Forwards STORAGE_OBJECT_KEY to the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("STORAGE_OBJECT_KEY",
                                SysUtils.get_env_var("STORAGE_OBJECT_KEY")))

    def _add_extra_payload_path(self):
        # Forwards EXTRA_PAYLOAD to the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("EXTRA_PAYLOAD",
                                SysUtils.get_env_var("EXTRA_PAYLOAD")))

    def _add_function_request_id(self):
        # Exposes the Lambda request id inside the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("REQUEST_ID",
                                self.lambda_instance.get_request_id()))

    def _add_aws_access_keys(self):
        """Forwards the AWS credentials of the function to the container."""
        self.cont_cmd.extend(
            _parse_cont_env_var("AWS_ACCESS_KEY_ID",
                                SysUtils.get_env_var("AWS_ACCESS_KEY_ID")))
        self.cont_cmd.extend(
            _parse_cont_env_var("AWS_SECRET_ACCESS_KEY",
                                SysUtils.get_env_var("AWS_SECRET_ACCESS_KEY")))
        self.cont_cmd.extend(
            _parse_cont_env_var("AWS_SESSION_TOKEN",
                                SysUtils.get_env_var("AWS_SESSION_TOKEN")))

    def _add_function_ip(self):
        # Exposes the function instance IP inside the container.
        self.cont_cmd.extend(
            _parse_cont_env_var("INSTANCE_IP", get_function_ip()))

    def _add_container_environment_variables(self):
        """Adds every forwarded environment variable to the command."""
        self._add_function_request_id()
        self._add_function_ip()
        self._add_aws_access_keys()
        self._add_cont_env_vars()
        self._add_input_file()
        self._add_output_dir()
        self._add_storage_object_key()
        self._add_extra_payload_path()

    def prepare_container(self):
        """Prepares the environment to execute the udocker container."""
        self._create_image()
        self._create_container()
        self._create_command()

    def launch_udocker_container(self):
        """Launches the udocker container.

        If the execution time of the container exceeds the defined
        execution time, the container is killed and a warning is raised.
        Returns the captured container output as bytes (b'' if none).
        """
        remaining_seconds = self.lambda_instance.get_remaining_time_in_seconds(
        )
        get_logger().info(
            "Executing udocker container. Timeout set to '%d' seconds",
            remaining_seconds)
        get_logger().debug("Udocker command: '%s'", self.cont_cmd)
        with open(self._CONTAINER_OUTPUT_FILE, "wb") as out:
            with subprocess.Popen(self.cont_cmd,
                                  stderr=subprocess.STDOUT,
                                  stdout=out,
                                  start_new_session=True) as process:
                try:
                    process.wait(timeout=remaining_seconds)
                except subprocess.TimeoutExpired:
                    get_logger().info("Stopping process '%s'", process)
                    process.kill()
                    raise ContainerTimeoutExpiredWarning()
        udocker_output = b''
        if FileUtils.is_file(self._CONTAINER_OUTPUT_FILE):
            udocker_output = FileUtils.read_file(self._CONTAINER_OUTPUT_FILE,
                                                 file_mode="rb")
        return udocker_output
def test_get_file_name(self, mock_os):
    """get_file_name must delegate to the patched os helper."""
    path = '/tmp/invented_file.jpg'
    FileUtils.get_file_name(path)
    mock_os.assert_called_with(path)
def test_set_file_execution_rights(self, mock_chmod, mock_stat):
    """Execution bits must be OR-ed into the current file mode."""
    # Simulate a file with no permission bits set.
    mock_stat().st_mode = 0o0000
    FileUtils.set_file_execution_rights('/tmp/invented_file')
    # 0o0000 | 0o0111 == 0o0111 == 73 decimal (u+x, g+x, o+x)
    mock_chmod.assert_called_once_with('/tmp/invented_file', 73)
def test_is_file(self, mock_os):
    """is_file must delegate to the patched os helper."""
    path = '/tmp/invented_file'
    FileUtils.is_file(path)
    mock_os.assert_called_with(path)
def test_get_all_files_in_dir(self, mock_os):
    """All walked files must be returned with their full paths."""
    walk_result = [('/tmp', ['t1'], ['f1', 'f2']),
                   ('/tmp/t1', [], ['f3'])]
    mock_os.return_value = walk_result
    found = FileUtils.get_all_files_in_dir('/tmp')
    mock_os.assert_called_once_with('/tmp')
    self.assertEqual(found, ['/tmp/f1', '/tmp/f2', '/tmp/t1/f3'])
def test_get_tmp_dir(self, mock_tmp):
    """get_tmp_dir must call the tempdir helper exactly once."""
    FileUtils.get_tmp_dir()
    self.assertEqual(mock_tmp.call_count, 1)
def test_create_tmp_dir(self, mock_tmp):
    """create_tmp_dir must call the tempdir helper exactly once."""
    FileUtils.create_tmp_dir()
    self.assertEqual(mock_tmp.call_count, 1)
def test_create_file_with_content(self):
    """A plain string content must be written as text ('w' mode)."""
    path, content = '/tmp/file', 'fifayfofum'
    mopen = mock.mock_open()
    with mock.patch('builtins.open', mopen, create=True):
        FileUtils.create_file_with_content(path, content)
    mopen.assert_called_once_with(path, 'w')
    mopen().write.assert_called_once_with(content)
def test_create_folder(self, mock_dir):
    """create_folder must tolerate pre-existing directories."""
    folder = '/tmp/folder'
    FileUtils.create_folder(folder)
    mock_dir.assert_called_once_with(folder, exist_ok=True)
def save_event(self, input_dir_path):
    """Stores the unknown event and returns the file path where
    the file is stored."""
    event_file_path = SysUtils.join_paths(input_dir_path, self._FILE_NAME)
    FileUtils.create_file_with_content(event_file_path, self.event)
    return event_file_path