def evaluate(self, data):
    """Evaluate the code needed to compute a given Data object.

    Hydrates the data object's inputs, exposes the special ``proc`` and
    ``requirements`` context variables and renders the process' program
    template with the configured expression engine.

    :param data: The Data object whose process script should be rendered.
    :return: The rendered script as a string.
    :raises ExecutionError: If expression evaluation fails.
    """
    try:
        inputs = copy.deepcopy(data.input)
        hydrate_input_references(inputs, data.process.input_schema)
        hydrate_input_uploads(inputs, data.process.input_schema)

        # Include special 'proc' variable in the context.
        inputs["proc"] = {
            "data_id": data.id,
            "data_dir": self.manager.get_executor().resolve_data_path(),
        }

        # Include special 'requirements' variable in the context. Deep copy
        # it first so injecting resources below does not mutate the shared
        # ``data.process.requirements`` object in place (``prepare_runtime``
        # already copies before mutating; this keeps ``evaluate`` consistent).
        inputs["requirements"] = copy.deepcopy(data.process.requirements)

        # Inject default values and change resources according to
        # the current Django configuration.
        inputs["requirements"]["resources"] = data.process.get_resource_limits()

        script_template = data.process.run.get("program", "")

        # Get the appropriate expression engine. If none is defined, do not
        # evaluate any expressions.
        expression_engine = data.process.requirements.get("expression-engine", None)
        if not expression_engine:
            return script_template

        return self.get_expression_engine(expression_engine).evaluate_block(
            script_template,
            inputs,
            escape=self._escape,
            safe_wrapper=SafeString,
        )
    except EvaluationError as error:
        raise ExecutionError("{}".format(error))
def evaluate(self, data):
    """Evaluate the code needed to compute a given Data object."""
    try:
        # Hydrate a private copy of the inputs so the stored values
        # remain untouched.
        context = copy.deepcopy(data.input)
        hydrate_input_references(context, data.process.input_schema)
        hydrate_input_uploads(context, data.process.input_schema)

        # Make the special 'proc' variable available in the context.
        context['proc'] = {
            'data_id': data.id,
            'data_dir': settings.FLOW_EXECUTOR['DATA_DIR'],
        }
        # Make the special 'requirements' variable available as well.
        context['requirements'] = data.process.requirements

        template = data.process.run.get('program', '')

        # Without a configured expression engine, hand back the template
        # verbatim instead of evaluating any expressions.
        engine_name = data.process.requirements.get('expression-engine', None)
        if not engine_name:
            return template

        engine = self.get_expression_engine(engine_name)
        return engine.evaluate_block(
            template,
            context,
            escape=self._escape,
            safe_wrapper=SafeString,
        )
    except EvaluationError as error:
        raise ExecutionError('{}'.format(error))
def evaluate(self, data):
    """Evaluate the code needed to compute a given Data object.

    Hydrates the data object's inputs, exposes the special ``proc`` and
    ``requirements`` context variables and renders the process' program
    template with the configured expression engine.

    :param data: The Data object whose process script should be rendered.
    :return: The rendered script as a string.
    :raises ExecutionError: If expression evaluation fails.
    """
    try:
        inputs = copy.deepcopy(data.input)
        hydrate_input_references(inputs, data.process.input_schema)
        hydrate_input_uploads(inputs, data.process.input_schema)

        # Include special 'proc' variable in the context.
        inputs['proc'] = {
            'data_id': data.id,
            'data_dir': self.manager.get_executor().resolve_data_path(),
        }

        # Include special 'requirements' variable in the context. Deep copy
        # it first so injecting resources below does not mutate the shared
        # ``data.process.requirements`` object in place (``prepare_runtime``
        # already copies before mutating; this keeps ``evaluate`` consistent).
        inputs['requirements'] = copy.deepcopy(data.process.requirements)

        # Inject default values and change resources according to
        # the current Django configuration.
        inputs['requirements']['resources'] = data.process.get_resource_limits()

        script_template = data.process.run.get('program', '')

        # Get the appropriate expression engine. If none is defined, do not
        # evaluate any expressions.
        expression_engine = data.process.requirements.get('expression-engine', None)
        if not expression_engine:
            return script_template

        return self.get_expression_engine(expression_engine).evaluate_block(
            script_template,
            inputs,
            escape=self._escape,
            safe_wrapper=SafeString,
        )
    except EvaluationError as error:
        raise ExecutionError('{}'.format(error))
def prepare_runtime(self, runtime_dir, data):
    """Prepare runtime directory."""
    # Copy over Python process runtime (resolwe.process).
    import resolwe.process as runtime_package

    runtime_src = os.path.dirname(inspect.getsourcefile(runtime_package))
    runtime_dest = os.path.join(
        runtime_dir, PYTHON_RUNTIME_DIRNAME, 'resolwe', 'process'
    )
    shutil.copytree(runtime_src, runtime_dest)
    os.chmod(runtime_dest, 0o755)

    # Write python source file.
    program_path = os.path.join(runtime_dir, PYTHON_PROGRAM_FILENAME)
    with open(program_path, 'w') as handle:
        handle.write(data.process.run.get('program', ''))
    os.chmod(program_path, 0o755)

    # Write serialized inputs.
    hydrated_inputs = copy.deepcopy(data.input)
    hydrate_input_references(hydrated_inputs, data.process.input_schema)
    hydrate_input_uploads(hydrated_inputs, data.process.input_schema)

    # XXX: Skip serialization of LazyStorageJSON. We should support
    # LazyStorageJSON in Python processes on the new communication protocol
    def default(obj):
        """Get default value."""
        class_name = obj.__class__.__name__
        if class_name == 'LazyStorageJSON':
            return ''
        raise TypeError(f'Object of type {class_name} is not JSON serializable')

    inputs_path = os.path.join(runtime_dir, PYTHON_INPUTS_FILENAME)
    with open(inputs_path, 'w') as handle:
        json.dump(hydrated_inputs, handle, default=default)

    # Write serialized requirements.
    # Include special 'requirements' variable in the context.
    requirements = copy.deepcopy(data.process.requirements)
    # Inject default values and change resources according to
    # the current Django configuration.
    requirements['resources'] = data.process.get_resource_limits()
    requirements_path = os.path.join(runtime_dir, PYTHON_REQUIREMENTS_FILENAME)
    with open(requirements_path, 'w') as handle:
        json.dump(requirements, handle)

    # Generate volume maps required to expose needed files.
    return {
        PYTHON_RUNTIME_DIRNAME: PYTHON_RUNTIME_VOLUME,
        PYTHON_PROGRAM_FILENAME: PYTHON_PROGRAM_VOLUME,
        PYTHON_INPUTS_FILENAME: PYTHON_INPUTS_VOLUME,
        PYTHON_REQUIREMENTS_FILENAME: PYTHON_REQUIREMENTS_VOLUME,
    }
def input_(data, field_path):
    """Return a hydrated value of the ``input`` field."""
    data_obj = Data.objects.get(id=data["__id"])
    # XXX: Optimize by hydrating only the required field (major refactoring).
    hydrated = copy.deepcopy(data_obj.input)
    hydrate_input_references(hydrated, data_obj.process.input_schema)
    hydrate_input_uploads(hydrated, data_obj.process.input_schema)
    # Resolve the dotted field path inside the hydrated inputs.
    return dict_dot(hydrated, field_path)
def input_(data, field_path):
    """Return a hydrated value of the ``input`` field."""
    data_obj = Data.objects.get(id=data['__id'])
    # XXX: Optimize by hydrating only the required field (major refactoring).
    hydrated = copy.deepcopy(data_obj.input)
    hydrate_input_references(hydrated, data_obj.process.input_schema)
    hydrate_input_uploads(hydrated, data_obj.process.input_schema)
    # Resolve the dotted field path inside the hydrated inputs.
    return dict_dot(hydrated, field_path)
def prepare_runtime(self, runtime_dir, data):
    """Prepare runtime directory."""
    # Copy over Python process runtime (resolwe.process).
    import resolwe.process as runtime_package

    runtime_src = os.path.dirname(inspect.getsourcefile(runtime_package))
    runtime_dest = os.path.join(
        runtime_dir, PYTHON_RUNTIME_DIRNAME, 'resolwe', 'process'
    )
    shutil.copytree(runtime_src, runtime_dest)
    os.chmod(runtime_dest, 0o755)

    # Write python source file.
    program_path = os.path.join(runtime_dir, PYTHON_PROGRAM_FILENAME)
    with open(program_path, 'w') as handle:
        handle.write(data.process.run.get('program', ''))
    os.chmod(program_path, 0o755)

    # Write serialized inputs.
    hydrated_inputs = copy.deepcopy(data.input)
    hydrate_input_references(hydrated_inputs, data.process.input_schema)
    hydrate_input_uploads(hydrated_inputs, data.process.input_schema)

    # XXX: Skip serialization of LazyStorageJSON. We should support
    # LazyStorageJSON in Python processes on the new communication protocol
    def default(obj):
        """Get default value."""
        class_name = obj.__class__.__name__
        if class_name == 'LazyStorageJSON':
            return ''
        raise TypeError(f'Object of type {class_name} is not JSON serializable')

    inputs_path = os.path.join(runtime_dir, PYTHON_INPUTS_FILENAME)
    with open(inputs_path, 'w') as handle:
        json.dump(hydrated_inputs, handle, default=default)

    # Generate volume maps required to expose needed files.
    return {
        PYTHON_RUNTIME_DIRNAME: PYTHON_RUNTIME_VOLUME,
        PYTHON_PROGRAM_FILENAME: PYTHON_PROGRAM_VOLUME,
        PYTHON_INPUTS_FILENAME: PYTHON_INPUTS_VOLUME,
    }
def test_hydrate_input_references(self):
    """Hydration wraps file/dir values in path objects carrying data id and file name."""
    # Process with one output of each file/dir flavor (scalar and list).
    process = Process.objects.create(
        contributor=self.contributor,
        type='data:test:',
        output_schema=[
            {
                'name': 'file',
                'type': 'basic:file:',
            },
            {
                'name': 'file_list',
                'type': 'list:basic:file:',
            },
            {
                'name': 'dir',
                'type': 'basic:dir:',
            },
            {
                'name': 'dir_list',
                'type': 'list:basic:dir:',
            },
        ],
    )
    descriptor_schema = DescriptorSchema.objects.create(
        contributor=self.contributor,
        schema=[
            {
                'name': 'annotation',
                'type': 'basic:string:',
            },
        ],
    )
    data = Data.objects.create(
        contributor=self.contributor,
        status=Data.STATUS_ERROR,
        process=process,  # Workaround for skipping the validation.
        output={
            'file': {'file': 'some-file', 'refs': ['ref1']},
            'file_list': [{'file': 'some-file', 'refs': ['ref2']}, {'file': 'another-file'}],
            'dir': {'dir': 'some-dir', 'refs': ['ref3']},
            'dir_list': [{'dir': 'some-dir', 'refs': ['ref4']}, {'dir': 'another-dir'}],
        },
        descriptor_schema=descriptor_schema,
        descriptor={
            'annotation': 'my-annotation',
        },
        size=0,
    )
    # Hydrate an input referencing the data object above by primary key.
    input_schema = [
        {
            'name': 'data',
            'type': 'data:test:',
        },
    ]
    input_ = {'data': data.pk}
    hydrate_input_references(input_, input_schema)

    # Hydrated paths resolve under <DATA_DIR>/<data id>.
    path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(data.id))

    # Special metadata fields injected by hydration.
    self.assertEqual(input_['data']['__descriptor'], {'annotation': 'my-annotation'})
    self.assertEqual(input_['data']['__type'], 'data:test:')
    self.assertEqual(input_['data']['__id'], data.id)

    # Scalar file output: value and its refs are path-like objects.
    self.assertEqual(input_['data']['file']['file'].data_id, data.id)
    self.assertEqual(input_['data']['file']['file'].file_name, 'some-file')
    self.assertEqual(str(input_['data']['file']['file']), os.path.join(path_prefix, 'some-file'))
    self.assertEqual(input_['data']['file']['refs'][0].data_id, data.id)
    self.assertEqual(input_['data']['file']['refs'][0].file_name, 'ref1')
    self.assertEqual(str(input_['data']['file']['refs'][0]), os.path.join(path_prefix, 'ref1'))

    # File list output: each entry (and its refs) is hydrated.
    self.assertEqual(input_['data']['file_list'][0]['file'].data_id, data.id)
    self.assertEqual(input_['data']['file_list'][0]['file'].file_name, 'some-file')
    self.assertEqual(str(input_['data']['file_list'][0]['file']), os.path.join(path_prefix, 'some-file'))
    self.assertEqual(input_['data']['file_list'][0]['refs'][0].data_id, data.id)
    self.assertEqual(input_['data']['file_list'][0]['refs'][0].file_name, 'ref2')
    self.assertEqual(str(input_['data']['file_list'][0]['refs'][0]), os.path.join(path_prefix, 'ref2'))
    self.assertEqual(input_['data']['file_list'][1]['file'].data_id, data.id)
    self.assertEqual(input_['data']['file_list'][1]['file'].file_name, 'another-file')
    self.assertEqual(str(input_['data']['file_list'][1]['file']), os.path.join(path_prefix, 'another-file'))

    # Scalar dir output.
    self.assertEqual(input_['data']['dir']['dir'].data_id, data.id)
    self.assertEqual(input_['data']['dir']['dir'].file_name, 'some-dir')
    self.assertEqual(str(input_['data']['dir']['dir']), os.path.join(path_prefix, 'some-dir'))
    self.assertEqual(input_['data']['dir']['refs'][0].data_id, data.id)
    self.assertEqual(input_['data']['dir']['refs'][0].file_name, 'ref3')
    self.assertEqual(str(input_['data']['dir']['refs'][0]), os.path.join(path_prefix, 'ref3'))

    # Dir list output.
    self.assertEqual(input_['data']['dir_list'][0]['dir'].data_id, data.id)
    self.assertEqual(input_['data']['dir_list'][0]['dir'].file_name, 'some-dir')
    self.assertEqual(str(input_['data']['dir_list'][0]['dir']), os.path.join(path_prefix, 'some-dir'))
    self.assertEqual(input_['data']['dir_list'][0]['refs'][0].data_id, data.id)
    self.assertEqual(input_['data']['dir_list'][0]['refs'][0].file_name, 'ref4')
    self.assertEqual(str(input_['data']['dir_list'][0]['refs'][0]), os.path.join(path_prefix, 'ref4'))
    self.assertEqual(input_['data']['dir_list'][1]['dir'].data_id, data.id)
    self.assertEqual(input_['data']['dir_list'][1]['dir'].file_name, 'another-dir')
    self.assertEqual(str(input_['data']['dir_list'][1]['dir']), os.path.join(path_prefix, 'another-dir'))