Example #1
    def evaluate(self, data):
        """Evaluate the code needed to compute a given Data object."""
        try:
            inputs = copy.deepcopy(data.input)
            hydrate_input_references(inputs, data.process.input_schema)
            hydrate_input_uploads(inputs, data.process.input_schema)

            # Include special 'proc' variable in the context.
            inputs["proc"] = {
                "data_id": data.id,
                "data_dir": self.manager.get_executor().resolve_data_path(),
            }

            # Include special 'requirements' variable in the context.
            inputs["requirements"] = data.process.requirements
            # Inject default values and change resources according to
            # the current Django configuration.
            inputs["requirements"]["resources"] = data.process.get_resource_limits()

            script_template = data.process.run.get("program", "")

            # Get the appropriate expression engine. If none is defined, do not evaluate
            # any expressions.
            expression_engine = data.process.requirements.get("expression-engine", None)
            if not expression_engine:
                return script_template

            return self.get_expression_engine(expression_engine).evaluate_block(
                script_template, inputs, escape=self._escape, safe_wrapper=SafeString,
            )
        except EvaluationError as error:
            raise ExecutionError("{}".format(error))
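The evaluate() method above hands the script template to whatever expression engine the process declares (in Resolwe this is typically a Jinja-based engine). As a minimal illustrative sketch of just that templating step, using plain jinja2 rather than Resolwe's actual engine (which adds its own filters, escaping and safe-string handling), rendering hydrated inputs into a script could look like this; the inputs and template are made up for illustration:

# Minimal sketch of the templating step, using plain Jinja2 instead of
# Resolwe's expression engine; inputs and template are illustrative only.
import jinja2

inputs = {
    "reads": {"file": "sample.fastq.gz"},
    "proc": {"data_id": 42, "data_dir": "/data/42"},
}
script_template = "gzip -dk {{ reads.file }} && mv sample.fastq {{ proc.data_dir }}/"

env = jinja2.Environment(undefined=jinja2.StrictUndefined)
print(env.from_string(script_template).render(inputs))
# gzip -dk sample.fastq.gz && mv sample.fastq /data/42/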
Example #2
    def evaluate(self, data):
        """Evaluate the code needed to compute a given Data object."""
        try:
            inputs = copy.deepcopy(data.input)
            hydrate_input_references(inputs, data.process.input_schema)
            hydrate_input_uploads(inputs, data.process.input_schema)

            # Include special 'proc' variable in the context.
            inputs['proc'] = {
                'data_id': data.id,
                'data_dir': settings.FLOW_EXECUTOR['DATA_DIR'],
            }

            # Include special 'requirements' variable in the context.
            inputs['requirements'] = data.process.requirements

            script_template = data.process.run.get('program', '')

            # Get the appropriate expression engine. If none is defined, do not evaluate
            # any expressions.
            expression_engine = data.process.requirements.get('expression-engine', None)
            if not expression_engine:
                return script_template

            return self.get_expression_engine(expression_engine).evaluate_block(
                script_template, inputs, escape=self._escape, safe_wrapper=SafeString,
            )
        except EvaluationError as error:
            raise ExecutionError('{}'.format(error))
Example #3
    def evaluate(self, data):
        """Evaluate the code needed to compute a given Data object."""
        try:
            inputs = copy.deepcopy(data.input)
            hydrate_input_references(inputs, data.process.input_schema)
            hydrate_input_uploads(inputs, data.process.input_schema)

            # Include special 'proc' variable in the context.
            inputs['proc'] = {
                'data_id': data.id,
                'data_dir': self.manager.get_executor().resolve_data_path(),
            }

            # Include special 'requirements' variable in the context.
            inputs['requirements'] = data.process.requirements
            # Inject default values and change resources according to
            # the current Django configuration.
            inputs['requirements']['resources'] = data.process.get_resource_limits()

            script_template = data.process.run.get('program', '')

            # Get the appropriate expression engine. If none is defined, do not evaluate
            # any expressions.
            expression_engine = data.process.requirements.get('expression-engine', None)
            if not expression_engine:
                return script_template

            return self.get_expression_engine(expression_engine).evaluate_block(
                script_template, inputs,
                escape=self._escape,
                safe_wrapper=SafeString,
            )
        except EvaluationError as error:
            raise ExecutionError('{}'.format(error))
Example #4
    def prepare_runtime(self, runtime_dir, data):
        """Prepare runtime directory."""
        # Copy over Python process runtime (resolwe.process).
        import resolwe.process as runtime_package

        src_dir = os.path.dirname(inspect.getsourcefile(runtime_package))
        dest_package_dir = os.path.join(runtime_dir, PYTHON_RUNTIME_DIRNAME,
                                        'resolwe', 'process')
        shutil.copytree(src_dir, dest_package_dir)
        os.chmod(dest_package_dir, 0o755)

        # Write python source file.
        source = data.process.run.get('program', '')
        program_path = os.path.join(runtime_dir, PYTHON_PROGRAM_FILENAME)
        with open(program_path, 'w') as file:
            file.write(source)
        os.chmod(program_path, 0o755)

        # Write serialized inputs.
        inputs = copy.deepcopy(data.input)
        hydrate_input_references(inputs, data.process.input_schema)
        hydrate_input_uploads(inputs, data.process.input_schema)
        inputs_path = os.path.join(runtime_dir, PYTHON_INPUTS_FILENAME)

        # XXX: Skip serialization of LazyStorageJSON. We should support
        # LazyStorageJSON in Python processes on the new communication protocol
        def default(obj):
            """Get default value."""
            class_name = obj.__class__.__name__
            if class_name == 'LazyStorageJSON':
                return ''

            raise TypeError(
                f'Object of type {class_name} is not JSON serializable')

        with open(inputs_path, 'w') as file:
            json.dump(inputs, file, default=default)

        # Write serialized requirements.
        # Include special 'requirements' variable in the context.
        requirements = copy.deepcopy(data.process.requirements)
        # Inject default values and change resources according to
        # the current Django configuration.
        requirements['resources'] = data.process.get_resource_limits()
        requirements_path = os.path.join(runtime_dir,
                                         PYTHON_REQUIREMENTS_FILENAME)

        with open(requirements_path, 'w') as file:
            json.dump(requirements, file)

        # Generate volume maps required to expose needed files.
        volume_maps = {
            PYTHON_RUNTIME_DIRNAME: PYTHON_RUNTIME_VOLUME,
            PYTHON_PROGRAM_FILENAME: PYTHON_PROGRAM_VOLUME,
            PYTHON_INPUTS_FILENAME: PYTHON_INPUTS_VOLUME,
            PYTHON_REQUIREMENTS_FILENAME: PYTHON_REQUIREMENTS_VOLUME,
        }

        return volume_maps
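The only non-obvious part of the serialization above is the default callback passed to json.dump(), which lets the dump continue when it meets the one known non-serializable type instead of aborting. The pattern itself is plain standard-library JSON; here is a self-contained sketch where LazyThing is a hypothetical stand-in for LazyStorageJSON:

# Stand-alone illustration of the json default-callback pattern used above.
import json


class LazyThing:
    """Stand-in for a type json cannot serialize, such as LazyStorageJSON."""


def default(obj):
    """Serialize unknown lazy objects as an empty string instead of failing."""
    if obj.__class__.__name__ == "LazyThing":
        return ""
    raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")


inputs = {"reads": {"file": "sample.fastq"}, "storage": LazyThing()}
print(json.dumps(inputs, default=default))
# {"reads": {"file": "sample.fastq"}, "storage": ""}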
Example #5
def input_(data, field_path):
    """Return a hydrated value of the ``input`` field."""
    data_obj = Data.objects.get(id=data["__id"])

    inputs = copy.deepcopy(data_obj.input)
    # XXX: Optimize by hydrating only the required field (major refactoring).
    hydrate_input_references(inputs, data_obj.process.input_schema)
    hydrate_input_uploads(inputs, data_obj.process.input_schema)

    return dict_dot(inputs, field_path)
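Here dict_dot() (from resolwe.flow.utils) turns the dotted field_path, such as 'reads.file', into a nested lookup on the hydrated inputs. A simplified reimplementation of just that lookup behaviour, for illustration only (the real helper handles more cases):

# Simplified dotted-path lookup, illustrating what dict_dot does here;
# not the actual resolwe.flow.utils implementation.
from functools import reduce


def lookup(container, path):
    """Return container['a']['b'] for path 'a.b'."""
    return reduce(lambda item, key: item[key], path.split('.'), container)


inputs = {'reads': {'file': 'sample.fastq', 'refs': ['sample.fastq.stats']}}
print(lookup(inputs, 'reads.file'))  # sample.fastq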
Example #6
    def prepare_runtime(self, runtime_dir, data):
        """Prepare runtime directory."""
        # Copy over Python process runtime (resolwe.process).
        import resolwe.process as runtime_package

        src_dir = os.path.dirname(inspect.getsourcefile(runtime_package))
        dest_package_dir = os.path.join(runtime_dir, PYTHON_RUNTIME_DIRNAME, 'resolwe', 'process')
        shutil.copytree(src_dir, dest_package_dir)
        os.chmod(dest_package_dir, 0o755)

        # Write python source file.
        source = data.process.run.get('program', '')
        program_path = os.path.join(runtime_dir, PYTHON_PROGRAM_FILENAME)
        with open(program_path, 'w') as file:
            file.write(source)
        os.chmod(program_path, 0o755)

        # Write serialized inputs.
        inputs = copy.deepcopy(data.input)
        hydrate_input_references(inputs, data.process.input_schema)
        hydrate_input_uploads(inputs, data.process.input_schema)
        inputs_path = os.path.join(runtime_dir, PYTHON_INPUTS_FILENAME)

        # XXX: Skip serialization of LazyStorageJSON. We should support
        # LazyStorageJSON in Python processes on the new communication protocol
        def default(obj):
            """Get default value."""
            class_name = obj.__class__.__name__
            if class_name == 'LazyStorageJSON':
                return ''

            raise TypeError(f'Object of type {class_name} is not JSON serializable')

        with open(inputs_path, 'w') as file:
            json.dump(inputs, file, default=default)

        # Generate volume maps required to expose needed files.
        volume_maps = {
            PYTHON_RUNTIME_DIRNAME: PYTHON_RUNTIME_VOLUME,
            PYTHON_PROGRAM_FILENAME: PYTHON_PROGRAM_VOLUME,
            PYTHON_INPUTS_FILENAME: PYTHON_INPUTS_VOLUME,
        }

        return volume_maps
Example #7
    def test_hydrate_input_references(self):
        process = Process.objects.create(
            contributor=self.contributor,
            type='data:test:',
            output_schema=[
                {
                    'name': 'file',
                    'type': 'basic:file:',
                }, {
                    'name': 'file_list',
                    'type': 'list:basic:file:',
                }, {
                    'name': 'dir',
                    'type': 'basic:dir:',
                }, {
                    'name': 'dir_list',
                    'type': 'list:basic:dir:',
                },
            ],
        )
        descriptor_schema = DescriptorSchema.objects.create(
            contributor=self.contributor,
            schema=[
                {
                    'name': 'annotation',
                    'type': 'basic:string:',
                },
            ],
        )
        data = Data.objects.create(
            contributor=self.contributor,
            status=Data.STATUS_ERROR,
            process=process,
            # Workaround for skipping the validation.
            output={
                'file': {'file': 'some-file', 'refs': ['ref1']},
                'file_list': [{'file': 'some-file', 'refs': ['ref2']}, {'file': 'another-file'}],
                'dir': {'dir': 'some-dir', 'refs': ['ref3']},
                'dir_list': [{'dir': 'some-dir', 'refs': ['ref4']}, {'dir': 'another-dir'}],
            },
            descriptor_schema=descriptor_schema,
            descriptor={
                'annotation': 'my-annotation',
            },
            size=0,
        )

        input_schema = [
            {
                'name': 'data',
                'type': 'data:test:',
            },
        ]
        input_ = {'data': data.pk}
        hydrate_input_references(input_, input_schema)

        path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(data.id))

        self.assertEqual(input_['data']['__descriptor'], {'annotation': 'my-annotation'})
        self.assertEqual(input_['data']['__type'], 'data:test:')
        self.assertEqual(input_['data']['__id'], data.id)

        self.assertEqual(input_['data']['file']['file'].data_id, data.id)
        self.assertEqual(input_['data']['file']['file'].file_name, 'some-file')
        self.assertEqual(str(input_['data']['file']['file']), os.path.join(path_prefix, 'some-file'))

        self.assertEqual(input_['data']['file']['refs'][0].data_id, data.id)
        self.assertEqual(input_['data']['file']['refs'][0].file_name, 'ref1')
        self.assertEqual(str(input_['data']['file']['refs'][0]), os.path.join(path_prefix, 'ref1'))

        self.assertEqual(input_['data']['file_list'][0]['file'].data_id, data.id)
        self.assertEqual(input_['data']['file_list'][0]['file'].file_name, 'some-file')
        self.assertEqual(str(input_['data']['file_list'][0]['file']), os.path.join(path_prefix, 'some-file'))

        self.assertEqual(input_['data']['file_list'][0]['refs'][0].data_id, data.id)
        self.assertEqual(input_['data']['file_list'][0]['refs'][0].file_name, 'ref2')
        self.assertEqual(str(input_['data']['file_list'][0]['refs'][0]), os.path.join(path_prefix, 'ref2'))

        self.assertEqual(input_['data']['file_list'][1]['file'].data_id, data.id)
        self.assertEqual(input_['data']['file_list'][1]['file'].file_name, 'another-file')
        self.assertEqual(str(input_['data']['file_list'][1]['file']), os.path.join(path_prefix, 'another-file'))

        self.assertEqual(input_['data']['dir']['dir'].data_id, data.id)
        self.assertEqual(input_['data']['dir']['dir'].file_name, 'some-dir')
        self.assertEqual(str(input_['data']['dir']['dir']), os.path.join(path_prefix, 'some-dir'))

        self.assertEqual(input_['data']['dir']['refs'][0].data_id, data.id)
        self.assertEqual(input_['data']['dir']['refs'][0].file_name, 'ref3')
        self.assertEqual(str(input_['data']['dir']['refs'][0]), os.path.join(path_prefix, 'ref3'))

        self.assertEqual(input_['data']['dir_list'][0]['dir'].data_id, data.id)
        self.assertEqual(input_['data']['dir_list'][0]['dir'].file_name, 'some-dir')
        self.assertEqual(str(input_['data']['dir_list'][0]['dir']), os.path.join(path_prefix, 'some-dir'))

        self.assertEqual(input_['data']['dir_list'][0]['refs'][0].data_id, data.id)
        self.assertEqual(input_['data']['dir_list'][0]['refs'][0].file_name, 'ref4')
        self.assertEqual(str(input_['data']['dir_list'][0]['refs'][0]), os.path.join(path_prefix, 'ref4'))

        self.assertEqual(input_['data']['dir_list'][1]['dir'].data_id, data.id)
        self.assertEqual(input_['data']['dir_list'][1]['dir'].file_name, 'another-dir')
        self.assertEqual(str(input_['data']['dir_list'][1]['dir']), os.path.join(path_prefix, 'another-dir'))
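Taken together, the assertions above pin down the contract of hydrate_input_references(): a bare primary key in the inputs dictionary is replaced in place by the referenced object's metadata plus path-aware file and dir wrappers. A condensed before/after picture, with illustrative values and the wrappers shown as the plain path strings their str() would produce (assuming a DATA_DIR of /data):

# Condensed before/after shape, distilled from the test assertions above.
input_ = {'data': 31}  # bare primary key, as stored on Data.input

# After hydrate_input_references(input_, input_schema), the same slot holds
# roughly the following (file/dir entries are wrapper objects in reality):
hydrated = {
    'data': {
        '__id': 31,
        '__type': 'data:test:',
        '__descriptor': {'annotation': 'my-annotation'},
        'file': {'file': '/data/31/some-file', 'refs': ['/data/31/ref1']},
        'dir': {'dir': '/data/31/some-dir', 'refs': ['/data/31/ref3']},
    },
}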