Example #1
    def run(self, data: Data, argv: List):
        """Select a concrete connector and run the process through it.

        :param data: The :class:`~resolwe.flow.models.Data` object that
            is to be run.
        :param argv: The argument vector used to spawn the executor.
        """
        process_scheduling = self.scheduling_class_map[
            data.process.scheduling_class]
        if "DISPATCHER_MAPPING" in getattr(settings, "FLOW_MANAGER", {}):
            class_name = settings.FLOW_MANAGER["DISPATCHER_MAPPING"][
                process_scheduling]
        else:
            class_name = getattr(settings, "FLOW_MANAGER",
                                 {}).get("NAME", DEFAULT_CONNECTOR)

        data.scheduled = now()
        data.save(update_fields=["scheduled"])

        workload_class = class_name.rsplit(".", maxsplit=1)[1]
        host, port, protocol = self._get_listener_settings(
            data, workload_class)
        argv[-1] += " {} {} {}".format(host, port, protocol)

        return self.connectors[class_name].submit(data, argv)
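The connector lookup above is driven entirely by Django settings. A minimal sketch of what such a configuration might look like; the dotted connector paths and the scheduling-class keys are illustrative assumptions, not taken from the examples:

# Hypothetical settings sketch; the key names follow the lookups in run()
# above, but the connector paths are made up for illustration.
FLOW_MANAGER = {
    # Fallback connector, used when no DISPATCHER_MAPPING is given.
    "NAME": "myproject.connectors.local",
    # Optional: route each scheduling class to its own connector.
    "DISPATCHER_MAPPING": {
        "interactive": "myproject.connectors.local",
        "batch": "myproject.connectors.slurm",
    },
}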
Example #2
    def _data_execute(self, data: Data):
        """Execute the Data object.

        The activities carried out here include target directory
        preparation, executor copying, settings serialization and actual
        execution of the object.

        :param data: The :class:`~resolwe.flow.models.Data` object to
            execute.
        """
        logger.debug(__("Manager preparing Data with id {} for processing.", data.id))

        # Prepare the executor's environment.
        try:
            self._prepare_data_dir(data)

            executor_module = ".{}".format(
                getattr(settings, "FLOW_EXECUTOR", {})
                .get("NAME", "resolwe.flow.executors.local")
                .rpartition(".executors.")[-1]
            )
            self._lock_inputs_local_storage_locations(data)

            argv = [
                "/bin/bash",
                "-c",
                getattr(settings, "FLOW_EXECUTOR", {}).get(
                    "PYTHON", "/usr/bin/env python"
                )
                + " -m executors "
                + executor_module
                + " {}".format(data.pk),
            ]
            self.executor.prepare_for_execution(data)
        except PermissionDenied as error:
            data.status = Data.STATUS_ERROR
            data.process_error.append("Permission denied for process: {}".format(error))
            data.save()
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save()
            return
        except OSError as err:
            logger.exception(
                __(
                    "OSError occurred while preparing data {} (will skip): {}",
                    data.id,
                    err,
                )
            )
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save()
            return

        # Hand off to the run() method for execution.
        logger.info(__("Running executor for data with id {}", data.pk))
        self.run(data, argv)
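For reference, the string manipulation above reduces a fully qualified executor name to a relative module name. A standalone check of that derivation (the default name is taken from the code above; the pk is a made-up example):

# Standalone illustration of the executor_module derivation in _data_execute().
name = "resolwe.flow.executors.local"  # default from the code above
executor_module = ".{}".format(name.rpartition(".executors.")[-1])
assert executor_module == ".local"
# With a hypothetical Data pk of 42, argv[-1] then ends with:
#   " -m executors .local 42"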
Example #3
    def run(self, data: Data, runtime_dir: Path, argv):
        """Select a concrete connector and run the process through it.

        :param data: The :class:`~resolwe.flow.models.Data` object that
            is to be run.
        :param runtime_dir: The directory the executor is run from.
        :param argv: The argument vector used to spawn the executor.
        """
        process_scheduling = self.scheduling_class_map[
            data.process.scheduling_class]
        if "DISPATCHER_MAPPING" in getattr(settings, "FLOW_MANAGER", {}):
            class_name = settings.FLOW_MANAGER["DISPATCHER_MAPPING"][
                process_scheduling]
        else:
            class_name = getattr(settings, "FLOW_MANAGER",
                                 {}).get("NAME", DEFAULT_CONNECTOR)

        data.scheduled = now()
        data.save(update_fields=["scheduled"])
        return self.connectors[class_name].submit(data, runtime_dir, argv)
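Both run() variants delegate to a connector's submit() method. Below is a minimal sketch of a connector conforming to this second interface, assuming a hypothetical local implementation that simply spawns the executor as a subprocess; it is not Resolwe code:

import subprocess
from pathlib import Path

class LocalConnectorSketch:
    """Illustrative stand-in for a workload connector; not Resolwe code."""

    def submit(self, data, runtime_dir: Path, argv):
        # Spawn the executor from the prepared runtime directory and
        # return the process id as a submission handle.
        process = subprocess.Popen(argv, cwd=str(runtime_dir))
        return process.pid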
Example #4
class BackendTest(TestCase):
    def setUp(self):
        u = get_user_model().objects.create_superuser('test', '*****@*****.**', 'test')
        self.p = Process(slug='test-processor',
                         name='Test Process',
                         contributor=u,
                         type='data:test',
                         version=1)
        self.p.save()

        self.d = Data(slug='test-data',
                      name='Test Data',
                      contributor=u,
                      process=self.p)
        self.d.save()

    def tearDown(self):
        for data in Data.objects.all():
            data_dir = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(data.id))
            shutil.rmtree(data_dir, ignore_errors=True)

    def test_manager(self):
        manager.communicate(verbosity=0)

    def test_dtlbash(self):
        self.p.slug = 'test-processor-dtlbash'
        self.p.run = {'script': """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"

echo '{"proc.info": "foo"}'
"""}
        self.p.save()

        self.d.slug = 'test-data-dtlbash'
        self.d.process = self.p
        self.d.save()
        self.d = Data(id=self.d.id)
Example #5
class BackendTest(TestCase):
    def setUp(self):
        super(BackendTest, self).setUp()

        self.p = Process(slug='test-processor',
                         name='Test Process',
                         contributor=self.contributor,
                         type='data:test',
                         version=1)
        self.p.save()

        self.d = Data(slug='test-data',
                      name='Test Data',
                      contributor=self.contributor,
                      process=self.p)
        self.d.save()

    def test_manager(self):
        manager.communicate(verbosity=0)

    def test_dtlbash(self):
        self.p.slug = 'test-processor-dtlbash'
        self.p.run = {
            'script':
            """
gen-info \"Test processor info\"
gen-warning \"Test processor warning\"

echo '{"proc.info": "foo"}'
"""
        }
        self.p.save()

        self.d.slug = 'test-data-dtlbash'
        self.d.process = self.p
        self.d.save()
        self.d = Data(id=self.d.id)
Example #6
    def migrate_data(self, data):
        contributor = self.get_contributor(data["author_id"])

        # DESCRIPTOR SCHEMA ############################################
        ds_fields = []
        ds_fields.extend(data.get("static_schema", []))
        ds_fields.extend(data.get("var_template", []))
        ds_fields.sort(key=lambda d: d["name"])
        ds_fields_dumped = json.dumps(ds_fields)

        if ds_fields_dumped in self.descriptor_schema_index:
            descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
        else:
            descriptor_schema = DescriptorSchema(schema=ds_fields)
            descriptor_schema.name = "data_{}_descriptor".format(data["_id"])
            descriptor_schema.contributor = contributor
            descriptor_schema.save()

            self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

        descriptor = {}
        descriptor.update(data.get("static", {}))
        descriptor.update(data.get("var", {}))

        # PROCESS ######################################################
        if "processor_version" not in data:
            data["processor_version"] = "0.0.0"

        process_slug = self.process_slug(data["processor_name"])
        process_version = data["processor_version"]
        try:
            process = Process.objects.get(slug=process_slug, version=process_version)
        except Process.DoesNotExist:
            latest = Process.objects.filter(slug=process_slug).order_by("-version").first()

            if latest:
                process = Process()
                process.name = latest.name
                process.slug = latest.slug
                process.category = latest.category
                process.description = latest.description
                process.contributor = latest.contributor

                process.version = process_version
                process.type = data["type"]
                process.output_schema = data["output_schema"]
                process.input_schema = data.get("input_schema", {})
                process.persistence = self.persistence_dict[data["persistence"]]

                process.run["script"] = 'gen-require common\ngen-error "Depricated process, use the latest version."'

                # XXX
                # process.created =
                # process.modified =

                process.save()

                # copy permissions from latest process
                for user, perms in get_users_with_perms(latest, attach_perms=True).items():
                    for perm in perms:
                        assign_perm(perm, user, process)
                for group, perms in get_groups_with_perms(latest, attach_perms=True).items():
                    for perm in perms:
                        assign_perm(perm, group, process)
            else:
                # Create dummy processor if there is no other version
                dummy_name = "Dummy processor of type {}".format(data["type"])
                try:
                    process = Process.objects.get(name=dummy_name)
                except Process.DoesNotExist:
                    process = Process.objects.create(
                        name=dummy_name,
                        slug="non-existent",
                        contributor=get_user_model().objects.filter(is_superuser=True).first(),
                        type=data["type"],
                        category="data:non-existent",
                        run={"script": {'gen-require common\ngen-error "This processor is not intendent to be run."'}},
                    )

        # DATA #########################################################
        new = Data()
        new.name = data.get("static", {}).get("name", "")
        if len(new.name) > 100:
            self.long_names.append(new.name)
            new.name = new.name[:97] + "..."
        new.status = self.status_dict[data["status"]]
        new.process = process
        new.contributor = contributor
        new.input = data["input"] if "input" in data else {}
        new.output = data["output"]
        new.descriptor_schema = descriptor_schema
        new.descriptor = descriptor
        new.checksum = data.get("checksum", "")
        # XXX: Django will change this on create
        new.created = data["date_created"]
        # XXX: Django will change this on save
        new.modified = data["date_modified"]
        if "date_start" in data and "date_finish" in data:
            new.started = data["date_start"]
            new.finished = data["date_finish"]
        elif "date_finish" in data:
            new.started = data["date_finish"]
            new.finished = data["date_finish"]
        elif "date_start" in data:
            new.started = data["date_start"]
            new.finished = data["date_start"]
        else:
            new.started = datetime.fromtimestamp(0)
            new.finished = datetime.fromtimestamp(0)
        new.save()

        for case_id in data["case_ids"]:
            try:
                collection = Collection.objects.get(pk=self.id_mapping["collection"][str(case_id)])
            except KeyError:
                self.missing_collections.add(str(case_id))
                continue
            collection.data.add(new)

        for field_schema, fields, path in iterate_fields(data["output"], data["output_schema"], ""):
            if "type" in field_schema and field_schema["type"].startswith("basic:json:"):
                self.storage_index[fields[field_schema["name"]]] = {"id": new.pk, "path": path}

        self.migrate_permissions(new, data)

        self.id_mapping["data"][str(data["_id"])] = new.pk

        # DESCRIPTOR SCHEMA PERMISSIONS ################################
        for user in get_users_with_perms(new):
            assign_perm("view_descriptorschema", user, obj=descriptor_schema)

        for group in get_groups_with_perms(new):
            assign_perm("view_descriptorschema", group, obj=descriptor_schema)
Example #7
        def process_data_object(data: Data):
            """Process a single data object."""
            # Lock for update. Note that we want this transaction to be as short as possible in
            # order to reduce contention and avoid deadlocks. This is why we do not lock all
            # resolving objects for update, but instead only lock one object at a time. This
            # allows managers running in parallel to process different objects.
            data = Data.objects.select_for_update().get(pk=data.pk)
            if data.status != Data.STATUS_RESOLVING:
                # The object might have already been processed while waiting for the lock to be
                # obtained. In this case, skip the object.
                return

            dep_status = dependency_status(data)

            if dep_status == Data.STATUS_ERROR:
                data.status = Data.STATUS_ERROR
                data.process_error.append("One or more inputs have status ERROR")
                data.process_rc = 1
                data.save()
                if hasattr(data, "worker"):
                    data.worker.status = Worker.STATUS_ERROR_PREPARING
                    data.worker.save(update_fields=["status"])

                return

            elif dep_status != Data.STATUS_DONE:
                return

            run_in_executor = False
            if data.process.run:
                try:
                    # Check if execution engine is sound and evaluate workflow.
                    execution_engine_name = data.process.run.get("language", None)
                    execution_engine = self.get_execution_engine(execution_engine_name)
                    run_in_executor = execution_engine_name != "workflow"
                    if not run_in_executor:
                        execution_engine.evaluate(data)
                    else:
                        # Set allocated resources
                        resource_limits = data.process.get_resource_limits()
                        data.process_memory = resource_limits["memory"]
                        data.process_cores = resource_limits["cores"]

                except (ExecutionError, InvalidEngineError) as error:
                    data.status = Data.STATUS_ERROR
                    data.process_error.append(
                        "Error in process script: {}".format(error)
                    )
                    data.save()
                    if hasattr(data, "worker"):
                        data.worker.status = Worker.STATUS_ERROR_PREPARING
                        data.worker.save(update_fields=["status"])

                    return
            if data.status != Data.STATUS_DONE:
                # The data object may already be marked as done by the execution engine. In this
                # case we must not revert the status to STATUS_WAITING.
                data.status = Data.STATUS_WAITING
            data.save(render_name=True)

            # Actually run the object only if nothing went wrong within the
            # transaction and it was not already evaluated.
            if run_in_executor:
                transaction.on_commit(
                    # Make sure the closure gets the right values here, since they're
                    # changed in the loop.
                    lambda d=data: self._data_execute(d)
                )
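The default-argument trick in the on_commit() call above is what pins data to its current value; a plain closure would be evaluated only after the surrounding loop has moved on. A self-contained demonstration of the difference:

# Why `lambda d=data: ...` instead of `lambda: ... data ...`:
# default arguments are bound at definition time, closures at call time.
late = [lambda: i for i in range(3)]
early = [lambda i=i: i for i in range(3)]
print([f() for f in late])   # [2, 2, 2] -- all share the final i
print([f() for f in early])  # [0, 1, 2] -- each kept its own i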
Example #8
    def migrate_data(self, data):
        """Migrate data."""
        contributor = self.get_contributor(data[u'author_id'])

        # DESCRIPTOR SCHEMA ############################################
        ds_fields = []
        ds_fields.extend(data.get(u'static_schema', []))
        ds_fields.extend(data.get(u'var_template', []))
        ds_fields.sort(key=lambda d: d[u'name'])
        ds_fields_dumped = json.dumps(ds_fields)

        if ds_fields_dumped in self.descriptor_schema_index:
            descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
        else:
            descriptor_schema = DescriptorSchema(schema=ds_fields)
            descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
            descriptor_schema.contributor = contributor
            descriptor_schema.save()

            self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

        descriptor = {}
        descriptor.update(data.get(u'static', {}))
        descriptor.update(data.get(u'var', {}))

        # PROCESS ######################################################
        if u'processor_version' not in data:
            data[u'processor_version'] = '0.0.0'

        process_slug = self.process_slug(data[u'processor_name'])
        process_version = data[u'processor_version']
        try:
            process = Process.objects.get(slug=process_slug, version=process_version)
        except Process.DoesNotExist:
            latest = Process.objects.filter(slug=process_slug).order_by('-version').first()

            if latest:
                process = Process()
                process.name = latest.name
                process.slug = latest.slug
                process.category = latest.category
                process.description = latest.description
                process.contributor = latest.contributor

                process.version = process_version
                process.type = data[u'type']
                process.output_schema = data[u'output_schema']
                process.input_schema = data.get(u'input_schema', {})
                process.persistence = self.persistence_dict[data[u'persistence']]

                process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

                # XXX
                # process.created =
                # process.modified =

                process.save()

                # copy permissions from latest process
                for user, perms in six.iteritems(get_users_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, user, process)
                for group, perms in six.iteritems(get_groups_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, group, process)
            else:
                # Create dummy processor if there is no other version
                dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
                try:
                    process = Process.objects.get(name=dummy_name)
                except Process.DoesNotExist:
                    process = Process.objects.create(
                        name=dummy_name,
                        slug='non-existent',
                        contributor=get_user_model().objects.filter(is_superuser=True).first(),
                        type=data[u'type'],
                        category='data:non-existent',
                        run={'script': 'gen-require common\ngen-error "This processor is not intended to be run."'},
                    )

        # DATA #########################################################
        new = Data()
        new.name = data.get(u'static', {}).get(u'name', '')
        if len(new.name) > 100:
            self.long_names.append(new.name)
            new.name = new.name[:97] + '...'
        new.status = self.status_dict[data[u'status']]
        new.process = process
        new.contributor = contributor
        new.input = data[u'input'] if u'input' in data else {}
        new.output = data[u'output']
        new.descriptor_schema = descriptor_schema
        new.descriptor = descriptor
        new.checksum = data.get(u'checksum', '')
        # XXX: Django will change this on create
        new.created = data[u'date_created']
        # XXX: Django will change this on save
        new.modified = data[u'date_modified']
        if u'date_start' in data and u'date_finish' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_finish']
        elif u'date_finish' in data:
            new.started = data[u'date_finish']
            new.finished = data[u'date_finish']
        elif u'date_start' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_start']
        else:
            new.started = datetime.fromtimestamp(0)
            new.finished = datetime.fromtimestamp(0)
        new.save()

        for case_id in data[u'case_ids']:
            try:
                collection = Collection.objects.get(pk=self.id_mapping[u'collection'][str(case_id)])
            except KeyError:
                self.missing_collections.add(str(case_id))
                continue
            collection.data.add(new)

        for field_schema, fields, path in iterate_fields(data[u'output'], data[u'output_schema'], ''):
            if 'type' in field_schema and field_schema['type'].startswith('basic:json:'):
                self.storage_index[fields[field_schema['name']]] = {
                    'id': new.pk,
                    'path': path,
                }

        self.migrate_permissions(new, data)

        self.id_mapping['data'][str(data[u'_id'])] = new.pk

        # DESCRIPTOR SCHEMA PERMISSIONS ################################
        for user in get_users_with_perms(new):
            assign_perm('view_descriptorschema', user, obj=descriptor_schema)

        for group in get_groups_with_perms(new):
            assign_perm('view_descriptorschema', group, obj=descriptor_schema)
Example #9
    def migrate_data(self, data):
        """Migrate data."""
        contributor = self.get_contributor(data[u'author_id'])

        # DESCRIPTOR SCHEMA ############################################
        ds_fields = []
        ds_fields.extend(data.get(u'static_schema', []))
        ds_fields.extend(data.get(u'var_template', []))
        ds_fields.sort(key=lambda d: d[u'name'])
        ds_fields_dumped = json.dumps(ds_fields)

        if ds_fields_dumped in self.descriptor_schema_index:
            descriptor_schema = self.descriptor_schema_index[ds_fields_dumped]
        else:
            descriptor_schema = DescriptorSchema(schema=ds_fields)
            descriptor_schema.name = 'data_{}_descriptor'.format(data[u'_id'])
            descriptor_schema.contributor = contributor
            descriptor_schema.save()

            self.descriptor_schema_index[ds_fields_dumped] = descriptor_schema

        descriptor = {}
        descriptor.update(data.get(u'static', {}))
        descriptor.update(data.get(u'var', {}))

        # PROCESS ######################################################
        if u'processor_version' not in data:
            data[u'processor_version'] = '0.0.0'

        process_slug = self.process_slug(data[u'processor_name'])
        process_version = data[u'processor_version']
        try:
            process = Process.objects.get(slug=process_slug,
                                          version=process_version)
        except Process.DoesNotExist:
            latest = Process.objects.filter(
                slug=process_slug).order_by('-version').first()

            if latest:
                process = Process()
                process.name = latest.name
                process.slug = latest.slug
                process.category = latest.category
                process.description = latest.description
                process.contributor = latest.contributor

                process.version = process_version
                process.type = data[u'type']
                process.output_schema = data[u'output_schema']
                process.input_schema = data.get(u'input_schema', {})
                process.persistence = self.persistence_dict[
                    data[u'persistence']]

                process.run['script'] = 'gen-require common\ngen-error "Deprecated process, use the latest version."'  # noqa pylint: disable=unsubscriptable-object

                # XXX
                # process.created =
                # process.modified =

                process.save()

                # copy permissions from latest process
                for user, perms in six.iteritems(
                        get_users_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, user, process)
                for group, perms in six.iteritems(
                        get_groups_with_perms(latest, attach_perms=True)):
                    for perm in perms:
                        assign_perm(perm, group, process)
            else:
                # Create dummy processor if there is no other version
                dummy_name = 'Dummy processor of type {}'.format(data[u'type'])
                try:
                    process = Process.objects.get(name=dummy_name)
                except Process.DoesNotExist:
                    process = Process.objects.create(
                        name=dummy_name,
                        slug='non-existent',
                        contributor=get_user_model().objects.filter(
                            is_superuser=True).first(),
                        type=data[u'type'],
                        category='data:non-existent',
                        run={
                            'script':
                                'gen-require common\ngen-error "This processor is not intended to be run."'
                        },
                    )

        # DATA #########################################################
        new = Data()
        new.name = data.get(u'static', {}).get(u'name', '')
        if len(new.name) > 100:
            self.long_names.append(new.name)
            new.name = new.name[:97] + '...'
        new.status = self.status_dict[data[u'status']]
        new.process = process
        new.contributor = contributor
        new.input = data[u'input'] if u'input' in data else {}
        new.output = data[u'output']
        new.descriptor_schema = descriptor_schema
        new.descriptor = descriptor
        new.checksum = data.get(u'checksum', '')
        # XXX: Django will change this on create
        new.created = data[u'date_created']
        # XXX: Django will change this on save
        new.modified = data[u'date_modified']
        if u'date_start' in data and u'date_finish' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_finish']
        elif u'date_finish' in data:
            new.started = data[u'date_finish']
            new.finished = data[u'date_finish']
        elif u'date_start' in data:
            new.started = data[u'date_start']
            new.finished = data[u'date_start']
        else:
            new.started = datetime.fromtimestamp(0)
            new.finished = datetime.fromtimestamp(0)
        new.save()

        for case_id in data[u'case_ids']:
            try:
                collection = Collection.objects.get(
                    pk=self.id_mapping[u'collection'][str(case_id)])
            except KeyError:
                self.missing_collections.add(str(case_id))
                continue
            collection.data.add(new)

        for field_schema, fields, path in iterate_fields(
                data[u'output'], data[u'output_schema'], ''):
            if 'type' in field_schema and field_schema['type'].startswith(
                    'basic:json:'):
                self.storage_index[fields[field_schema['name']]] = {
                    'id': new.pk,
                    'path': path,
                }

        self.migrate_permissions(new, data)

        self.id_mapping['data'][str(data[u'_id'])] = new.pk

        # DESCRIPTOR SCHEMA PERMISSIONS ################################
        for user in get_users_with_perms(new):
            assign_perm('view_descriptorschema', user, obj=descriptor_schema)

        for group in get_groups_with_perms(new):
            assign_perm('view_descriptorschema', group, obj=descriptor_schema)
Example #10
        def process_data_object(data: Data):
            """Process a single data object."""
            # Lock for update. Note that we want this transaction to be as short as possible in
            # order to reduce contention and avoid deadlocks. This is why we do not lock all
            # resolving objects for update, but instead only lock one object at a time. This
            # allows managers running in parallel to process different objects.
            data = Data.objects.select_for_update().get(pk=data.pk)
            if data.status != Data.STATUS_RESOLVING:
                # The object might have already been processed while waiting for the lock to be
                # obtained. In this case, skip the object.
                return

            dep_status = dependency_status(data)

            if dep_status == Data.STATUS_ERROR:
                data.status = Data.STATUS_ERROR
                data.process_error.append(
                    "One or more inputs have status ERROR")
                data.process_rc = 1
                data.save()
                if hasattr(data, "worker"):
                    data.worker.status = Worker.STATUS_ERROR_PREPARING
                    data.worker.save(update_fields=["status"])

                return

            elif dep_status != Data.STATUS_DONE:
                return

            if data.process.run:
                try:
                    execution_engine = data.process.run.get("language", None)
                    # Evaluation by the execution engine may spawn additional data objects and
                    # perform other queries on the database. Queries of all possible execution
                    # engines need to be audited for possibilities of deadlocks in case any
                    # additional locks are introduced. Currently, we only take an explicit lock on
                    # the currently processing object.
                    program = self.get_execution_engine(
                        execution_engine).evaluate(data)
                except (ExecutionError, InvalidEngineError) as error:
                    data.status = Data.STATUS_ERROR
                    data.process_error.append(
                        "Error in process script: {}".format(error))
                    data.save()
                    if hasattr(data, "worker"):
                        data.worker.status = Worker.STATUS_ERROR_PREPARING
                        data.worker.save(update_fields=["status"])

                    return

                # Set allocated resources:
                resource_limits = data.process.get_resource_limits()
                data.process_memory = resource_limits["memory"]
                data.process_cores = resource_limits["cores"]
            else:
                # If there is no run section, then we should not try to run
                # anything. But the program must not be set to None as then
                # the process will be stuck in waiting state.
                program = ""

            if data.status != Data.STATUS_DONE:
                # The data object may already be marked as done by the execution engine. In this
                # case we must not revert the status to STATUS_WAITING.
                data.status = Data.STATUS_WAITING
            data.save(render_name=True)

            # Actually run the object only if nothing went wrong within the transaction.
            transaction.on_commit(
                # Make sure the closure gets the right values here, since they're
                # changed in the loop.
                lambda d=data, p=program: self._data_execute(d, p))
Example #11
    def _data_execute(self, data: Data, program: str):
        """Execute the Data object.

        The activities carried out here include target directory
        preparation, executor copying, settings serialization and actual
        execution of the object.

        :param data: The :class:`~resolwe.flow.models.Data` object to
            execute.
        :param program: The process text the manager got out of
            execution engine evaluation.
        """
        # Notify dispatcher if there is nothing to do so it can check whether
        # conditions for raising runtime barrier are fulfilled.
        if not program:
            return

        logger.debug(
            __("Manager preparing Data with id {} for processing.", data.id))

        # Prepare the executor's environment.
        try:
            executor_env_vars = self.get_executor().get_environment_variables()
            program = self._include_environment_variables(
                program, executor_env_vars)
            data_dir = self._prepare_data_dir(data)
            executor_module, runtime_dir = self._prepare_executor(data)
            self._prepare_storage_connectors(runtime_dir)
            self._lock_inputs_local_storage_locations(data)

            # Execute execution engine specific runtime preparation.
            execution_engine = data.process.run.get("language", None)
            volume_maps = self.get_execution_engine(
                execution_engine).prepare_runtime(runtime_dir, data)

            self._prepare_context(data,
                                  data_dir,
                                  runtime_dir,
                                  RUNTIME_VOLUME_MAPS=volume_maps)
            self._prepare_script(runtime_dir, program)

            argv = [
                "/bin/bash",
                "-c",
                getattr(settings, "FLOW_EXECUTOR", {}).get(
                    "PYTHON", "/usr/bin/env python") + " -m executors " +
                executor_module,
            ]
        except PermissionDenied as error:
            data.status = Data.STATUS_ERROR
            data.process_error.append(
                "Permission denied for process: {}".format(error))
            data.save()
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save()
            return
        except OSError as err:
            logger.error(
                __(
                    "OSError occurred while preparing data {} (will skip): {}",
                    data.id,
                    err,
                ))
            if hasattr(data, "worker"):
                data.worker.status = Worker.STATUS_ERROR_PREPARING
                data.worker.save()
            return

        # Hand off to the run() method for execution.
        logger.info(__("Running {}", runtime_dir))
        self.run(data, runtime_dir, argv)