Example #1
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if "flow_collection" in p:
                if "entity" in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p["slug"]))
                    continue

                p["entity"] = {"type": p.pop("flow_collection")}

            if p["type"][-1] != ":":
                p["type"] += ":"

            if "category" in p and not p["category"].endswith(":"):
                p["category"] += ":"

            for field in ["input", "output"]:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema["type"].endswith(":"):
                        schema["type"] += ":"
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if "entity" in p:
                if "type" not in p["entity"]:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined"
                        .format(p["slug"]))
                    continue
                if "input" in p["entity"] and p["entity"].get(
                        "always_create", False):
                    self.stderr.write(
                        "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' "
                        "is set to true.".format(p["slug"]))
                    continue

                p["entity_type"] = p["entity"]["type"]
                p["entity_descriptor_schema"] = p["entity"].get(
                    "descriptor_schema", p["entity_type"])
                p["entity_input"] = p["entity"].get("input", None)
                p["entity_always_create"] = p["entity"].get(
                    "always_create", False)
                p.pop("entity")

                if not DescriptorSchema.objects.filter(
                        slug=p["entity_descriptor_schema"]).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p["slug"],
                                        p["entity_descriptor_schema"]))
                    continue

            if "persistence" in p:
                persistence_mapping = {
                    "RAW": Process.PERSISTENCE_RAW,
                    "CACHED": Process.PERSISTENCE_CACHED,
                    "TEMP": Process.PERSISTENCE_TEMP,
                }

                p["persistence"] = persistence_mapping[p["persistence"]]

            if "scheduling_class" in p:
                scheduling_class_mapping = {
                    "interactive": Process.SCHEDULING_CLASS_INTERACTIVE,
                    "batch": Process.SCHEDULING_CLASS_BATCH,
                }

                p["scheduling_class"] = scheduling_class_mapping[
                    p["scheduling_class"]]

            if "input" in p:
                p["input_schema"] = p.pop("input")

            if "output" in p:
                p["output_schema"] = p.pop("output")

            slug = p["slug"]

            if "run" in p:
                # Set default language to 'bash' if not set.
                p["run"].setdefault("language", "bash")

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p["run"]["language"])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault("output_schema",
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p["run"]["language"]))
                    continue

            # Validate whether the container image is allowed, based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"):
                try:
                    container_image = dict_dot(
                        p, "requirements.executor.docker.image")
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p["version"]
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared against `int_version`.
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
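
The version comparison above works on packed integers. As a minimal sketch (the bit widths below are assumed, not taken from the source), `convert_version_string_to_int` together with `VERSION_NUMBER_BITS` could look roughly like this:

    # Assumed bit layout for major.minor.patch; the real VERSION_NUMBER_BITS
    # constant may differ.
    VERSION_NUMBER_BITS = (8, 10, 14)


    def convert_version_string_to_int(version, bits):
        """Pack a dotted version string into one int for numeric comparison."""
        result = 0
        for component, width in zip((int(part) for part in version.split(".")), bits):
            if component >= 1 << width:
                raise ValueError("Version component too large for its bit field.")
            result = (result << width) | component
        return result


    assert (convert_version_string_to_int("1.0.1", VERSION_NUMBER_BITS)
            > convert_version_string_to_int("0.9.9", VERSION_NUMBER_BITS))
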
Example #2
    def _processing_mountpoints(
        self, location_subpath: Path, execution_engine_name: str
    ):
        """Mountpoints for processing container.

        Processing and input volume (if defined) and all mountable connectors
        are mounted inside container. All except processing volume are mounted
        read-only.
        """
        mount_points = [
            {
                "name": storage_settings.FLOW_VOLUMES["processing"]["config"]["name"],
                "mountPath": os.fspath(constants.PROCESSING_VOLUME),
                "subPath": os.fspath(location_subpath),
                "readOnly": False,
            },
        ]
        if "input" in storage_settings.FLOW_VOLUMES:
            mount_points.append(
                {
                    "name": storage_settings.FLOW_VOLUMES["input"]["config"]["name"],
                    "mountPath": os.fspath(constants.INPUTS_VOLUME),
                    "readOnly": False,
                }
            )
        mount_points += [
            {
                "name": connector.name,
                "mountPath": f"/{storage_name}_{connector.name}",
                "readOnly": storage_name != "upload",
            }
            for storage_name, connector in self._get_mountable_connectors()
        ]

        mount_points += [
            {
                "name": "files-volume",
                "mountPath": "/etc/passwd",
                "subPath": "passwd",
            },
            {
                "name": "files-volume",
                "mountPath": "/etc/group",
                "subPath": "group",
            },
            {
                "name": "files-volume",
                "mountPath": "/socket_utils.py",
                "subPath": "socket-utils",
            },
            {
                "name": "files-volume",
                "mountPath": "/processing.py",
                "subPath": "startup-script",
            },
            {
                "name": "files-volume",
                "mountPath": "/constants.py",
                "subPath": "constants",
            },
            {
                "name": "sockets-volume",
                "mountPath": os.fspath(constants.SOCKETS_VOLUME),
            },
            {
                "name": "secrets-volume",
                "mountPath": os.fspath(constants.SECRETS_VOLUME),
                "readOnly": True,
            },
        ]

        if "tools" in storage_settings.FLOW_VOLUMES:
            # Create volumes for tools. In Kubernetes, all tools must live
            # inside a volume of type persistent_volume or host_path.
            volume = storage_settings.FLOW_VOLUMES["tools"]
            preparer = BaseFlowExecutorPreparer()
            tools_paths = preparer.get_tools_paths()
            base_tools_path = volume["config"]["path"]
            subpath = volume["config"].get("subpath", "")
            if subpath:
                tools_paths = [f"{subpath}/{path}" for path in tools_paths]
            mount_points += [
                {
                    "name": volume["config"]["name"],
                    "mountPath": os.fspath(Path("/usr/local/bin/resolwe") / str(index)),
                    "subPath": os.fspath(Path(tool).relative_to(base_tools_path)),
                }
                for index, tool in enumerate(tools_paths)
            ]

        from resolwe.flow.managers import manager

        execution_engine = manager.get_execution_engine(execution_engine_name)
        runtime_volumes = execution_engine.prepare_volumes()
        subpath = storage_settings.FLOW_VOLUMES["runtime"]["config"].get("subpath", "")
        if subpath:
            runtime_volumes = {
                f"{subpath}/{src}": dst for src, dst in runtime_volumes.items()
            }

        mount_points += [
            {
                "name": storage_settings.FLOW_VOLUMES["runtime"]["config"]["name"],
                "mountPath": dst,
                "subPath": src,
            }
            for src, dst in runtime_volumes.items()
        ]
        return mount_points
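
The dictionaries returned here follow the Kubernetes `volumeMounts` schema, so the list can be embedded directly into a container spec. A hedged usage sketch (the wrapper function, container name and image below are illustrative, not from the source):

    from pathlib import Path


    def build_container_spec(connector, location_subpath: Path) -> dict:
        """Hypothetical wrapper: embed the mounts into a container spec.

        `connector` is assumed to be an instance of the class that defines
        _processing_mountpoints; the name and image are placeholders.
        """
        return {
            "name": "processing",
            "image": "example.org/resolwe-runtime:1.0",  # placeholder image
            "volumeMounts": connector._processing_mountpoints(
                location_subpath, execution_engine_name="bash"
            ),
        }
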
Example #3
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            # get `data_name` from `static`
            if 'static' in p:
                for schema, _, _ in iterate_schema({}, p['static']):
                    if schema['name'] == 'name' and 'default' in schema:
                        p['data_name'] = schema['default']

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in p:
                p['slug'] = slugify(p.pop('name').replace(':', '-'))
                p['name'] = p.pop('label')

                p.pop('var', None)
                p.pop('static', None)

            for field in ['input', 'output', 'var', 'static']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared against `int_version`.
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
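
The backward-compatibility branch above rewrites old-style schemas in place. A small illustration, assuming `slugify` is Django's (the snippet does not show its import):

    from django.utils.text import slugify

    # Old-style schema: `name` holds the identifier, `label` the display name.
    old_style = {"name": "alignment:bwa-mem", "label": "BWA MEM"}
    slug = slugify(old_style.pop("name").replace(":", "-"))
    assert slug == "alignment-bwa-mem"
    # After the rewrite, `label` is promoted to `name`, as in the code above.
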
Example #4
    def _processing_mountpoints(
        self, location_subpath: Path, execution_engine_name: str
    ):
        """Mountpoints for processing container.

        Processing and input volume (if defined) and all mountable connectors
        are mounted inside container. All except processing volume are mounted
        read-only.
        """
        mount_points = [
            {
                "name": constants.PROCESSING_VOLUME_NAME,
                "mountPath": os.fspath(constants.PROCESSING_VOLUME),
                "subPath": os.fspath(location_subpath),
                "readOnly": False,
            },
        ]
        if constants.INPUTS_VOLUME_NAME in storage_settings.FLOW_VOLUMES:
            mount_points.append(
                {
                    "name": constants.INPUTS_VOLUME_NAME,
                    "mountPath": os.fspath(constants.INPUTS_VOLUME),
                    "readOnly": False,
                }
            )
        mount_points += [
            {
                "name": connector.name,
                "mountPath": f"/{storage_name}_{connector.name}",
                "readOnly": storage_name != "upload",
            }
            for storage_name, connector in self._get_mountable_connectors()
        ]

        mount_points += [
            {
                "name": "files-volume",
                "mountPath": "/etc/passwd",
                "subPath": "passwd",
            },
            {
                "name": "files-volume",
                "mountPath": "/etc/group",
                "subPath": "group",
            },
            {
                "name": "files-volume",
                "mountPath": "/socket_utils.py",
                "subPath": "socket-utils",
            },
            {
                "name": "files-volume",
                "mountPath": "/processing.py",
                "subPath": "startup-script",
            },
            {
                "name": "files-volume",
                "mountPath": "/constants.py",
                "subPath": "constants",
            },
            {
                "name": "sockets-volume",
                "mountPath": os.fspath(constants.SOCKETS_VOLUME),
            },
            {
                "name": "secrets-volume",
                "mountPath": os.fspath(constants.SECRETS_VOLUME),
                "readOnly": True,
            },
        ]
        for tool_index in range(len(BaseFlowExecutorPreparer().get_tools_paths())):
            mount_points.append(
                {
                    "name": f"tools-{tool_index}",
                    "mountPath": f"/usr/local/bin/resolwe/{tool_index}",
                }
            )

        from resolwe.flow.managers import manager

        execution_engine = manager.get_execution_engine(execution_engine_name)
        runtime_volumes = execution_engine.prepare_volumes()
        subpath = storage_settings.FLOW_VOLUMES["runtime"]["config"].get("subpath", "")
        if subpath:
            runtime_volumes = {
                f"{subpath}/{src}": dst for src, dst in runtime_volumes.items()
            }

        mount_points += [
            {
                "name": "runtime",
                "mountPath": dst,
                "subPath": src,
            }
            for src, dst in runtime_volumes.items()
        ]
        return mount_points
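
Unlike Example #2, the tools directories here are mounted from per-tool volumes named `tools-{index}`. A hypothetical counterpart (not from the source) is the matching volume list in the pod spec, sketched here as one `hostPath` volume per directory reported by `get_tools_paths()`:

    tools_paths = ["/opt/genialis/tools"]  # stand-in for get_tools_paths()
    volumes = [
        {"name": f"tools-{index}", "hostPath": {"path": path, "type": "Directory"}}
        for index, path in enumerate(tools_paths)
    ]
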
Example #5
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[
                    p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            # Validate whether the container image is allowed, based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(
                        p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared against `int_version`.
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
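
The container-image check above relies on `dict_dot` raising `KeyError` when no image is declared. A minimal sketch of that lookup behaviour (the real helper in Resolwe supports more than plain dictionary access):

    def dict_dot(d, path):
        """Resolve a dotted path through nested dictionaries (minimal sketch)."""
        for key in path.split("."):
            d = d[key]  # a missing key raises KeyError, as the caller expects
        return d


    schema = {"requirements": {"executor": {"docker": {"image": "example/worker:1.0"}}}}
    assert dict_dot(schema, "requirements.executor.docker.image") == "example/worker:1.0"
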
Example #6
    def register_processes(self, process_schemas, user, force=False, verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if 'flow_collection' in p:
                if 'entity' in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p['slug'])
                    )
                    continue

                p['entity'] = {'type': p.pop('flow_collection')}

            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'entity' in p:
                if 'type' not in p['entity']:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug'])
                    )
                    continue

                p['entity_type'] = p['entity']['type']
                p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type'])
                p['entity_input'] = p['entity'].get('input', None)
                p.pop('entity')

                if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p['slug'], p['entity_descriptor_schema'])
                    )
                    continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema', []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write("Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']
                    ))
                    continue

            # Validate whether the container image is allowed, based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                        self.stderr.write("Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                        ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared against `int_version`.
            latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip processor {}: same version installed".format(slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
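
For context, a hypothetical sketch of how `register_processes` might be driven from a Django management command; the `find_schemas` loader and the contributor lookup are assumptions, not shown in the source:

    from django.contrib.auth import get_user_model
    from django.core.management.base import BaseCommand


    class Command(BaseCommand):
        """Hypothetical wiring; assumes register_processes (above) is also defined here."""

        def handle(self, *args, **options):
            process_schemas = self.find_schemas()  # assumed .yml schema loader
            user = get_user_model().objects.filter(is_superuser=True).first()
            self.register_processes(
                process_schemas,
                user,
                force=options.get("force", False),
                verbosity=int(options.get("verbosity", 1)),
            )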