Example #1
File: utils.py Project: MaslovaEV/resolwe
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    inputs = copy.deepcopy(data.input)
    if data.process.input_schema:
        hydrate_input_references(inputs,
                                 data.process.input_schema,
                                 hydrate_values=False)
    template_context = inputs

    # Set default values
    for field_schema, field, path in iterate_schema(
            data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            tmpl = field_schema['default']
            if field_schema['type'].startswith('list:'):
                tmpl = [
                    render_template(data.process, tmp, template_context)
                    if isinstance(tmp, six.string_types) else tmp
                    for tmp in tmpl
                ]
            elif isinstance(tmpl, six.string_types):
                tmpl = render_template(data.process, tmpl, template_context)

            dict_dot(data, path, tmpl)
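
Note: every example on this page hinges on resolwe's ``dict_dot`` utility. As a rough mental model, here is a minimal sketch of its dotted-path get/set behavior; it is only a sketch, and the real implementation additionally supports a ``default`` factory (see Examples #10 and #36) and attribute access on objects:

def dict_dot(d, path, value=None):
    """Sketch: get d[path] if value is None, otherwise set d[path] = value.

    Setting a literal None is not supported by this simplified version.
    """
    keys = path.split(".") if path else []
    if value is None:
        for key in keys:
            d = d[key]  # raises KeyError on a missing path segment
        return d
    for key in keys[:-1]:
        d = d.setdefault(key, {})
    d[keys[-1]] = value
    return value

nested = {}
dict_dot(nested, "general.species", "Homo sapiens")
assert nested == {"general": {"species": "Homo sapiens"}}
assert dict_dot(nested, "general.species") == "Homo sapiens"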
Example #2
    def handle_update_model_fields(
        self, message: Message[Tuple[str, str, int, Dict[str, Any]]], manager: "Processor"
    ) -> Response[str]:
        """Update the value for the given fields.

        The received message format is
        (app_name, model name, model primary key, names -> values).

        Field name can be given in dot notation for JSON fields.

        :raises RuntimeError: if the user has no permission to modify the object.
        """
        app_name, model_name, model_pk, mapping = message.message_data
        full_model_name = f"{app_name}.{model_name}"

        # The most common request is for the data object we are processing.
        # Avoid hitting the database in that case.
        if full_model_name == "flow.Data" and model_pk == manager.data_id:
            model_instance = manager.data
            model = Data
        else:
            model = apps.get_model(app_name, model_name)
            model_instance = model.objects.filter(pk=model_pk).get()

        self._permission_manager.can_update(
            manager.contributor, full_model_name, model_instance, mapping, manager.data
        )

        # Update all fields except m2m.
        update_fields = []
        for field_name, field_value in mapping.items():
            # It is not obvious how to handle this: output is a JSONField
            # that is only ever updated, while other JSON fields should
            # probably be replaced. Compromise: when the incoming value is a
            # dict, only the values in it are updated; otherwise the whole
            # field is replaced.
            if isinstance(model._meta.get_field(field_name), JSONField) and isinstance(
                field_value, dict
            ):
                update_fields.append(field_name)
                current_value = getattr(model_instance, field_name)
                for key, value in field_value.items():
                    dict_dot(current_value, key, value)
            elif isinstance(model._meta.get_field(field_name), ManyToManyField):
                assert isinstance(
                    field_value, list
                ), "Only lists may be assigned to many-to-many relations"
                field = getattr(model_instance, field_name)
                field_value_set = set(field_value)
                current_objects = set(field.all().values_list("pk", flat=True))
                objects_to_add = field_value_set - current_objects
                objects_to_remove = current_objects - field_value_set
                if objects_to_remove:
                    field.remove(*objects_to_remove)
                if objects_to_add:
                    field.add(*objects_to_add)
            else:
                update_fields.append(field_name)
                setattr(model_instance, field_name, field_value)
        model_instance.save(update_fields=update_fields)
        return message.respond_ok("OK")
Example #3
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(
        process_input, input_schema, include_groups=True
    ):
        if "group" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, {})
        if "default" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, field_schema["default"])
Example #4
    def process_object(self, obj):
        """Process current object and push it to the ElasticSearch."""
        document = self.document_class(meta={'id': self.generate_id(obj)})  # pylint: disable=not-callable

        for field in document._doc_type.mapping:  # pylint: disable=protected-access
            if field in ['users_with_permissions', 'groups_with_permissions', 'public_permission']:
                continue  # These fields are handled separately

            try:
                # use get_X_value function
                get_value_function = getattr(self, 'get_{}_value'.format(field), None)
                if get_value_function:
                    setattr(document, field, get_value_function(obj))  # pylint: disable=not-callable
                    continue

                # use `mapping` dict
                if field in self.mapping:
                    if callable(self.mapping[field]):
                        setattr(document, field, self.mapping[field](obj))
                        continue

                    try:
                        object_attr = dict_dot(obj, self.mapping[field])
                    except (KeyError, AttributeError):
                        object_attr = None

                    if callable(object_attr):
                        # use method on object
                        setattr(document, field, object_attr(obj))
                    else:
                        # use attribute on object
                        setattr(document, field, object_attr)
                    continue

                # get value from the object
                try:
                    object_value = dict_dot(obj, field)
                    setattr(document, field, object_value)
                    continue
                except KeyError:
                    pass

                raise AttributeError("Cannot determine mapping for field {}".format(field))

            except:  # noqa pylint: disable=bare-except
                logger.exception(
                    "Error occurred while setting value of field '%s' in '%s' Elasticsearch index.",
                    field, self.__class__.__name__,
                    extra={'object_type': self.object_type, 'obj_id': obj.pk}
                )

        permissions = self.get_permissions(obj)
        document.users_with_permissions = permissions['users']
        document.groups_with_permissions = permissions['groups']
        document.public_permission = permissions['public']

        self.push_queue.append(document)
Example #5
File: indices.py Project: genialis/resolwe
    def process_object(self, obj):
        """Process current object and push it to the ElasticSearch."""
        document = self.document_class(meta={'id': self.generate_id(obj)})

        for field in document._doc_type.mapping:  # pylint: disable=protected-access
            if field in ['users_with_permissions', 'groups_with_permissions', 'public_permission']:
                continue  # These fields are handled separately

            try:
                # use get_X_value function
                get_value_function = getattr(self, 'get_{}_value'.format(field), None)
                if get_value_function:
                    setattr(document, field, get_value_function(obj))  # pylint: disable=not-callable
                    continue

                # use `mapping` dict
                if field in self.mapping:
                    if callable(self.mapping[field]):
                        setattr(document, field, self.mapping[field](obj))
                        continue

                    try:
                        object_attr = dict_dot(obj, self.mapping[field])
                    except (KeyError, AttributeError):
                        object_attr = None

                    if callable(object_attr):
                        # use method on object
                        setattr(document, field, object_attr(obj))
                    else:
                        # use attribute on object
                        setattr(document, field, object_attr)
                    continue

                # get value from the object
                try:
                    object_value = dict_dot(obj, field)
                    setattr(document, field, object_value)
                    continue
                except KeyError:
                    pass

                raise AttributeError("Cannot determine mapping for field {}".format(field))

            except Exception:  # pylint: disable=broad-except
                logger.exception(
                    "Error occurred while setting value of field '%s' in '%s' Elasticsearch index.",
                    field, self.__class__.__name__,
                    extra={'object_type': self.object_type, 'obj_id': obj.pk}
                )

        permissions = self.get_permissions(obj)
        document.users_with_permissions = permissions['users']
        document.groups_with_permissions = permissions['groups']
        document.public_permission = permissions['public']

        self.push_queue.append(document)
Example #6
    def handle_update_output(
        self, message: Message[Dict[str, Any]], manager: "Processor"
    ) -> Response[str]:
        """Update data output."""
        for key, val in message.message_data.items():
            if key not in manager.storage_fields:
                dict_dot(manager.data.output, key, val)
            else:
                manager.save_storage(key, val)
        with transaction.atomic():
            manager._update_data({"output": manager.data.output})
        return message.respond_ok("OK")
Example #7
    def process_object(self, obj, push=True):
        """Process current object and push it to the ElasticSearch."""
        document = self.document_class(meta={'id': self.generate_id(obj)})  # pylint: disable=not-callable

        for field in document._doc_type.mapping:  # pylint: disable=protected-access
            if field in ['users_with_permissions', 'groups_with_permissions']:
                continue  # These fields are handled separately

            # use get_X_value function
            get_value_function = getattr(self, 'get_{}_value'.format(field), None)
            if get_value_function:
                setattr(document, field, get_value_function(obj))
                continue

            # use `mapping` dict
            if field in self.mapping:
                if callable(self.mapping[field]):
                    setattr(document, field, self.mapping[field](obj))
                    continue

                try:
                    object_attr = dict_dot(obj, self.mapping[field])
                except (KeyError, AttributeError):
                    object_attr = None

                if callable(object_attr):
                    # use method on object
                    setattr(document, field, object_attr(obj))
                else:
                    # use attribute on object
                    setattr(document, field, object_attr)
                continue

            # get value from the object
            try:
                object_value = dict_dot(obj, field)
                setattr(document, field, object_value)
                continue
            except KeyError:
                pass

            raise AttributeError('Cannot determine mapping for field {}'.format(field))

        permissions = self.get_permissions(obj)
        document.users_with_permissions = permissions['users']
        document.groups_with_permissions = permissions['groups']
        document.public_permission = permissions['public']

        if push:
            document.save(refresh=True)
        else:
            self.push_queue.append(document)
Example #8
    def handle_annotate(self, message: Message[dict],
                        manager: "Processor") -> Response[str]:
        """Handle an incoming ``Data`` object annotate request."""

        if manager.data.entity is None:
            raise RuntimeError(
                f"No entity to annotate for process '{manager.data.process.slug}'"
            )

        for key, val in message.message_data.items():
            dict_dot(manager.data.entity.descriptor, key, val)

        manager.data.entity.save()
        return message.respond_ok("OK")
Example #9
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    # Set default values
    for field_schema, field, path in iterate_schema(data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            dict_dot(data, path, field_schema['default'])
Example #10
    def migrate_process_schema(self, process, schema, from_state):
        """Migrate process schema.

        :param process: Process instance
        :param schema: Process schema to migrate
        :param from_state: Database model state
        :return: True if the process was migrated, False otherwise
        """
        container = dict_dot(schema, ".".join(self.field[:-1]), default=list)

        # Ignore processes that already contain the target field with the
        # target schema.
        for field in container:
            if field["name"] == self.field[-1]:
                if field == self.schema:
                    return False
                else:
                    raise ValueError(
                        "Failed to migrate schema for process '{process}' as the field '{field}' "
                        "already exists and has an incompatible schema".format(
                            process=process.slug, field=self.field[-1]
                        )
                    )

        # Add field to container.
        container.append(self.schema)
        return True
Example #11
    def assertFiles(self, obj, field_path, fn_list, **kwargs):  # pylint: disable=invalid-name
        """Compare a process's output file to the given correct file.

        :param obj: object which includes the files to compare
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            list of file names

        :param list fn_list: list of file names (and relative paths) of
            files to compare against. Paths should be relative to the
            ``tests/files`` directory of a Django application.

        :param str compression: if not ``None``, files will be
            uncompressed with the appropriate compression library
            before comparison.
            Currently supported compression formats are *gzip* and
            *zip*.

        :param filter: Function for filtering the contents of output
            files. It is used in :obj:`itertools.filterfalse` function
            and takes one parameter, a line of the output file. If it
            returns ``True``, the line is excluded from comparison of
            the two files.
        :type filter: ~types.FunctionType

        """
        field = dict_dot(obj.output, field_path)

        if len(field) != len(fn_list):
            self.fail(msg="Lengths of list:basic:file field and files list are not equal.")

        for fn_tested, fn_correct in zip(field, fn_list):
            self._assert_file(obj, fn_tested['file'], fn_correct, **kwargs)
Example #12
    def migrate_process_schema(self, process, schema, from_state):
        """Migrate process schema.

        :param process: Process instance
        :param schema: Process schema to migrate
        :param from_state: Database model state
        :return: True if the process was migrated, False otherwise
        """
        container = dict_dot(schema, '.'.join(self.field[:-1]), default=list)

        # Ignore processes that already contain the target field with the
        # target schema.
        for field in container:
            if field['name'] == self.field[-1]:
                if field == self.schema:
                    return False
                else:
                    raise ValueError(
                        "Failed to migrate schema for process '{process}' as the field '{field}' "
                        "already exists and has an incompatible schema".format(
                            process=process.slug,
                            field=self.field[-1]
                        )
                    )

        # Add field to container.
        container.append(self.schema)
        return True
Example #13
    def migrate_process_schema(self, process, schema, from_state):
        """Migrate process schema.

        :param process: Process instance
        :param schema: Process schema to migrate
        :param from_state: Database model state
        :return: True if the process was migrated, False otherwise
        """
        container = dict_dot(schema, '.'.join(self.field[:-1]), default=list)

        # Ignore processes that already contain the target field.
        migrate = False
        for field in container:
            if field['name'] == self.field[-1]:
                field['name'] = self.new_field
                migrate = True
                break
            elif field['name'] == self.new_field:
                # Already has target field.
                migrate = False
                break
        else:
            if not self.skip_no_field:
                raise ValueError(
                    "Unable to rename: there is no field with name '{field}' or '{new_field}'.".format(
                        field=self.field[-1],
                        new_field=self.new_field,
                    )
                )

        return migrate
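
To see what the rename does, here is a toy run over a schema container of the kind ``dict_dot`` would return for ``'.'.join(self.field[:-1])`` (the field names are hypothetical); the matching entry's ``name`` is rewritten in place:

container = [{"name": "genome", "type": "basic:data:"}]
old_name, new_name = "genome", "reference"
for field in container:
    if field["name"] == old_name:
        field["name"] = new_name
        break
assert container == [{"name": "reference", "type": "basic:data:"}]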
Example #14
File: utils.py Project: mstajdohar/resolwe
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    # Set default values
    for field_schema, field, path in iterate_schema(data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            dict_dot(data, path, field_schema['default'])
Example #15
    def migrate_process_schema(self, process, schema, from_state):
        """Migrate process schema.

        :param process: Process instance
        :param schema: Process schema to migrate
        :param from_state: Database model state
        :return: True if the process was migrated, False otherwise
        """
        container = dict_dot(schema, ".".join(self.field[:-1]), default=list)

        # Ignore processes that already contain the target field.
        migrate = False
        for field in container:
            if field["name"] == self.field[-1]:
                field["name"] = self.new_field
                migrate = True
                break
            elif field["name"] == self.new_field:
                # Already has target field.
                migrate = False
                break
        else:
            if not self.skip_no_field:
                raise ValueError(
                    "Unable to rename: there is no field with name '{field}' or '{new_field}'.".format(
                        field=self.field[-1], new_field=self.new_field,
                    )
                )

        return migrate
Example #16
File: process.py Project: genialis/resolwe
    def assertFile(self, obj, field_path, fn, **kwargs):
        """Compare a process's output file to the given correct file.

        :param obj: object that includes the file to compare
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name

        :param str fn: file name (and relative path) of the correct
            file to compare against. Path should be relative to the
            ``tests/files`` directory of a Django application.

        :param str compression: if not ``None``, files will be
            uncompressed with the appropriate compression library
            before comparison.
            Currently supported compression formats are *gzip* and
            *zip*.

        :param filter: function for filtering the contents of output
            files. It is used in :func:`itertools.filterfalse` function
            and takes one parameter, a line of the output file. If it
            returns ``True``, the line is excluded from comparison of
            the two files.
        :type filter: ~types.FunctionType

        :param bool sort: if set to ``True``, basic sort will be performed
            on file contents before computing hash value.

        """
        field = dict_dot(obj.output, field_path)
        self._assert_file(obj, field["file"], fn, **kwargs)
Example #17
    def assertFile(self, obj, field_path, fn, **kwargs):  # pylint: disable=invalid-name
        """Compare a process's output file to the given correct file.

        :param obj: object that includes the file to compare
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name

        :param str fn: file name (and relative path) of the correct
            file to compare against. Path should be relative to the
            ``tests/files`` directory of a Django application.

        :param str compression: if not ``None``, files will be
            uncompressed with the appropriate compression library
            before comparison.
            Currently supported compression formats are *gzip* and
            *zip*.

        :param filter: function for filtering the contents of output
            files. It is used in :func:`itertools.filterfalse` function
            and takes one parameter, a line of the output file. If it
            returns ``True``, the line is excluded from comparison of
            the two files.
        :type filter: ~types.FunctionType

        :param bool sort: if set to ``True``, basic sort will be performed
            on file contents before computing hash value.

        """
        field = dict_dot(obj.output, field_path)
        self._assert_file(obj, field['file'], fn, **kwargs)
Example #18
File: process.py Project: genialis/resolwe
    def assertDirStructure(self, obj, field_path, dir_struct, exact=True):
        """Assert correct tree structure in output field of given object.

        Only names of directories and files are asserted. Content of files is
        not compared.

        :param obj: object that includes the directory to compare
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to the
            :class:`~resolwe.flow.models.Data` object's field with the
            directory name

        :param dict dir_struct: correct tree structure of the directory.
            Dictionary keys are directory and file names with the correct nested
            structure. Dictionary value associated with each directory is a new
            dictionary which lists the content of the directory. Dictionary
            value associated with each file name is ``None``

        :param bool exact: if ``True`` tested directory structure must exactly
            match `dir_struct`. If ``False`` `dir_struct` must be a partial
            structure of the directory to compare

        """
        self.assertDirExists(obj, field_path)
        field = dict_dot(obj.output, field_path)
        dir_path = obj.location.get_path(filename=field["dir"])
        self._assert_dir_structure(dir_path, dir_struct, exact)
Example #19
def input_(data, field_path):
    """Return a hydrated value of the ``input`` field."""
    data_obj = Data.objects.get(id=data["__id"])

    inputs = copy.deepcopy(data_obj.input)
    # XXX: Optimize by hydrating only the required field (major refactoring).
    hydrate_input_references(inputs, data_obj.process.input_schema)
    hydrate_input_uploads(inputs, data_obj.process.input_schema)

    return dict_dot(inputs, field_path)
Example #20
def input_(data, field_path):
    """Return a hydrated value of the ``input`` field."""
    data_obj = Data.objects.get(id=data['__id'])

    inputs = copy.deepcopy(data_obj.input)
    # XXX: Optimize by hydrating only the required field (major refactoring).
    hydrate_input_references(inputs, data_obj.process.input_schema)
    hydrate_input_uploads(inputs, data_obj.process.input_schema)

    return dict_dot(inputs, field_path)
Example #21
def validate_process_types(queryset=None):
    """Perform process type validation.

    :param queryset: Optional process queryset to validate
    :return: A list of validation error strings
    """
    if not queryset:
        from .process import Process

        queryset = Process.objects.all()

    processes = {}
    for process in queryset:
        dict_dot(
            processes,
            process.type.replace(":", ".") + "__schema__",
            process.output_schema,
        )

    errors = []
    for path, key, value in iterate_dict(
        processes, exclude=lambda key, value: key == "__schema__"
    ):
        if "__schema__" not in value:
            continue

        # Validate with any parent types.
        for length in range(len(path), 0, -1):
            parent_type = ".".join(path[:length] + ["__schema__"])
            try:
                parent_schema = dict_dot(processes, parent_type)
            except KeyError:
                continue

            errors += validate_process_subtype(
                supertype_name=":".join(path[:length]),
                supertype=parent_schema,
                subtype_name=":".join(path + [key]),
                subtype=value["__schema__"],
            )

    return errors
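
The interesting trick here is how ``dict_dot`` turns colon-separated process types into a tree whose nodes carry output schemas under a ``__schema__`` key, so every ancestor type can be looked up by shortening the dotted path. A toy illustration reusing the sketch under Example #1 (the type names and schemas are hypothetical):

processes = {}
dict_dot(processes, "data.alignment.__schema__", [{"name": "bam"}])
dict_dot(processes, "data.alignment.bam.__schema__", [{"name": "bam"}, {"name": "bai"}])
# A subtype's parents are reached by shortening the dotted path:
assert dict_dot(processes, "data.alignment.__schema__") == [{"name": "bam"}]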
Example #22
    def assertJSON(self, obj, storage, field_path, file_name):
        """Compare JSON in Storage object to the given correct JSON.

        :param obj: object to which the
            :class:`~resolwe.flow.models.Storage` object belongs
        :type obj: ~resolwe.flow.models.Data

        :param storage: object or id which contains JSON to compare
        :type storage: :class:`~resolwe.flow.models.Storage` or
            :class:`str`

        :param str field_path: path to JSON subset in the
            :class:`~resolwe.flow.models.Storage`'s object to compare
            against. If it is empty, the entire object will be
            compared.

        :param str file_name: file name (and relative path) of the file
            with the correct JSON to compare against. Path should be
            relative to the ``tests/files`` directory of a Django
            application.

            .. note::

                The given JSON file should be compressed with *gzip* and
                have the ``.gz`` extension.

        """
        self.assertEqual(
            os.path.splitext(file_name)[1], ".gz", msg="File extension must be .gz"
        )

        if not isinstance(storage, Storage):
            storage = Storage.objects.get(pk=storage)

        storage_obj = dict_dot(storage.json, field_path)

        file_path = os.path.join(self.files_path, file_name)
        if not os.path.isfile(file_path):
            with gzip.open(file_path, mode="wt") as f:
                json.dump(storage_obj, f)

            self.fail(msg="Output file {} missing so it was created.".format(file_name))

        with gzip.open(file_path, mode="rt") as f:
            file_obj = json.load(f)

        self.assertAlmostEqualGeneric(
            storage_obj,
            file_obj,
            msg="Storage {} field '{}' does not match file {}".format(
                storage.id, field_path, file_name
            )
            + self._debug_info(obj),
        )
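
Note the empty ``field_path`` case documented above: with the ``dict_dot`` sketch under Example #1, an empty path returns the whole object, which is exactly what lets an empty string mean "compare the entire storage":

storage_json = {"stats": {"mean": 1.5}}
assert dict_dot(storage_json, "") == storage_json  # empty path: the whole object
assert dict_dot(storage_json, "stats.mean") == 1.5  # dotted path: a subset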
Example #23
    def migrate_data(self, data, from_state):
        """Migrate data objects.

        :param data: Queryset containing all data objects that need
            to be migrated
        :param from_state: Database model state
        """
        if not self.default:
            return

        self.default.prepare(data, from_state)
        for instance in data:
            value = self.default.get_default_for(instance, from_state)
            if not value and not self.schema.get('required', True):
                continue

            # Set default value.
            container = getattr(instance, self.schema_type, {})
            dict_dot(container, '.'.join(self.field), value)
            setattr(instance, self.schema_type, container)
            instance.save()
Example #24
    def migrate_data(self, data, from_state):
        """Migrate data objects.

        :param data: Queryset containing all data objects that need
            to be migrated
        :param from_state: Database model state
        """
        if not self.default:
            return

        self.default.prepare(data, from_state)
        for instance in data:
            value = self.default.get_default_for(instance, from_state)
            if not value and not self.schema.get("required", True):
                continue

            # Set default value.
            container = getattr(instance, self.schema_type, {})
            dict_dot(container, ".".join(self.field), value)
            setattr(instance, self.schema_type, container)
            instance.save()
Example #25
def validate_process_types(queryset=None):
    """Perform process type validation.

    :param queryset: Optional process queryset to validate
    :return: A list of validation error strings
    """
    if not queryset:
        from .process import Process
        queryset = Process.objects.all()

    processes = {}
    for process in queryset:
        dict_dot(
            processes,
            process.type.replace(':', '.') + '__schema__',
            process.output_schema
        )

    errors = []
    for path, key, value in iterate_dict(processes, exclude=lambda key, value: key == '__schema__'):
        if '__schema__' not in value:
            continue

        # Validate with any parent types.
        for length in range(len(path), 0, -1):
            parent_type = '.'.join(path[:length] + ['__schema__'])
            try:
                parent_schema = dict_dot(processes, parent_type)
            except KeyError:
                continue

            errors += validate_process_subtype(
                supertype_name=':'.join(path[:length]),
                supertype=parent_schema,
                subtype_name=':'.join(path + [key]),
                subtype=value['__schema__']
            )

    return errors
Example #26
File: process.py Project: genialis/resolwe
    def assertFileExists(self, obj, field_path):
        """Ensure a file in the given object's field exists.

        :param obj: object that includes the file for which to check if
            it exists
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name/path
        """
        field = dict_dot(obj.output, field_path)
        output = obj.location.get_path(filename=field["file"])

        if not os.path.isfile(output):
            self.fail(msg="File {} does not exist.".format(field_path))
Example #27
    def assertFileExists(self, obj, field_path):  # pylint: disable=invalid-name
        """Ensure a file in the given object's field exists.

        :param obj: object that includes the file for which to check if
            it exists
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name/path
        """
        field = dict_dot(obj.output, field_path)
        output = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(obj.pk), field['file'])

        if not os.path.isfile(output):
            self.fail(msg="File {} does not exist.".format(field_path))
Example #28
    def assertFileExists(self, obj, field_path):  # pylint: disable=invalid-name
        """Ensure a file in the given object's field exists.

        :param obj: object that includes the file for which to check if
            it exists
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name/path
        """
        field = dict_dot(obj.output, field_path)
        output = obj.location.get_path(filename=field['file'])

        if not os.path.isfile(output):
            self.fail(msg="File {} does not exist.".format(field_path))
Example #29
File: utils.py Project: genialis/resolwe
def save_storage(data):
    """Parse output field and create Storage objects if needed."""
    for field_schema, fields, path in iterate_fields(
        data.output, data.process.output_schema, ""
    ):
        name = field_schema["name"]
        value = fields[name]
        if field_schema.get("type", "").startswith("basic:json:"):
            if value and not data.pk:
                raise ValidationError(
                    "Data object must be `created` before creating `basic:json:` fields"
                )

            if isinstance(value, int):
                # already in Storage
                continue

            if isinstance(value, str):
                file_path = data.location.get_path(filename=value)
                if os.path.isfile(file_path):
                    try:
                        with open(file_path) as file_handler:
                            value = json.load(file_handler)
                    except json.JSONDecodeError:
                        with open(file_path) as file_handler:
                            content = file_handler.read()
                            content = content.rstrip()
                            raise ValidationError(
                                "Value of '{}' must be a valid JSON, current: {}".format(
                                    name, content
                                )
                            )

            existing_storage_pk = None
            with suppress(KeyError):
                existing_storage_pk = dict_dot(data._original_output, path)

            if isinstance(existing_storage_pk, int):
                data.storages.filter(pk=existing_storage_pk).update(json=value)
                fields[name] = existing_storage_pk
            else:
                storage = data.storages.create(
                    name="Storage for data id {}".format(data.pk),
                    contributor=data.contributor,
                    json=value,
                )
                fields[name] = storage.pk
Example #30
    def assertJSON(self, obj, storage, field_path, file_name):  # pylint: disable=invalid-name
        """Compare JSON in Storage object to the given correct JSON.

        :param obj: object to which the
            :class:`~resolwe.flow.models.Storage` object belongs
        :type obj: ~resolwe.flow.models.Data

        :param storage: object or id which contains JSON to compare
        :type storage: :class:`~resolwe.flow.models.Storage` or
            :class:`str`

        :param str field_path: path to JSON subset in the
            :class:`~resolwe.flow.models.Storage`'s object to compare
            against. If it is empty, the entire object will be
            compared.

        :param str file_name: file name (and relative path) of the file
            with the correct JSON to compare against. Path should be
            relative to the ``tests/files`` directory of a Django
            application.

            .. note::

                The given JSON file should be compressed with *gzip* and
                have the ``.gz`` extension.

        """
        self.assertEqual(os.path.splitext(file_name)[1], '.gz', msg='File extension must be .gz')

        if not isinstance(storage, Storage):
            storage = Storage.objects.get(pk=storage)

        storage_obj = dict_dot(storage.json, field_path)

        file_path = os.path.join(self.files_path, file_name)
        if not os.path.isfile(file_path):
            with gzip.open(file_path, mode='wt') as f:
                json.dump(storage_obj, f)

            self.fail(msg="Output file {} missing so it was created.".format(file_name))

        with gzip.open(file_path, mode='rt') as f:
            file_obj = json.load(f)

        self.assertAlmostEqualGeneric(storage_obj, file_obj,
                                      msg="Storage {} field '{}' does not match file {}".format(
                                          storage.id, field_path, file_name) + self._debug_info(obj))
Example #31
File: process.py Project: lukaw3d/resolwe
    def assertFilesExist(self, obj, field_path):  # pylint: disable=invalid-name
        """Ensure files in the given object's field exists.

        :param obj: object that includes list of files for which to check
            existance
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            file name/path
        """
        field = dict_dot(obj.output, field_path)

        for item in field:
            output_file = obj.location.get_path(filename=item['file'])
            if not os.path.isfile(output_file):
                self.fail(msg="File {} in output field {} does not exist.".format(item['file'], field_path))
Example #32
def descriptor(obj, path=''):
    """Return descriptor of given object.

    If ``path`` is specified, only the content on that path is
    returned.
    """
    if isinstance(obj, dict):
        # Current object is hydrated, so we need to get descriptor from
        # dict representation.
        desc = obj['__descriptor']
    else:
        desc = obj.descriptor

    resp = dict_dot(desc, path)

    if isinstance(resp, (list, dict)):
        return json.dumps(resp)

    return resp
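
A quick demonstration of both return branches on the hydrated (dict) form, reusing the ``dict_dot`` sketch under Example #1 (the descriptor contents are hypothetical):

import json

obj = {"__descriptor": {"general": {"species": "Homo sapiens"}}}
desc = obj["__descriptor"]
# Scalars come back as-is, containers are serialized to JSON strings:
assert dict_dot(desc, "general.species") == "Homo sapiens"
assert json.dumps(dict_dot(desc, "general")) == '{"species": "Homo sapiens"}'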
Example #33
def descriptor(obj, path=""):
    """Return descriptor of given object.

    If ``path`` is specified, only the content on that path is
    returned.
    """
    if isinstance(obj, dict):
        # Current object is hydrated, so we need to get descriptor from
        # dict representation.
        desc = obj["__descriptor"]
    else:
        desc = obj.descriptor

    resp = dict_dot(desc, path)

    if isinstance(resp, (list, dict)):
        return json.dumps(resp)

    return resp
Example #34
File: process.py Project: genialis/resolwe
    def assertDir(self, obj, field_path, fn):
        """Compare process output directory to correct compressed directory.

        :param obj: object that includes the directory to compare
        :type obj: ~resolwe.flow.models.Data

        :param str field_path: path to
            :class:`~resolwe.flow.models.Data` object's field with the
            directory name

        :param str fn: file name (and relative path) of the correct compressed
            directory to compare against. Path should be relative to the
            ``tests/files`` directory of a Django application. Compressed
            directory needs to be in ``tar.gz`` format.

        """
        self.assertDirExists(obj, field_path)
        field = dict_dot(obj.output, field_path)
        dir_path = obj.location.get_path(filename=field["dir"])
        self._assert_dir(dir_path, fn)
Example #35
    def handle_update(self, obj, internal_call=False):
        """Handle an incoming ``Data`` object update request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'update',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data`
                               object this command changes],
                    'changeset': {
                        [keys to be changed]
                    }
                }

        :param internal_call: If ``True``, this is an internal delegate
            call, so a reply to the executor won't be sent.
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        changeset = obj[ExecutorProtocol.UPDATE_CHANGESET]
        if not internal_call:
            logger.debug(__(
                "Handling update for Data with id {} (handle_update).",
                data_id),
                         extra={
                             'data_id': data_id,
                             'packet': obj
                         })
        try:
            d = Data.objects.get(pk=data_id)
        except Data.DoesNotExist:
            logger.warning("Data object does not exist (handle_update).",
                           extra={
                               'data_id': data_id,
                           })

            if not internal_call:
                async_to_sync(self._send_reply)(
                    obj, {
                        ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR
                    })

            async_to_sync(consumer.send_event)({
                WorkerProtocol.COMMAND:
                WorkerProtocol.ABORT,
                WorkerProtocol.DATA_ID:
                obj[ExecutorProtocol.DATA_ID],
                WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                    'executor':
                    getattr(settings, 'FLOW_EXECUTOR',
                            {}).get('NAME', 'resolwe.flow.executors.local'),
                },
            })

            return

        if changeset.get('status', None) == Data.STATUS_ERROR:
            logger.error(__(
                "Error occured while running process '{}' (handle_update).",
                d.process.slug),
                         extra={
                             'data_id':
                             data_id,
                             'api_url':
                             '{}{}'.format(
                                 getattr(settings, 'RESOLWE_HOST_URL', ''),
                                 reverse('resolwe-api:data-detail',
                                         kwargs={'pk': data_id})),
                         })

        if d.status == Data.STATUS_ERROR:
            changeset['status'] = Data.STATUS_ERROR

        if not d.started:
            changeset['started'] = now()
        changeset['modified'] = now()

        for key, val in changeset.items():
            if key in ['process_error', 'process_warning', 'process_info']:
                # Trim process_* fields to not exceed max length of the database field.
                for i, entry in enumerate(val):
                    max_length = Data._meta.get_field(
                        key).base_field.max_length  # pylint: disable=protected-access
                    if len(entry) > max_length:
                        val[i] = entry[:max_length - 3] + '...'

                getattr(d, key).extend(val)

            elif key != 'output':
                setattr(d, key, val)

        if 'output' in changeset:
            if not isinstance(d.output, dict):
                d.output = {}
            for key, val in changeset['output'].items():
                dict_dot(d.output, key, val)

        try:
            d.save(update_fields=list(changeset.keys()))
        except ValidationError as exc:
            logger.error(__(
                "Validation error when saving Data object of process '{}' (handle_update):\n\n{}",
                d.process.slug, traceback.format_exc()),
                         extra={'data_id': data_id})

            d.refresh_from_db()

            d.process_error.append(exc.message)
            d.status = Data.STATUS_ERROR

            try:
                d.save(update_fields=['process_error', 'status'])
            except Exception:  # pylint: disable=broad-except
                pass
        except Exception:  # pylint: disable=broad-except
            logger.error(__(
                "Error when saving Data object of process '{}' (handle_update):\n\n{}",
                d.process.slug, traceback.format_exc()),
                         extra={'data_id': data_id})

        if not internal_call:
            async_to_sync(self._send_reply)(
                obj, {
                    ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK
                })
Example #36
File: data.py Project: genialis/resolwe
    def create_entity(self):
        """Create entity if `flow_collection` is defined in process.

        The following rules apply when adding a `Data` object to an
        `Entity`:
        * Only add a `Data` object to an `Entity` if the process has the
        `flow_collection` field defined
        * Add the object to an existing `Entity` if all of its parents
        that belong to an `Entity` (though not necessarily all parents)
        belong to the same `Entity`
        * If the parents belong to different `Entities` or to no `Entity`
        at all, create a new `Entity`

        """
        entity_type = self.process.entity_type  # pylint: disable=no-member
        entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
        entity_input = self.process.entity_input  # pylint: disable=no-member

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(self.input, entity_input, default=lambda: None)
                if input_id is None:
                    logger.warning("Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter['data__pk'] = input_id
                elif isinstance(input_id, list):
                    data_filter['data__pk__in'] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}.".format(entity_input)
                    )
            else:
                data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

            entity_query = Entity.objects.filter(type=entity_type, **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema
                ).latest()
                entity = Entity.objects.create(
                    contributor=self.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=self.name,
                    tags=self.tags,
                )
                assign_contributor_permissions(entity)

            elif entity_count == 1:
                entity = entity_query.first()
                copy_permissions(entity, self)

            else:
                logger.info("Skipping creation of entity due to multiple entities found.")
                entity = None

            if entity:
                entity.data.add(self)
                # Inherit collections from entity.
                for collection in entity.collections.all():
                    collection.data.add(self)
Example #37
File: data.py Project: jberci/resolwe
    def create_entity(self):
        """Create entity if `flow_collection` is defined in process.

        The following rules apply when adding a `Data` object to an
        `Entity`:
        * Only add a `Data` object to an `Entity` if the process has the
        `flow_collection` field defined
        * Add the object to an existing `Entity` if all of its parents
        that belong to an `Entity` (though not necessarily all parents)
        belong to the same `Entity`
        * If the parents belong to different `Entities` or to no `Entity`
        at all, create a new `Entity`

        """
        entity_type = self.process.entity_type  # pylint: disable=no-member
        entity_descriptor_schema = self.process.entity_descriptor_schema  # pylint: disable=no-member
        entity_input = self.process.entity_input  # pylint: disable=no-member

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(self.input,
                                    entity_input,
                                    default=lambda: None)
                if input_id is None:
                    logger.warning(
                        "Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter['data__pk'] = input_id
                elif isinstance(input_id, list):
                    data_filter['data__pk__in'] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}."
                        .format(entity_input))
            else:
                data_filter['data__in'] = self.parents.all()  # pylint: disable=no-member

            entity_query = Entity.objects.filter(type=entity_type,
                                                 **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema).latest()
                entity = Entity.objects.create(
                    contributor=self.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=self.name,
                    tags=self.tags,
                )
                assign_contributor_permissions(entity)

            elif entity_count == 1:
                entity = entity_query.first()
                copy_permissions(entity, self)

            else:
                logger.info(
                    "Skipping creation of entity due to multiple entities found."
                )
                entity = None

            if entity:
                entity.data.add(self)
                # Inherit collections from entity.
                for collection in entity.collections.all():
                    collection.data.add(self)
Example #38
    def _handle_entity(obj):
        """Create entity if `entity.type` is defined in process.

        The following rules apply when adding a `Data` object to an
        `Entity`:
        * Only add a `Data` object to an `Entity` if the process has the
        `entity.type` field defined
        * Create a new entity if the parents do not belong to any `Entity`
        * Add the object to an existing `Entity` if all of its parents
        that belong to an `Entity` (though not necessarily all parents)
        belong to the same `Entity`
        * If the parents belong to different `Entities`, do nothing

        """
        entity_type = obj.process.entity_type
        entity_descriptor_schema = obj.process.entity_descriptor_schema
        entity_input = obj.process.entity_input
        entity_always_create = obj.process.entity_always_create
        operation = HandleEntityOperation.PASS

        if entity_type:
            data_filter = {}
            if entity_input:
                input_id = dict_dot(obj.input,
                                    entity_input,
                                    default=lambda: None)
                if input_id is None:
                    logger.warning(
                        "Skipping creation of entity due to missing input.")
                    return
                if isinstance(input_id, int):
                    data_filter["data__pk"] = input_id
                elif isinstance(input_id, list):
                    data_filter["data__pk__in"] = input_id
                else:
                    raise ValueError(
                        "Cannot create entity due to invalid value of field {}."
                        .format(entity_input))
            else:
                data_filter["data__in"] = obj.parents.all()

            entity_query = Entity.objects.filter(type=entity_type,
                                                 **data_filter).distinct()
            entity_count = entity_query.count()

            if entity_count == 0 or entity_always_create:
                descriptor_schema = DescriptorSchema.objects.filter(
                    slug=entity_descriptor_schema).latest()
                entity = Entity.objects.create(
                    contributor=obj.contributor,
                    descriptor_schema=descriptor_schema,
                    type=entity_type,
                    name=obj.name,
                    tags=obj.tags,
                )
                assign_contributor_permissions(entity)
                operation = HandleEntityOperation.CREATE

            elif entity_count == 1:
                entity = entity_query.first()
                obj.tags = entity.tags
                copy_permissions(entity, obj)
                operation = HandleEntityOperation.ADD

            else:
                logger.info(
                    "Skipping creation of entity due to multiple entities found."
                )
                entity = None

            if entity:
                obj.entity = entity
                obj.save()

            return operation
Example #39
    def handle_update(self, obj, internal_call=False):
        """Handle an incoming ``Data`` object update request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'update',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data`
                               object this command changes],
                    'changeset': {
                        [keys to be changed]
                    }
                }

        :param internal_call: If ``True``, this is an internal delegate
            call, so a reply to the executor won't be sent.
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        changeset = obj[ExecutorProtocol.UPDATE_CHANGESET]
        if not internal_call:
            logger.debug(
                __("Handling update for Data with id {} (handle_update).", data_id),
                extra={
                    'data_id': data_id,
                    'packet': obj
                }
            )
        try:
            d = Data.objects.get(pk=data_id)
        except Data.DoesNotExist:
            logger.warning(
                "Data object does not exist (handle_update).",
                extra={
                    'data_id': data_id,
                }
            )

            if not internal_call:
                async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR})

            async_to_sync(consumer.send_event)({
                WorkerProtocol.COMMAND: WorkerProtocol.ABORT,
                WorkerProtocol.DATA_ID: obj[ExecutorProtocol.DATA_ID],
                WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                    'executor': getattr(settings, 'FLOW_EXECUTOR', {}).get('NAME', 'resolwe.flow.executors.local'),
                },
            })

            return

        if changeset.get('status', None) == Data.STATUS_ERROR:
            logger.error(
                __("Error occured while running process '{}' (handle_update).", d.process.slug),
                extra={
                    'data_id': data_id,
                    'api_url': '{}{}'.format(
                        getattr(settings, 'RESOLWE_HOST_URL', ''),
                        reverse('resolwe-api:data-detail', kwargs={'pk': data_id})
                    ),
                }
            )

        if d.status == Data.STATUS_ERROR:
            changeset['status'] = Data.STATUS_ERROR

        if not d.started:
            changeset['started'] = now()
        changeset['modified'] = now()

        for key, val in changeset.items():
            if key in ['process_error', 'process_warning', 'process_info']:
                # Trim process_* fields to not exceed max length of the database field.
                for i, entry in enumerate(val):
                    max_length = Data._meta.get_field(key).base_field.max_length  # pylint: disable=protected-access
                    if len(entry) > max_length:
                        val[i] = entry[:max_length - 3] + '...'

                getattr(d, key).extend(val)

            elif key != 'output':
                setattr(d, key, val)

        if 'output' in changeset:
            if not isinstance(d.output, dict):
                d.output = {}
            for key, val in changeset['output'].items():
                dict_dot(d.output, key, val)

        try:
            d.save(update_fields=list(changeset.keys()))
        except ValidationError as exc:
            logger.error(
                __(
                    "Validation error when saving Data object of process '{}' (handle_update):\n\n{}",
                    d.process.slug,
                    traceback.format_exc()
                ),
                extra={
                    'data_id': data_id
                }
            )

            d.refresh_from_db()

            d.process_error.append(exc.message)
            d.status = Data.STATUS_ERROR

            try:
                d.save(update_fields=['process_error', 'status'])
            except Exception:  # pylint: disable=broad-except
                pass
        except Exception:  # pylint: disable=broad-except
            logger.error(
                __(
                    "Error when saving Data object of process '{}' (handle_update):\n\n{}",
                    d.process.slug,
                    traceback.format_exc()
                ),
                extra={
                    'data_id': data_id
                }
            )

        if not internal_call:
            async_to_sync(self._send_reply)(obj, {ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK})
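
For reference, a minimal sketch of the command object this handler consumes. The field names follow the docstring above; ids and values are purely illustrative:

update_message = {
    'command': 'update',
    'data_id': 42,                               # pk of the target Data object
    'changeset': {
        'process_info': ['Aligning reads ...'],  # appended to d.process_info (and trimmed)
        'output': {'stats.reads': 1000},         # dotted keys merged into d.output via dict_dot
    },
}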
Example #40
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(process_input, input_schema):
        if 'default' in field_schema and field_schema['name'] not in fields:
            dict_dot(process_input, path, field_schema['default'])
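
A minimal usage sketch, assuming the usual resolwe field layout where each schema entry carries 'name', 'type' and an optional 'default' (field names here are hypothetical):

process_input = {}
input_schema = [
    {'name': 'threshold', 'type': 'basic:integer:', 'default': 10},
    {'name': 'genome', 'type': 'data:genome:'},  # no default, left untouched
]
fill_with_defaults(process_input, input_schema)
assert process_input == {'threshold': 10}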
Example #41
    def handle_annotate(self, obj):
        """Handle an incoming ``Data`` object annotate request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'annotate',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data`
                               object this command annotates],
                    'annotations': {
                        [annotations to be added/updated]
                    }
                }
        """
        def report_failure():
            self.unlock_all_inputs(obj[ExecutorProtocol.DATA_ID])

            async_to_sync(self._send_reply)(
                obj, {
                    ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR
                })
            async_to_sync(consumer.send_event)({
                WorkerProtocol.COMMAND:
                WorkerProtocol.ABORT,
                WorkerProtocol.DATA_ID:
                obj[ExecutorProtocol.DATA_ID],
                WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                    "executor":
                    getattr(settings, "FLOW_EXECUTOR",
                            {}).get("NAME", "resolwe.flow.executors.local"),
                },
            })

        data_id = obj[ExecutorProtocol.DATA_ID]
        annotations = obj[ExecutorProtocol.ANNOTATIONS]

        logger.debug(
            __("Handling annotate for Data with id {} (handle_annotate).",
               data_id),
            extra={
                "data_id": data_id,
                "packet": obj
            },
        )
        try:
            d = Data.objects.get(pk=data_id)
        except Data.DoesNotExist:
            logger.warning(
                "Data object does not exist (handle_annotate).",
                extra={"data_id": data_id},
            )
            report_failure()
            return

        if d.entity is None:
            logger.error(
                __(
                    "No entity to annotate for process '{}' (handle_annotate).",
                    d.process.slug,
                ),
                extra={"data_id": data_id},
            )
            d.process_error.append(
                "No entity to annotate for process '{}' (handle_annotate)".
                format(d.process.slug))
            d.status = Data.STATUS_ERROR

            with suppress(Exception):
                d.save(update_fields=["process_error", "status"])
            report_failure()
            return

        for key, val in annotations.items():
            logger.debug(
                __("Annotating entity {}: {} -> {}", d.entity, key, val))
            dict_dot(d.entity.descriptor, key, val)

        try:
            d.entity.save()
            async_to_sync(self._send_reply)(
                obj, {
                    ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK
                })
        except ValidationError as exc:
            logger.error(
                __(
                    "Validation error when saving Data object of process '{}' (handle_annotate):\n\n{}",
                    d.process.slug,
                    traceback.format_exc(),
                ),
                extra={"data_id": data_id},
            )
            d.refresh_from_db()
            d.process_error.append(exc.message)
            d.status = Data.STATUS_ERROR
            with suppress(Exception):
                d.save(update_fields=["process_error", "status"])
            report_failure()
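
As above, a minimal sketch of the command object this handler consumes; dotted keys in 'annotations' are written into entity.descriptor via dict_dot (all values are illustrative):

annotate_message = {
    'command': 'annotate',
    'data_id': 42,
    'annotations': {'sample.organism': 'Homo sapiens'},
}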
Example #42
    def handle_update(self, obj, internal_call=False):
        """Handle an incoming ``Data`` object update request.

        :param obj: The Channels message object. Command object format:

            .. code-block:: none

                {
                    'command': 'update',
                    'data_id': [id of the :class:`~resolwe.flow.models.Data`
                               object this command changes],
                    'changeset': {
                        [keys to be changed]
                    }
                }

        :param internal_call: If ``True``, this is an internal delegate
            call, so a reply to the executor won't be sent.
        """
        data_id = obj[ExecutorProtocol.DATA_ID]
        changeset = obj[ExecutorProtocol.UPDATE_CHANGESET]
        if not internal_call:
            logger.debug(
                __("Handling update for Data with id {} (handle_update).",
                   data_id),
                extra={
                    "data_id": data_id,
                    "packet": obj
                },
            )
        try:
            d = Data.objects.get(pk=data_id)
        except Data.DoesNotExist:
            logger.warning(
                "Data object does not exist (handle_update).",
                extra={"data_id": data_id},
            )

            if not internal_call:
                async_to_sync(self._send_reply)(
                    obj, {
                        ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_ERROR
                    })

            async_to_sync(consumer.send_event)({
                WorkerProtocol.COMMAND:
                WorkerProtocol.ABORT,
                WorkerProtocol.DATA_ID:
                obj[ExecutorProtocol.DATA_ID],
                WorkerProtocol.FINISH_COMMUNICATE_EXTRA: {
                    "executor":
                    getattr(settings, "FLOW_EXECUTOR",
                            {}).get("NAME", "resolwe.flow.executors.local"),
                },
            })

            return

        if changeset.get("status", None) == Data.STATUS_ERROR:
            logger.error(
                __(
                    "Error occured while running process '{}' (handle_update).",
                    d.process.slug,
                ),
                extra={
                    "data_id":
                    data_id,
                    "api_url":
                    "{}{}".format(
                        getattr(settings, "RESOLWE_HOST_URL", ""),
                        reverse("resolwe-api:data-detail",
                                kwargs={"pk": data_id}),
                    ),
                },
            )
            self.unlock_all_inputs(data_id)

        if d.status == Data.STATUS_ERROR:
            changeset["status"] = Data.STATUS_ERROR

        if not d.started:
            changeset["started"] = now()
        changeset["modified"] = now()

        for key, val in changeset.items():
            if key in ["process_error", "process_warning", "process_info"]:
                # Trim process_* fields to not exceed max length of the database field.
                for i, entry in enumerate(val):
                    max_length = Data._meta.get_field(
                        key).base_field.max_length
                    if len(entry) > max_length:
                        val[i] = entry[:max_length - 3] + "..."

                getattr(d, key).extend(val)

            elif key != "output":
                setattr(d, key, val)

        if "output" in changeset:
            if not isinstance(d.output, dict):
                d.output = {}
            for key, val in changeset["output"].items():
                dict_dot(d.output, key, val)

        try:
            d.save(update_fields=list(changeset.keys()))
        except ValidationError as exc:
            logger.error(
                __(
                    "Validation error when saving Data object of process '{}' (handle_update):\n\n{}",
                    d.process.slug,
                    traceback.format_exc(),
                ),
                extra={"data_id": data_id},
            )
            d.refresh_from_db()
            d.process_error.append(exc.message)
            d.status = Data.STATUS_ERROR
            with suppress(Exception):
                d.save(update_fields=["process_error", "status"])
            self.unlock_all_inputs(data_id)

        except Exception:
            logger.error(
                __(
                    "Error when saving Data object of process '{}' (handle_update):\n\n{}",
                    d.process.slug,
                    traceback.format_exc(),
                ),
                extra={"data_id": data_id},
            )

        try:
            # Update referenced files. Since the entire output is sent every
            # time, just delete and recreate the objects; computing a diff and
            # updating in place would probably be slower.
            if "output" in changeset:
                storage_location = d.location.default_storage_location
                ReferencedPath.objects.filter(
                    storage_locations=storage_location).delete()
                referenced_paths = [
                    ReferencedPath(path=path)
                    for path in referenced_files(d, include_descriptor=False)
                ]
                ReferencedPath.objects.bulk_create(referenced_paths)
                storage_location.files.add(*referenced_paths)
        except Exception:
            logger.error(
                __(
                    "Error when saving ReferencedFile objects of process '{}' (handle_update):\n\n{}",
                    d.process.slug,
                    traceback.format_exc(),
                ),
                extra={"data_id": data_id},
            )

        if not internal_call:
            async_to_sync(self._send_reply)(
                obj, {
                    ExecutorProtocol.RESULT: ExecutorProtocol.RESULT_OK
                })
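
Both handle_update variants merge 'output' keys instead of assigning them wholesale. A toy sketch of the dict_dot semantics they rely on (assuming, as the code above does, that dict_dot creates intermediate dicts for dotted paths):

output = {'stats': {'reads': 500}}
dict_dot(output, 'stats.mapped', 450)    # updates a key inside an existing dict
dict_dot(output, 'bam.file', 'aln.bam')  # creates the intermediate 'bam' dict
assert output == {'stats': {'reads': 500, 'mapped': 450},
                  'bam': {'file': 'aln.bam'}}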
Example #43
    def create(self, request, *args, **kwargs):
        """Create a resource."""
        collections = request.data.get('collections', [])

        # Check that the user has permissions on all collections the
        # Data object will be added to.
        for collection_id in collections:
            try:
                collection = Collection.objects.get(pk=collection_id)
            except Collection.DoesNotExist:
                return Response(
                    {
                        'collections': [
                            'Invalid pk "{}" - object does not exist.'.format(
                                collection_id)
                        ]
                    },
                    status=status.HTTP_400_BAD_REQUEST)

            if not request.user.has_perm('add_collection', obj=collection):
                if request.user.has_perm('view_collection', obj=collection):
                    raise exceptions.PermissionDenied(
                        "You don't have `ADD` permission on collection (id: {})."
                        .format(collection_id))
                else:
                    raise exceptions.NotFound(
                        "Collection not found (id: {}).".format(collection_id))

        # Translate the process slug to its id.
        process_slug = request.data.get('process', None)
        process_query = Process.objects.filter(slug=process_slug)
        process_query = get_objects_for_user(request.user, 'view_process',
                                             process_query)
        try:
            process = process_query.latest()
        except Process.DoesNotExist:
            return Response(
                {
                    'process': [
                        'Invalid process slug "{}" - object does not exist.'.
                        format(process_slug)
                    ]
                },
                status=status.HTTP_400_BAD_REQUEST)
        request.data['process'] = process.pk

        # perform "get_or_create" if requested - return existing object
        # if found
        if kwargs.pop('get_or_create', False):
            process_input = request.data.get('input', {})

            # use default values if they are not given
            for field_schema, fields, path in iterate_schema(
                    process_input, process.input_schema):
                if 'default' in field_schema and field_schema[
                        'name'] not in fields:
                    dict_dot(process_input, path, field_schema['default'])

            checksum = get_data_checksum(process_input, process.slug,
                                         process.version)
            data_qs = Data.objects.filter(
                checksum=checksum,
                process__persistence__in=[
                    Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP
                ],
            )
            data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
            if data_qs.exists():
                data = data_qs.order_by('created').last()
                serializer = self.get_serializer(data)
                return Response(serializer.data)

        # create the objects
        resp = super(DataViewSet, self).create(request, *args, **kwargs)

        # run manager
        manager.communicate()

        return resp
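
An illustrative request body for this endpoint (slugs and ids are hypothetical); note that 'process' arrives as a slug and is rewritten to the pk before the serializer runs:

request_data = {
    'process': 'alignment-bwa-mem',  # resolved via Process.objects.filter(slug=...)
    'collections': [1],              # the user needs ADD permission on each
    'input': {'genome': 3, 'reads': 4},
}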
Example #44
    def register_processes(self, process_schemas, user, force=False, verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if 'flow_collection' in p:
                if 'entity' in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p['slug'])
                    )
                    continue

                p['entity'] = {'type': p.pop('flow_collection')}

            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'entity' in p:
                if 'type' not in p['entity']:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug'])
                    )
                    continue

                p['entity_type'] = p['entity']['type']
                p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type'])
                p['entity_input'] = p['entity'].get('input', None)
                p.pop('entity')

                if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p['slug'], p['entity_descriptor_schema'])
                    )
                    continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema', []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write("Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']
                    ))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                        self.stderr.write("Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                        ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip processor {}: same version installed".format(slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
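
A minimal, illustrative process schema that would pass the normalization above (all values are hypothetical):

p = {
    'slug': 'my-tool',
    'version': '1.0.0',
    'type': 'data:mytool',    # trailing ':' is appended automatically
    'persistence': 'CACHED',  # mapped to Process.PERSISTENCE_CACHED
    'input': [{'name': 'reads', 'type': 'data:reads:'}],  # renamed to 'input_schema'
    'run': {'program': 'echo done'},  # 'language' defaults to 'bash'
}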
Example #45
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if "flow_collection" in p:
                if "entity" in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p["slug"]))
                    continue

                p["entity"] = {"type": p.pop("flow_collection")}

            if p["type"][-1] != ":":
                p["type"] += ":"

            if "category" in p and not p["category"].endswith(":"):
                p["category"] += ":"

            for field in ["input", "output"]:
                for schema, _, _ in iterate_schema(
                    {}, p[field] if field in p else {}):
                    if not schema["type"][-1].endswith(":"):
                        schema["type"] += ":"
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if "entity" in p:
                if "type" not in p["entity"]:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined"
                        .format(p["slug"]))
                    continue
                if "input" in p["entity"] and p["entity"].get(
                        "always_create", False):
                    self.stderr.write(
                        "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' "
                        "is set to true.".format(p["slug"]))
                    continue

                p["entity_type"] = p["entity"]["type"]
                p["entity_descriptor_schema"] = p["entity"].get(
                    "descriptor_schema", p["entity_type"])
                p["entity_input"] = p["entity"].get("input", None)
                p["entity_always_create"] = p["entity"].get(
                    "always_create", False)
                p.pop("entity")

                if not DescriptorSchema.objects.filter(
                        slug=p["entity_descriptor_schema"]).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p["slug"],
                                        p["entity_descriptor_schema"]))
                    continue

            if "persistence" in p:
                persistence_mapping = {
                    "RAW": Process.PERSISTENCE_RAW,
                    "CACHED": Process.PERSISTENCE_CACHED,
                    "TEMP": Process.PERSISTENCE_TEMP,
                }

                p["persistence"] = persistence_mapping[p["persistence"]]

            if "scheduling_class" in p:
                scheduling_class_mapping = {
                    "interactive": Process.SCHEDULING_CLASS_INTERACTIVE,
                    "batch": Process.SCHEDULING_CLASS_BATCH,
                }

                p["scheduling_class"] = scheduling_class_mapping[
                    p["scheduling_class"]]

            if "input" in p:
                p["input_schema"] = p.pop("input")

            if "output" in p:
                p["output_schema"] = p.pop("output")

            slug = p["slug"]

            if "run" in p:
                # Set default language to 'bash' if not set.
                p["run"].setdefault("language", "bash")

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p["run"]["language"])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault("output_schema",
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p["run"]["language"]))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"):
                try:
                    container_image = dict_dot(
                        p, "requirements.executor.docker.image")
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p["version"]
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))