Example #1
    def make_process_node(self, process):
        """Fill the content of process definiton node.

        :param dict process: process data as given from yaml.load function
        :return: process node

        """
        name = process['name']
        slug = process['slug']
        typ = process['type']
        version = process['version']
        description = process.get('description', '')
        source_uri = process['source_uri']
        inputs = process.get('input', [])
        outputs = process.get('output', [])

        # Make process name a section title:
        section = nodes.section(ids=['process-' + slug])
        section += nodes.title(name, name)

        # Make process header:
        section += self.make_process_header(slug, typ, version, source_uri,
                                            description, inputs)

        # Make inputs section:
        container_node = nodes.container(classes=['toggle'])
        container_header = nodes.paragraph(classes=['header'])
        container_header += nodes.strong(text='Input arguments')
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, inputs, ''):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        # Make outputs section:
        container_node = nodes.container(classes=['toggle'])
        container_header = nodes.paragraph(classes=['header'])
        container_header += nodes.strong(text='Output results')
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, outputs, ''):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        return [
            section,
            addnodes.index(entries=[('single', name, 'process-' + slug, '',
                                     None)])
        ]
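The dict consumed above comes straight from loading a process definition written in YAML. A minimal sketch of such an input, with purely illustrative values (the keys mirror the lookups at the top of the method; only ``description``, ``input`` and ``output`` are optional):

import yaml

process_yaml = """
name: Example process
slug: example-process
type: "data:example:"
version: 1.0.0
source_uri: https://example.org/processes/example.yml
description: Illustrative process definition.
input: []
output: []
"""

process = yaml.safe_load(process_yaml)
# 'self' is the Sphinx directive instance; the call below is hypothetical.
# section, index_entry = self.make_process_node(process)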
Example #2
    def make_process_node(self, process):
        """Fill the content of process definiton node.

        :param dict process: process data as given from yaml.load function
        :return: process node

        """
        name = process["name"]
        slug = process["slug"]
        typ = process["type"]
        version = process["version"]
        description = process.get("description", "")
        source_uri = process["source_uri"]
        inputs = process.get("input", [])
        outputs = process.get("output", [])

        # Make process name a section title:
        section = nodes.section(ids=["process-" + slug])
        section += nodes.title(name, name)

        # Make process header:
        section += self.make_process_header(slug, typ, version, source_uri,
                                            description, inputs)

        # Make inputs section:
        container_node = nodes.container(classes=["toggle"])
        container_header = nodes.paragraph(classes=["header"])
        container_header += nodes.strong(text="Input arguments")
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, inputs, ""):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        # Make outputs section:
        container_node = nodes.container(classes=["toggle"])
        container_header = nodes.paragraph(classes=["header"])
        container_header += nodes.strong(text="Output results")
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, outputs, ""):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        return [
            section,
            addnodes.index(entries=[("single", name, "process-" + slug, "",
                                     None)]),
        ]
Example #3
    def make_process_node(self, process):
        """Fill the content of process definiton node.

        :param dict process: process data as given from yaml.load function
        :return: process node

        """
        name = process['name']
        slug = process['slug']
        typ = process['type']
        version = process['version']
        description = process.get('description', '')
        source_uri = process['source_uri']
        inputs = process.get('input', [])
        outputs = process.get('output', [])

        # Make process name a section title:
        section = nodes.section(ids=['process-' + slug])
        section += nodes.title(name, name)

        # Make process header:
        section += self.make_process_header(slug, typ, version, source_uri, description, inputs)

        # Make inputs section:
        container_node = nodes.container(classes=['toggle'])
        container_header = nodes.paragraph(classes=['header'])
        container_header += nodes.strong(text='Input arguments')
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, inputs, ''):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        # Make outputs section:
        container_node = nodes.container(classes=['toggle'])
        container_header = nodes.paragraph(classes=['header'])
        container_header += nodes.strong(text='Output results')
        container_node += container_header

        container_body = nodes.container()
        for field_schema, _, path in iterate_schema({}, outputs, ''):
            container_body += nodes.strong(text=path)
            container_body += self.make_properties_list(field_schema)

        container_node += container_body
        section += container_node

        return [section, addnodes.index(entries=[('single', name, 'process-' + slug, '', None)])]
Example #4
    def valid(self, instance, schema):
        """Validate schema."""
        try:
            jsonschema.validate(instance, schema)
        except jsonschema.exceptions.ValidationError as ex:
            self.stderr.write("    VALIDATION ERROR: {}".format(
                instance["name"] if "name" in instance else ""))
            self.stderr.write("        path:       {}".format(ex.path))
            self.stderr.write("        message:    {}".format(ex.message))
            self.stderr.write("        validator:  {}".format(ex.validator))
            self.stderr.write("        val. value: {}".format(
                ex.validator_value))
            return False

        try:
            # Check that default values fit field schema.
            for field in ["input", "output", "schema"]:
                for schema, _, path in iterate_schema({},
                                                      instance.get(field, {})):
                    if "default" in schema:
                        validate_schema({schema["name"]: schema["default"]},
                                        [schema])
        except ValidationError:
            self.stderr.write("    VALIDATION ERROR: {}".format(
                instance["name"]))
            self.stderr.write(
                "        Default value of field '{}' is not valid.".format(
                    path))
            return False

        return True
Example #5
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    inputs = copy.deepcopy(data.input)
    if data.process.input_schema:
        hydrate_input_references(inputs,
                                 data.process.input_schema,
                                 hydrate_values=False)
    template_context = inputs

    # Set default values
    for field_schema, field, path in iterate_schema(
            data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            tmpl = field_schema['default']
            if field_schema['type'].startswith('list:'):
                tmpl = [
                    render_template(data.process, tmp, template_context)
                    if isinstance(tmp, six.string_types) else tmp
                    for tmp in tmpl
                ]
            elif isinstance(tmpl, six.string_types):
                tmpl = render_template(data.process, tmpl, template_context)

            dict_dot(data, path, tmpl)
Example #6
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(
        process_input, input_schema, include_groups=True
    ):
        if "group" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, {})
        if "default" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, field_schema["default"])
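A minimal usage sketch, assuming the ``iterate_schema``/``dict_dot`` semantics from resolwe.flow.utils: missing optional fields are filled in place, including fields nested under a group (schema and values here are illustrative):

input_schema = [
    {"name": "genome", "type": "data:genome:"},
    {
        "name": "options",
        "group": [
            {"name": "threshold", "type": "basic:integer:", "default": 10},
        ],
    },
]

process_input = {"genome": 42}
fill_with_defaults(process_input, input_schema)
# process_input is now {"genome": 42, "options": {"threshold": 10}}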
Example #7
    def storage_fields(self) -> Set[str]:
        """Get the names of storage fields in schema."""
        if self._storage_fields is None:
            self._storage_fields = {
                field_name
                for schema, _, field_name in iterate_schema(
                    self.data.output, self.data.process.output_schema)
                if schema["type"].startswith("basic:json:")
            }
        return self._storage_fields
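For illustration, the same comprehension as a standalone helper; this is a sketch only, since the real property caches the result on the instance so the schema walk happens at most once:

from typing import Set

def storage_field_names(output: dict, output_schema: list) -> Set[str]:
    """Collect names of fields backed by Storage objects (illustrative)."""
    return {
        field_name
        for schema, _, field_name in iterate_schema(output, output_schema)
        if schema["type"].startswith("basic:json:")
    }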
Example #8
    def assertFields(self, obj, path, value):
        """Compare object's field to the given value.

        The file size is ignored. Use assertFile to validate
        file contents.

        :param obj: object with the field to compare
        :type obj: ~resolwe.flow.models.Data

        :param str path: path to
            :class:`~resolwe.flow.models.Data` object's field

        :param str value: desired value of
            :class:`~resolwe.flow.models.Data` object's field

        """
        field_schema, field = None, None
        for field_schema, field, field_path in iterate_schema(
            obj.output, obj.process.output_schema, ""
        ):
            if path == field_path:
                break
        else:
            self.fail("Field not found in path {}".format(path))

        field_name = field_schema["name"]
        field_value = field[field_name]

        def remove_file_size(field_value):
            """Remove size value from file field."""
            if "size" in field_value:
                del field_value["size"]

        # Ignore size in file and dir fields
        if field_schema["type"].startswith("basic:file:") or field_schema[
            "type"
        ].startswith("basic:dir:"):
            remove_file_size(field_value)
            remove_file_size(value)

        elif field_schema["type"].startswith("list:basic:file:") or field_schema[
            "type"
        ].startswith("list:basic:dir:"):
            for val in field_value:
                remove_file_size(val)
            for val in value:
                remove_file_size(val)

        self.assertEqual(
            field_value,
            value,
            msg="Field 'output.{}' mismatch: {} != {}".format(path, field_value, value)
            + self._debug_info(obj),
        )
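In a test case built on this mixin, a call might look like the following; the process slug, inputs and field path are illustrative, and ``run_process`` is assumed to return the resulting Data object as in resolwe's process test cases:

data = self.run_process("example-process", {"genome": 42})
self.assertFields(data, "stats.mapped", 123)  # compares output.stats.mapped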
Example #9
    def register_descriptors(self, descriptor_schemas, user, force=False, verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for field in ['var', 'schema']:
                for schema, _, _ in iterate_schema({}, descriptor_schema.get(field, {})):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in descriptor_schema:
                descriptor_schema['slug'] = slugify(descriptor_schema.pop('name').replace(':', '-'))
                descriptor_schema['name'] = descriptor_schema.pop('label')

            if 'schema' not in descriptor_schema:
                descriptor_schema['schema'] = []

            if 'static' in descriptor_schema:
                descriptor_schema['schema'].extend(descriptor_schema.pop('static'))
            if 'var' in descriptor_schema:
                descriptor_schema['schema'].extend(descriptor_schema.pop('var'))

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema['slug']
            version = descriptor_schema.get('version', '0.0.0')
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
                continue

            descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                DescriptorSchema.objects.create(contributor=user, **descriptor_schema)
                log_descriptors.append("Inserted {}".format(slug))

        if len(log_descriptors) > 0 and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Example #10
    def make_process_header(self, slug, typ, version, source_uri, description,
                            inputs):
        """Generate a process definition header.

        :param str slug: process' slug
        :param str typ: process' type
        :param str version:  process' version
        :param str source_uri: url to the process definition
        :param str description: process' description
        :param dict inputs: process' inputs

        """
        node = addnodes.desc()
        signode = addnodes.desc_signature(slug, "")
        node.append(signode)

        node["objtype"] = node["desctype"] = typ

        signode += addnodes.desc_annotation(typ, typ, classes=["process-type"])
        signode += addnodes.desc_addname("", "")
        signode += addnodes.desc_name(slug + " ", slug + " ")

        paramlist = addnodes.desc_parameterlist()

        for field_schema, _, _ in iterate_schema({}, inputs, ""):
            field_type = field_schema["type"]
            field_name = field_schema["name"]

            field_default = field_schema.get("default", None)
            field_default = "" if field_default is None else "={}".format(
                field_default)

            param = addnodes.desc_parameter("", "", noemph=True)
            param += nodes.emphasis(field_type,
                                    field_type,
                                    classes=["process-type"])
            # separate by non-breaking space in the output
            param += nodes.strong(text="\xa0\xa0" + field_name)

            paramlist += param

        signode += paramlist
        signode += nodes.reference(
            "",
            nodes.Text("[Source: v{}]".format(version)),
            refuri=source_uri,
            classes=["viewcode-link"],
        )

        desc = nodes.paragraph()
        desc += nodes.Text(description, description)

        return [node, desc]
Example #11
    def make_process_header(self, slug, typ, version, source_uri, description,
                            inputs):
        """Generate a process definition header.

        :param str slug: process' slug
        :param str typ: process' type
        :param str version:  process' version
        :param str source_uri: url to the process definition
        :param str description: process' description
        :param dict inputs: process' inputs

        """
        node = addnodes.desc()
        signode = addnodes.desc_signature(slug, '')
        node.append(signode)

        node['objtype'] = node['desctype'] = typ

        signode += addnodes.desc_annotation(typ, typ, classes=['process-type'])
        signode += addnodes.desc_addname('', '')
        signode += addnodes.desc_name(slug + ' ', slug + ' ')

        paramlist = addnodes.desc_parameterlist()

        for field_schema, _, _ in iterate_schema({}, inputs, ''):
            field_type = field_schema['type']
            field_name = field_schema['name']

            field_default = field_schema.get('default', None)
            field_default = '' if field_default is None else '={}'.format(
                field_default)

            param = addnodes.desc_parameter('', '', noemph=True)
            param += nodes.emphasis(field_type,
                                    field_type,
                                    classes=['process-type'])
            # separate by non-breaking space in the output
            param += nodes.strong(text='\xa0\xa0' + field_name)

            paramlist += param

        signode += paramlist
        signode += nodes.reference('',
                                   nodes.Text('[Source: v{}]'.format(version)),
                                   refuri=source_uri,
                                   classes=['viewcode-link'])

        desc = nodes.paragraph()
        desc += nodes.Text(description, description)

        return [node, desc]
Example #12
    def register_descriptors(self, descriptor_schemas, user, force=False, verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for schema, _, _ in iterate_schema({}, descriptor_schema.get('schema', {})):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

            if 'schema' not in descriptor_schema:
                descriptor_schema['schema'] = []

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema['slug']
            version = descriptor_schema.get('version', '0.0.0')
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
                continue

            previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
            if previous_descriptor_qs.exists():
                previous_descriptor = previous_descriptor_qs.latest()
            else:
                previous_descriptor = None

            descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                descriptor = DescriptorSchema.objects.create(contributor=user, **descriptor_schema)
                assign_contributor_permissions(descriptor)
                if previous_descriptor:
                    copy_permissions(previous_descriptor, descriptor)
                log_descriptors.append("Inserted {}".format(slug))

        if log_descriptors and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Example #13
    def assertFields(self, obj, path, value):  # pylint: disable=invalid-name
        """Compare object's field to the given value.

        The file size is ignored. Use assertFile to validate
        file contents.

        :param obj: object with the field to compare
        :type obj: ~resolwe.flow.models.Data

        :param str path: path to
            :class:`~resolwe.flow.models.Data` object's field

        :param str value: desired value of
            :class:`~resolwe.flow.models.Data` object's field

        """
        field_schema, field = None, None
        for field_schema, field, field_path in iterate_schema(obj.output, obj.process.output_schema, ''):
            if path == field_path:
                break
        else:
            self.fail("Field not found in path {}".format(path))

        field_name = field_schema['name']
        field_value = field[field_name]

        def remove_file_size(field_value):
            """Remove size value from file field."""
            if 'size' in field_value:
                del field_value['size']

        # Ignore size in file and dir fields
        if (field_schema['type'].startswith('basic:file:')
                or field_schema['type'].startswith('basic:dir:')):
            remove_file_size(field_value)
            remove_file_size(value)

        elif (field_schema['type'].startswith('list:basic:file:')
              or field_schema['type'].startswith('list:basic:dir:')):
            for val in field_value:
                remove_file_size(val)
            for val in value:
                remove_file_size(val)

        self.assertEqual(
            field_value, value,
            msg="Field 'output.{}' mismatch: {} != {}".format(path, field_value, value) + self._debug_info(obj)
        )
Example #14
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    # Set default values
    for field_schema, field, path in iterate_schema(data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            dict_dot(data, path, field_schema['default'])
Example #15
    def make_process_header(self, slug, typ, version, source_uri, description, inputs):
        """Generate a process definition header.

        :param str slug: process' slug
        :param str typ: process' type
        :param str version:  process' version
        :param str source_uri: url to the process definition
        :param str description: process' description
        :param dict inputs: process' inputs

        """
        node = addnodes.desc()
        signode = addnodes.desc_signature(slug, '')
        node.append(signode)

        node['objtype'] = node['desctype'] = typ

        signode += addnodes.desc_annotation(typ, typ, classes=['process-type'])
        signode += addnodes.desc_addname('', '')
        signode += addnodes.desc_name(slug + ' ', slug + ' ')

        paramlist = addnodes.desc_parameterlist()

        for field_schema, _, _ in iterate_schema({}, inputs, ''):
            field_type = field_schema['type']
            field_name = field_schema['name']

            field_default = field_schema.get('default', None)
            field_default = '' if field_default is None else '={}'.format(field_default)

            param = addnodes.desc_parameter('', '', noemph=True)
            param += nodes.emphasis(field_type, field_type, classes=['process-type'])
            # separate by non-breaking space in the output
            param += nodes.strong(text='\xa0\xa0' + field_name)

            paramlist += param

        signode += paramlist
        signode += nodes.reference('', nodes.Text('[Source: v{}]'.format(version)),
                                   refuri=source_uri, classes=['viewcode-link'])

        desc = nodes.paragraph()
        desc += nodes.Text(description, description)

        return [node, desc]
Example #16
    def test_processor_types(self):
        procs = list(Process.objects.all())
        types = {}
        errors_equals = set()
        errors_subtype = set()

        for p in procs:
            fields = sorted(
                "{} {}".format(pth, schema["type"])
                for schema, _, pth in iterate_schema({}, p.output_schema, "output")
            )
            if p.type not in types:
                types[p.type] = {"fields": fields, "name": [p.name]}
            else:
                types[p.type]["name"].append(p.name)

                if types[p.type]["fields"] != fields:
                    errors_equals.add(p.type)

        if errors_equals:
            self.fail(
                "Processes of the same type should have the same output fields:\n\n    {}".format(
                    "\n    ".join(
                        ", ".join(types[typ]["name"]) for typ in errors_equals
                    )
                )
            )

        type_list = sorted(types)
        for i, typ in enumerate(type_list):
            for prev_typ in type_list[:i]:
                if typ.startswith(prev_typ):
                    prev_typ_fields = types[prev_typ]["fields"]
                    typ_fields = types[typ]["fields"]
                    if set(prev_typ_fields).difference(typ_fields):
                        errors_subtype.add("{} {}".format(prev_typ, typ))

        if errors_subtype:
            self.fail(
                "Processors should include all output fields of the parent type:\n\n    {}".format(
                    "\n    ".join(errors_subtype)
                )
            )
Example #17
    def test_processor_types(self):
        procs = list(Process.objects.all())
        types = {}
        errors_equals = set()
        errors_subtype = set()

        for p in procs:
            fields = sorted('{} {}'.format(pth, schema['type'])
                            for schema, _, pth in iterate_schema(
                                {}, p.output_schema, 'output'))
            if p.type not in types:
                types[p.type] = {'fields': fields, 'name': [p.name]}
            else:
                types[p.type]['name'].append(p.name)

                if types[p.type]['fields'] != fields:
                    errors_equals.add(p.type)

        if errors_equals:
            self.fail(
                'Processes of the same type should have the same output fields:\n\n    {}'
                .format('\n    '.join(', '.join(types[typ]['name'])
                                      for typ in errors_equals)))

        type_list = sorted(types)
        for i, typ in enumerate(type_list):
            for prev_typ in type_list[:i]:
                if typ.startswith(prev_typ):
                    prev_typ_fields = types[prev_typ]['fields']
                    typ_fields = types[typ]['fields']
                    if set(prev_typ_fields).difference(typ_fields):
                        errors_subtype.add('{} {}'.format(prev_typ, typ))

        if errors_subtype:
            self.fail(
                'Processors should include all output fields of the parent type:\n\n    {}'
                .format('\n    '.join(errors_subtype)))
Example #18
    def valid(self, instance, schema):
        """Validate schema."""
        try:
            jsonschema.validate(instance, schema)
        except jsonschema.exceptions.ValidationError as ex:
            self.stderr.write("    VALIDATION ERROR: {}".format(instance['name'] if 'name' in instance else ''))
            self.stderr.write("        path:       {}".format(ex.path))
            self.stderr.write("        message:    {}".format(ex.message))
            self.stderr.write("        validator:  {}".format(ex.validator))
            self.stderr.write("        val. value: {}".format(ex.validator_value))
            return False

        try:
            # Check that default values fit field schema.
            for field in ['input', 'output', 'schema']:
                for schema, _, path in iterate_schema({}, instance.get(field, {})):
                    if 'default' in schema:
                        validate_schema({schema['name']: schema['default']}, [schema])
        except ValidationError:
            self.stderr.write("    VALIDATION ERROR: {}".format(instance['name']))
            self.stderr.write("        Default value of field '{}' is not valid.". format(path))
            return False

        return True
Example #19
    def test_processor_types(self):
        procs = list(Process.objects.all())
        types = {}
        errors_equals = set()
        errors_subtype = set()

        for p in procs:
            fields = sorted('{} {}'.format(pth, schema['type']) for schema, _, pth in
                            iterate_schema({}, p.output_schema, 'output'))
            if p.type not in types:
                types[p.type] = {
                    'fields': fields,
                    'name': [p.name]
                }
            else:
                types[p.type]['name'].append(p.name)

                if types[p.type]['fields'] != fields:
                    errors_equals.add(p.type)

        if errors_equals:
            self.fail('Processes of the same type should have the same output fields:\n\n    {}'.format(
                '\n    '.join(', '.join(types[typ]['name']) for typ in errors_equals)))

        type_list = sorted(types)
        for i, typ in enumerate(type_list):
            for prev_typ in type_list[:i]:
                if typ.startswith(prev_typ):
                    prev_typ_fields = types[prev_typ]['fields']
                    typ_fields = types[typ]['fields']
                    if set(prev_typ_fields).difference(typ_fields):
                        errors_subtype.add('{} {}'.format(prev_typ, typ))

        if errors_subtype:
            self.fail('Processors should include all output fields of the parent type:\n\n    {}'.format(
                '\n    '.join(errors_subtype)))
Example #20
def validate_schema(instance, schema, test_required=True, path_prefix=None):
    """Check if DictField values are consistent with our data types.

    Perform basic JSON schema validation and our custom validations:

      * check that required fields are given (if `test_required` is set
        to ``True``)
      * check if ``basic:file:`` and ``list:basic:file:`` fields match
        the regex given in the schema (only if ``validate_regex`` is
        defined in the schema for the corresponding fields) and that the
        files exist (only if ``path_prefix`` is given)
      * check that directories referenced in ``basic:dir:`` and
        ``list:basic:dir:`` fields exist (only if ``path_prefix`` is
        given)
      * check that referenced ``Data`` objects (in ``data:<data_type>``
        and ``list:data:<data_type>`` fields) exist and are of type
        ``<data_type>``
      * check that referenced ``Storage`` objects (in ``basic:json``
        fields) exist

    :param list instance: Instance to be validated
    :param list schema: Schema for validation
    :param bool test_required: Flag for testing if all required fields
        are present. It is useful if validation is run before the ``Data``
        object is finished and some fields are still missing
        (default: ``True``)
    :param str path_prefix: path prefix used for checking if files and
        directories exist (default: ``None``)
    :rtype: None
    :raises ValidationError: if ``instance`` doesn't match schema
        defined in ``schema``

    """
    def validate_refs(field):
        """Validate reference paths."""
        if 'refs' in field:
            for refs_filename in field['refs']:
                refs_path = os.path.join(path_prefix, refs_filename)
                if not (os.path.isfile(refs_path) or os.path.isdir(refs_path)):
                    raise ValidationError(
                        "File referenced in `refs` ({}) does not exist".format(
                            refs_path))

    def validate_file(field, regex):
        """Validate file name (and check that it exists)."""
        filename = field['file']

        if regex and not re.search(regex, filename):
            raise ValidationError(
                "File name {} does not match regex {}".format(filename, regex))

        if path_prefix:
            path = os.path.join(path_prefix, filename)
            if not os.path.isfile(path):
                raise ValidationError(
                    "Referenced file ({}) does not exist".format(path))

            validate_refs(field)

    def validate_dir(field):
        """Check that dirs and referenced files exists."""
        dirname = field['dir']

        if path_prefix:
            path = os.path.join(path_prefix, dirname)
            if not os.path.isdir(path):
                raise ValidationError(
                    "Referenced dir ({}) does not exist".format(path))

            validate_refs(field)

    def validate_data(data_pk, type_):
        """Check that `Data` objects exist and is of right type."""
        from .data import Data  # prevent circular import

        data_qs = Data.objects.filter(pk=data_pk).values('process__type')
        if not data_qs.exists():
            raise ValidationError(
                "Referenced `Data` object does not exist (id:{})".format(
                    data_pk))
        data = data_qs.first()
        if not data['process__type'].startswith(type_):
            raise ValidationError(
                "Data object of type `{}` is required, but type `{}` is given. "
                "(id:{})".format(type_, data['process__type'], data_pk))

    is_dirty = False
    dirty_fields = []
    for _schema, _fields, _ in iterate_schema(instance, schema):
        name = _schema['name']
        is_required = _schema.get('required', True)

        if test_required and is_required and name not in _fields:
            is_dirty = True
            dirty_fields.append(name)

        if name in _fields:
            field = _fields[name]
            type_ = _schema.get('type', "")

            # Treat None as if the field is missing.
            if not is_required and field is None:
                continue

            try:
                jsonschema.validate([{
                    "type": type_,
                    "value": field
                }], TYPE_SCHEMA)
            except jsonschema.exceptions.ValidationError as ex:
                raise ValidationError(ex.message)

            choices = [
                choice['value'] for choice in _schema.get('choices', [])
            ]
            allow_custom_choice = _schema.get('allow_custom_choice', False)
            if choices and not allow_custom_choice and field not in choices:
                raise ValidationError(
                    "Value of field '{}' must match one of predefined choices. "
                    "Current value: {}".format(name, field))

            if type_ == 'basic:file:':
                validate_file(field, _schema.get('validate_regex'))

            elif type_ == 'list:basic:file:':
                for obj in field:
                    validate_file(obj, _schema.get('validate_regex'))

            elif type_ == 'basic:dir:':
                validate_dir(field)

            elif type_ == 'list:basic:dir:':
                for obj in field:
                    validate_dir(obj)

            elif type_ == 'basic:json:' and not Storage.objects.filter(
                    pk=field).exists():
                raise ValidationError(
                    "Referenced `Storage` object does not exist (id:{})".
                    format(field))

            elif type_.startswith('data:'):
                validate_data(field, type_)

            elif type_.startswith('list:data:'):
                for data_id in field:
                    validate_data(data_id,
                                  type_[5:])  # remove `list:` from type

    try:
        # Check that schema definitions exist for all fields
        for _, _ in iterate_fields(instance, schema):
            pass
    except KeyError as ex:
        raise ValidationError(str(ex))

    if is_dirty:
        dirty_fields = ['"{}"'.format(field) for field in dirty_fields]
        raise DirtyError("Required fields {} not given.".format(
            ', '.join(dirty_fields)))
Example #21
    def create(self, request, *args, **kwargs):
        """Create a resource."""
        collections = request.data.get('collections', [])

        # check that the user has permissions on all collections that the
        # Data object will be added to
        for collection_id in collections:
            try:
                collection = Collection.objects.get(pk=collection_id)
            except Collection.DoesNotExist:
                return Response(
                    {
                        'collections': [
                            'Invalid pk "{}" - object does not exist.'.format(
                                collection_id)
                        ]
                    },
                    status=status.HTTP_400_BAD_REQUEST)

            if not request.user.has_perm('add_collection', obj=collection):
                if request.user.has_perm('view_collection', obj=collection):
                    raise exceptions.PermissionDenied(
                        "You don't have `ADD` permission on collection (id: {})."
                        .format(collection_id))
                else:
                    raise exceptions.NotFound(
                        "Collection not found (id: {}).".format(collection_id))

        # translate the process's slug to its id
        process_slug = request.data.get('process', None)
        process_query = Process.objects.filter(slug=process_slug)
        process_query = get_objects_for_user(request.user, 'view_process',
                                             process_query)
        try:
            process = process_query.latest()
        except Process.DoesNotExist:
            return Response(
                {
                    'process': [
                        'Invalid process slug "{}" - object does not exist.'.
                        format(process_slug)
                    ]
                },
                status=status.HTTP_400_BAD_REQUEST)
        request.data['process'] = process.pk

        # perform "get_or_create" if requested - return existing object
        # if found
        if kwargs.pop('get_or_create', False):
            process_input = request.data.get('input', {})

            # use default values if they are not given
            for field_schema, fields, path in iterate_schema(
                    process_input, process.input_schema):
                if 'default' in field_schema and field_schema[
                        'name'] not in fields:
                    dict_dot(process_input, path, field_schema['default'])

            checksum = get_data_checksum(process_input, process.slug,
                                         process.version)
            data_qs = Data.objects.filter(
                checksum=checksum,
                process__persistence__in=[
                    Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP
                ],
            )
            data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
            if data_qs.exists():
                data = data_qs.order_by('created').last()
                serializer = self.get_serializer(data)
                return Response(serializer.data)

        # create the objects
        resp = super(DataViewSet, self).create(request, *args, **kwargs)

        # run manager
        manager.communicate()

        return resp
Example #22
def validate_schema(instance, schema, test_required=True, data_location=None,
                    skip_missing_data=False):
    """Check if DictField values are consistent with our data types.

    Perform basic JSON schema validation and our custom validations:

      * check that required fields are given (if `test_required` is set
        to ``True``)
      * check if ``basic:file:`` and ``list:basic:file:`` fields match
        the regex given in the schema (only if ``validate_regex`` is
        defined in the schema for the corresponding fields) and that the
        files exist (only if ``data_location`` is given)
      * check that directories referenced in ``basic:dir:`` and
        ``list:basic:dir:`` fields exist (only if ``data_location`` is
        given)
      * check that referenced ``Data`` objects (in ``data:<data_type>``
        and ``list:data:<data_type>`` fields) exist and are of type
        ``<data_type>``
      * check that referenced ``Storage`` objects (in ``basic:json``
        fields) exist

    :param list instance: Instance to be validated
    :param list schema: Schema for validation
    :param bool test_required: Flag for testing if all required fields
        are present. It is useful if validation is run before the ``Data``
        object is finished and some fields are still missing
        (default: ``True``)
    :param :class:`~resolwe.flow.models.data.DataLocation` data_location:
        data location used for checking if files and directories exist
        (default: ``None``)
    :param bool skip_missing_data: Don't raise an error if referenced
        ``Data`` object does not exist
    :rtype: None
    :raises ValidationError: if ``instance`` doesn't match schema
        defined in ``schema``

    """
    from .storage import Storage  # Prevent circular import.

    path_prefix = None
    if data_location:
        path_prefix = data_location.get_path()

    def validate_refs(field):
        """Validate reference paths."""
        for ref_filename in field.get('refs', []):
            ref_path = os.path.join(path_prefix, ref_filename)
            if not os.path.exists(ref_path):
                raise ValidationError("Path referenced in `refs` ({}) does not exist.".format(ref_path))
            if not (os.path.isfile(ref_path) or os.path.isdir(ref_path)):
                raise ValidationError(
                    "Path referenced in `refs` ({}) is neither a file or directory.".format(ref_path))

    def validate_file(field, regex):
        """Validate file name (and check that it exists)."""
        filename = field['file']

        if regex and not re.search(regex, filename):
            raise ValidationError(
                "File name {} does not match regex {}".format(filename, regex))

        if path_prefix:
            path = os.path.join(path_prefix, filename)
            if not os.path.exists(path):
                raise ValidationError("Referenced path ({}) does not exist.".format(path))
            if not os.path.isfile(path):
                raise ValidationError("Referenced path ({}) is not a file.".format(path))

            validate_refs(field)

    def validate_dir(field):
        """Check that dirs and referenced files exists."""
        dirname = field['dir']

        if path_prefix:
            path = os.path.join(path_prefix, dirname)
            if not os.path.exists(path):
                raise ValidationError("Referenced path ({}) does not exist.".format(path))
            if not os.path.isdir(path):
                raise ValidationError("Referenced path ({}) is not a directory.".format(path))

            validate_refs(field)

    def validate_data(data_pk, type_):
        """Check that `Data` objects exist and is of right type."""
        from .data import Data  # prevent circular import

        data_qs = Data.objects.filter(pk=data_pk).values('process__type')
        if not data_qs.exists():
            if skip_missing_data:
                return

            raise ValidationError(
                "Referenced `Data` object does not exist (id:{})".format(data_pk))
        data = data_qs.first()
        if not data['process__type'].startswith(type_):
            raise ValidationError(
                "Data object of type `{}` is required, but type `{}` is given. "
                "(id:{})".format(type_, data['process__type'], data_pk))

    def validate_range(value, interval, name):
        """Check that given value is inside the specified range."""
        if not interval:
            return

        if value < interval[0] or value > interval[1]:
            raise ValidationError(
                "Value of field '{}' is out of range. It should be between {} and {}.".format(
                    name, interval[0], interval[1]
                )
            )

    is_dirty = False
    dirty_fields = []
    for _schema, _fields, _ in iterate_schema(instance, schema):
        name = _schema['name']
        is_required = _schema.get('required', True)

        if test_required and is_required and name not in _fields:
            is_dirty = True
            dirty_fields.append(name)

        if name in _fields:
            field = _fields[name]
            type_ = _schema.get('type', "")

            # Treat None as if the field is missing.
            if not is_required and field is None:
                continue

            try:
                jsonschema.validate([{"type": type_, "value": field}], TYPE_SCHEMA)
            except jsonschema.exceptions.ValidationError as ex:
                raise ValidationError(ex.message)

            choices = [choice['value'] for choice in _schema.get('choices', [])]
            allow_custom_choice = _schema.get('allow_custom_choice', False)
            if choices and not allow_custom_choice and field not in choices:
                raise ValidationError(
                    "Value of field '{}' must match one of predefined choices. "
                    "Current value: {}".format(name, field)
                )

            if type_ == 'basic:file:':
                validate_file(field, _schema.get('validate_regex'))

            elif type_ == 'list:basic:file:':
                for obj in field:
                    validate_file(obj, _schema.get('validate_regex'))

            elif type_ == 'basic:dir:':
                validate_dir(field)

            elif type_ == 'list:basic:dir:':
                for obj in field:
                    validate_dir(obj)

            elif type_ == 'basic:json:' and not Storage.objects.filter(pk=field).exists():
                raise ValidationError(
                    "Referenced `Storage` object does not exist (id:{})".format(field))

            elif type_.startswith('data:'):
                validate_data(field, type_)

            elif type_.startswith('list:data:'):
                for data_id in field:
                    validate_data(data_id, type_[5:])  # remove `list:` from type

            elif type_ == 'basic:integer:' or type_ == 'basic:decimal:':
                validate_range(field, _schema.get('range'), name)

            elif type_ == 'list:basic:integer:' or type_ == 'list:basic:decimal:':
                for obj in field:
                    validate_range(obj, _schema.get('range'), name)

    try:
        # Check that schema definitions exist for all fields
        for _, _ in iterate_fields(instance, schema):
            pass
    except KeyError as ex:
        raise ValidationError(str(ex))

    if is_dirty:
        dirty_fields = ['"{}"'.format(field) for field in dirty_fields]
        raise DirtyError("Required fields {} not given.".format(', '.join(dirty_fields)))
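A hedged sketch of calling the validator above directly: ``schema`` is a list of field definitions and ``instance`` maps field names to values. Without ``data_location`` the file and directory checks are skipped; a missing required field raises ``DirtyError`` (assumed here to be the ``ValidationError`` subclass raised at the end of the function):

schema = [
    {"name": "reads", "type": "basic:string:", "required": True},
    {"name": "threshold", "type": "basic:integer:", "required": False},
]

validate_schema({"reads": "sample.fastq"}, schema)  # passes silently
try:
    validate_schema({}, schema)  # required field 'reads' is missing
except DirtyError as err:
    print(err)  # Required fields "reads" not given.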
Example #23
    def register_descriptors(self,
                             descriptor_schemas,
                             user,
                             force=False,
                             verbosity=1):
        """Read and register descriptors."""
        log_descriptors = []

        for descriptor_schema in descriptor_schemas:
            for schema, _, _ in iterate_schema({},
                                               descriptor_schema.get(
                                                   "schema", {})):
                if not schema["type"][-1].endswith(":"):
                    schema["type"] += ":"

            if "schema" not in descriptor_schema:
                descriptor_schema["schema"] = []

            if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
                continue

            slug = descriptor_schema["slug"]
            version = descriptor_schema.get("version", "0.0.0")
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(
                slug=slug).aggregate(Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip descriptor schema {}: newer version installed".
                    format(slug))
                continue

            previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
            if previous_descriptor_qs.exists():
                previous_descriptor = previous_descriptor_qs.latest()
            else:
                previous_descriptor = None

            descriptor_query = DescriptorSchema.objects.filter(slug=slug,
                                                               version=version)
            if descriptor_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip descriptor schema {}: same version installed"
                            .format(slug))
                    continue

                descriptor_query.update(**descriptor_schema)
                log_descriptors.append("Updated {}".format(slug))
            else:
                descriptor = DescriptorSchema.objects.create(
                    contributor=user, **descriptor_schema)
                assign_contributor_permissions(descriptor)
                if previous_descriptor:
                    copy_permissions(previous_descriptor, descriptor)
                log_descriptors.append("Inserted {}".format(slug))

        if log_descriptors and verbosity > 0:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Example #24
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(process_input, input_schema):
        if 'default' in field_schema and field_schema['name'] not in fields:
            dict_dot(process_input, path, field_schema['default'])
Example #25
    def register_processes(self, process_schemas, user, force=False, verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if 'flow_collection' in p:
                if 'entity' in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p['slug'])
                    )
                    continue

                p['entity'] = {'type': p.pop('flow_collection')}

            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                    if not schema['type'][-1].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'entity' in p:
                if 'type' not in p['entity']:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug'])
                    )
                    continue

                p['entity_type'] = p['entity']['type']
                p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type'])
                p['entity_input'] = p['entity'].get('input', None)
                p.pop('entity')

                if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p['slug'], p['entity_descriptor_schema'])
                    )
                    continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema', []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write("Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']
                    ))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                        self.stderr.write("Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                        ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write("Skip processor {}: same version installed".format(slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
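
Each variant of register_processes above enforces the same invariant before validation: every field `type` string must end with a colon. A minimal, self-contained sketch of that normalization on a flat list of field schemas (the helper name `normalize_types` is hypothetical; real schemas nest groups, which is why the examples walk them with `iterate_schema`):

def normalize_types(field_schemas):
    """Append a trailing colon to each field 'type' that lacks one."""
    for schema in field_schemas:
        if not schema['type'].endswith(':'):
            schema['type'] += ':'

fields = [{'name': 'reads', 'type': 'data:reads:fastq'}]
normalize_types(fields)
assert fields[0]['type'] == 'data:reads:fastq:'
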
Example #27
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # TODO: Remove this when all processes are migrated to the
            #       new syntax.
            if "flow_collection" in p:
                if "entity" in p:
                    self.stderr.write(
                        "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                        "allowed".format(p["slug"]))
                    continue

                p["entity"] = {"type": p.pop("flow_collection")}

            if p["type"][-1] != ":":
                p["type"] += ":"

            if "category" in p and not p["category"].endswith(":"):
                p["category"] += ":"

            for field in ["input", "output"]:
                for schema, _, _ in iterate_schema(
                    {}, p[field] if field in p else {}):
                    if not schema["type"][-1].endswith(":"):
                        schema["type"] += ":"
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if "entity" in p:
                if "type" not in p["entity"]:
                    self.stderr.write(
                        "Skip process {}: 'entity.type' required if 'entity' defined"
                        .format(p["slug"]))
                    continue
                if "input" in p["entity"] and p["entity"].get(
                        "always_create", False):
                    self.stderr.write(
                        "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' "
                        "is set to true.".format(p["slug"]))
                    continue

                p["entity_type"] = p["entity"]["type"]
                p["entity_descriptor_schema"] = p["entity"].get(
                    "descriptor_schema", p["entity_type"])
                p["entity_input"] = p["entity"].get("input", None)
                p["entity_always_create"] = p["entity"].get(
                    "always_create", False)
                p.pop("entity")

                if not DescriptorSchema.objects.filter(
                        slug=p["entity_descriptor_schema"]).exists():
                    self.stderr.write(
                        "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                        "field.".format(p["slug"],
                                        p["entity_descriptor_schema"]))
                    continue

            if "persistence" in p:
                persistence_mapping = {
                    "RAW": Process.PERSISTENCE_RAW,
                    "CACHED": Process.PERSISTENCE_CACHED,
                    "TEMP": Process.PERSISTENCE_TEMP,
                }

                p["persistence"] = persistence_mapping[p["persistence"]]

            if "scheduling_class" in p:
                scheduling_class_mapping = {
                    "interactive": Process.SCHEDULING_CLASS_INTERACTIVE,
                    "batch": Process.SCHEDULING_CLASS_BATCH,
                }

                p["scheduling_class"] = scheduling_class_mapping[
                    p["scheduling_class"]]

            if "input" in p:
                p["input_schema"] = p.pop("input")

            if "output" in p:
                p["output_schema"] = p.pop("output")

            slug = p["slug"]

            if "run" in p:
                # Set default language to 'bash' if not set.
                p["run"].setdefault("language", "bash")

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p["run"]["language"])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault("output_schema",
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p["run"]["language"]))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"):
                try:
                    container_image = dict_dot(
                        p, "requirements.executor.docker.image")
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p["version"]
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max("version"))["version__max"]
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
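
This variant additionally migrates the legacy 'flow_collection' field and flattens the nested 'entity' dict into top-level model fields. A self-contained sketch of just the flattening step ('flatten_entity' is a hypothetical helper mirroring the assignments above, not part of the original module):

def flatten_entity(p):
    """Flatten the nested 'entity' dict into top-level keys."""
    entity = p.pop('entity')
    p['entity_type'] = entity['type']
    p['entity_descriptor_schema'] = entity.get('descriptor_schema', entity['type'])
    p['entity_input'] = entity.get('input')
    p['entity_always_create'] = entity.get('always_create', False)

p = {'slug': 'demo-process', 'entity': {'type': 'sample'}}
flatten_entity(p)
assert p['entity_descriptor_schema'] == 'sample'
assert 'entity' not in p
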
Example #28
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            for field in ['input', 'output']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'scheduling_class' in p:
                scheduling_class_mapping = {
                    'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                    'batch': Process.SCHEDULING_CLASS_BATCH
                }

                p['scheduling_class'] = scheduling_class_mapping[
                    p['scheduling_class']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            # Validate if container image is allowed based on the configured pattern.
            # NOTE: This validation happens here and is not deferred to executors because the idea
            #       is that this will be moved to a "container" requirement independent of the
            #       executor.
            if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
                try:
                    container_image = dict_dot(
                        p, 'requirements.executor.docker.image')
                    if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                                    container_image):
                        self.stderr.write(
                            "Skip processor {}: container image does not match '{}'"
                            .format(
                                slug,
                                settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                            ))
                        continue
                except KeyError:
                    pass

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                assign_contributor_permissions(process)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
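
The version check relies on packing a dotted version string into a single integer so that newer versions compare greater. Neither convert_version_string_to_int nor VERSION_NUMBER_BITS is shown in these examples, so the following is only a plausible sketch of the idea, with made-up bit widths:

def version_to_int(version, bits=(8, 10, 14)):
    """Pack 'major.minor.patch' into one int (illustrative bit allocation)."""
    result = 0
    for part, width in zip(version.split('.'), bits):
        result = (result << width) | int(part)
    return result

assert version_to_int('2.0.0') > version_to_int('1.9.9')
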
Example #29
def validate_schema(
    instance, schema, test_required=True, data_location=None, skip_missing_data=False
):
    """Check if DictField values are consistent with our data types.

    Perform basic JSON schema validation and our custom validations:

      * check that required fields are given (if `test_required` is set
        to ``True``)
      * check if ``basic:file:`` and ``list:basic:file:`` fields match
        the regex given in the schema (only if ``validate_regex`` is
        defined for the corresponding fields) and exist (only if
        ``data_location`` is given)
      * check if directories referenced in ``basic:dir:`` and
        ``list:basic:dir:`` fields exist (only if ``data_location`` is
        given)
      * check that referenced ``Data`` objects (in ``data:<data_type>``
        and ``list:data:<data_type>`` fields) exist and are of type
        ``<data_type>``
      * check that referenced ``Storage`` objects (in ``basic:json:``
        fields) exist

    :param dict instance: Instance to be validated
    :param list schema: Schema for validation
    :param bool test_required: Flag for testing if all required fields
        are present. Disabling it is useful if validation is run before
        the ``Data`` object is finished and some fields are still
        missing (default: ``True``)
    :param :class:`~resolwe.flow.models.data.DataLocation` data_location:
        data location used for checking if files and directories exist
        (default: ``None``)
    :param bool skip_missing_data: Don't raise an error if referenced
        ``Data`` object does not exist
    :rtype: None
    :raises ValidationError: if ``instance`` doesn't match schema
        defined in ``schema``

    """
    from .storage import Storage  # Prevent circular import.

    path_prefix = None
    if data_location:
        path_prefix = data_location.get_path()

    def validate_refs(field):
        """Validate reference paths."""
        for ref_filename in field.get("refs", []):
            ref_path = os.path.join(path_prefix, ref_filename)
            if not os.path.exists(ref_path):
                raise ValidationError(
                    "Path referenced in `refs` ({}) does not exist.".format(ref_path)
                )
            if not (os.path.isfile(ref_path) or os.path.isdir(ref_path)):
                raise ValidationError(
                    "Path referenced in `refs` ({}) is neither a file or directory.".format(
                        ref_path
                    )
                )

    def validate_file(field, regex):
        """Validate file name (and check that it exists)."""
        filename = field["file"]

        if regex and not re.search(regex, filename):
            raise ValidationError(
                "File name {} does not match regex {}".format(filename, regex)
            )

        if path_prefix:
            path = os.path.join(path_prefix, filename)
            if not os.path.exists(path):
                raise ValidationError(
                    "Referenced path ({}) does not exist.".format(path)
                )
            if not os.path.isfile(path):
                raise ValidationError(
                    "Referenced path ({}) is not a file.".format(path)
                )

            validate_refs(field)

    def validate_dir(field):
        """Check that dirs and referenced files exists."""
        dirname = field["dir"]

        if path_prefix:
            path = os.path.join(path_prefix, dirname)
            if not os.path.exists(path):
                raise ValidationError(
                    "Referenced path ({}) does not exist.".format(path)
                )
            if not os.path.isdir(path):
                raise ValidationError(
                    "Referenced path ({}) is not a directory.".format(path)
                )

            validate_refs(field)

    def validate_data(data_pk, type_):
        """Check that `Data` objects exist and is of right type."""
        from .data import Data  # prevent circular import

        data_qs = Data.objects.filter(pk=data_pk).values("process__type")
        if not data_qs.exists():
            if skip_missing_data:
                return

            raise ValidationError(
                "Referenced `Data` object does not exist (id:{})".format(data_pk)
            )
        data = data_qs.first()
        if not data["process__type"].startswith(type_):
            raise ValidationError(
                "Data object of type `{}` is required, but type `{}` is given. "
                "(id:{})".format(type_, data["process__type"], data_pk)
            )

    def validate_range(value, interval, name):
        """Check that given value is inside the specified range."""
        if not interval:
            return

        if value < interval[0] or value > interval[1]:
            raise ValidationError(
                "Value of field '{}' is out of range. It should be between {} and {}.".format(
                    name, interval[0], interval[1]
                )
            )

    is_dirty = False
    dirty_fields = []
    for _schema, _fields, _ in iterate_schema(instance, schema):
        name = _schema["name"]
        is_required = _schema.get("required", True)

        if test_required and is_required and name not in _fields:
            is_dirty = True
            dirty_fields.append(name)

        if name in _fields:
            field = _fields[name]
            type_ = _schema.get("type", "")

            # Treat None as if the field is missing.
            if not is_required and field is None:
                continue

            try:
                jsonschema.validate([{"type": type_, "value": field}], TYPE_SCHEMA)
            except jsonschema.exceptions.ValidationError as ex:
                raise ValidationError(ex.message)

            choices = [choice["value"] for choice in _schema.get("choices", [])]
            allow_custom_choice = _schema.get("allow_custom_choice", False)
            if choices and not allow_custom_choice and field not in choices:
                raise ValidationError(
                    "Value of field '{}' must match one of predefined choices. "
                    "Current value: {}".format(name, field)
                )

            if type_ == "basic:file:":
                validate_file(field, _schema.get("validate_regex"))

            elif type_ == "list:basic:file:":
                for obj in field:
                    validate_file(obj, _schema.get("validate_regex"))

            elif type_ == "basic:dir:":
                validate_dir(field)

            elif type_ == "list:basic:dir:":
                for obj in field:
                    validate_dir(obj)

            elif (
                type_ == "basic:json:" and not Storage.objects.filter(pk=field).exists()
            ):
                raise ValidationError(
                    "Referenced `Storage` object does not exist (id:{})".format(field)
                )

            elif type_.startswith("data:"):
                validate_data(field, type_)

            elif type_.startswith("list:data:"):
                for data_id in field:
                    validate_data(data_id, type_[5:])  # remove `list:` from type

            elif type_ == "basic:integer:" or type_ == "basic:decimal:":
                validate_range(field, _schema.get("range"), name)

            elif type_ == "list:basic:integer:" or type_ == "list:basic:decimal:":
                for obj in field:
                    validate_range(obj, _schema.get("range"), name)

    try:
        # Check that schema definitions exist for all fields
        for _, _ in iterate_fields(instance, schema):
            pass
    except KeyError as ex:
        raise ValidationError(str(ex))

    if is_dirty:
        dirty_fields = ['"{}"'.format(field) for field in dirty_fields]
        raise DirtyError(
            "Required fields {} not given.".format(", ".join(dirty_fields))
        )
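
A minimal sketch of how validate_schema might be invoked, assuming the module's own imports (including DirtyError) are in scope; the schema and field names are illustrative:

schema = [
    {'name': 'threshold', 'type': 'basic:integer:', 'range': [0, 100]},
    {'name': 'label', 'type': 'basic:string:', 'required': False},
]

validate_schema({'threshold': 42}, schema)  # Passes: required field present and in range.

try:
    validate_schema({}, schema)  # 'threshold' is required but missing.
except DirtyError as err:
    print(err)  # Required fields "threshold" not given.
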
Example #30
    def register_processes(self,
                           process_schemas,
                           user,
                           force=False,
                           verbosity=1):
        """Read and register processors."""
        log_processors = []
        log_templates = []

        for p in process_schemas:
            if p['type'][-1] != ':':
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            # get `data_name` from `static`
            if 'static' in p:
                for schema, _, _ in iterate_schema({}, p['static']):
                    if schema['name'] == 'name' and 'default' in schema:
                        p['data_name'] = schema['default']

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in p:
                p['slug'] = slugify(p.pop('name').replace(':', '-'))
                p['name'] = p.pop('label')

                p.pop('var', None)
                p.pop('static', None)

            for field in ['input', 'output', 'var', 'static']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']

            if 'run' in p:
                # Set default language to 'bash' if not set.
                p['run'].setdefault('language', 'bash')

                # Transform output schema using the execution engine.
                try:
                    execution_engine = manager.get_execution_engine(
                        p['run']['language'])
                    extra_output_schema = execution_engine.get_output_schema(p)
                    if extra_output_schema:
                        p.setdefault('output_schema',
                                     []).extend(extra_output_schema)
                except InvalidEngineError:
                    self.stderr.write(
                        "Skip processor {}: execution engine '{}' not supported"
                        .format(slug, p['run']['language']))
                    continue

            version = p['version']
            int_version = convert_version_string_to_int(
                version, VERSION_NUMBER_BITS)

            # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(
                Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write(
                    "Skip processor {}: newer version installed".format(slug))
                continue

            previous_process_qs = Process.objects.filter(slug=slug)
            if previous_process_qs.exists():
                previous_process = previous_process_qs.latest()
            else:
                previous_process = None

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    if verbosity > 0:
                        self.stdout.write(
                            "Skip processor {}: same version installed".format(
                                slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                process = Process.objects.create(contributor=user, **p)
                if previous_process:
                    copy_permissions(previous_process, process)
                log_processors.append("Inserted {}".format(slug))

        if verbosity > 0:
            if log_processors:
                self.stdout.write("Processor Updates:")
                for log in log_processors:
                    self.stdout.write("  {}".format(log))

            if log_templates:
                self.stdout.write("Default Template Updates:")
                for log in log_templates:
                    self.stdout.write("  {}".format(log))
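
The backward-compatibility branch in this variant derives a slug from the legacy colon-separated 'name' and promotes 'label' to 'name'. A small sketch of that transformation, assuming slugify is Django's django.utils.text.slugify:

from django.utils.text import slugify

legacy = {'name': 'import:upload:reads-fastq', 'label': 'Upload reads'}
legacy['slug'] = slugify(legacy.pop('name').replace(':', '-'))
legacy['name'] = legacy.pop('label')
assert legacy == {'slug': 'import-upload-reads-fastq', 'name': 'Upload reads'}
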
Example #31
    def save(self, render_name=False, *args, **kwargs):
        """Save the data model."""
        # Mark the object as named by the user if the name was changed.
        if self.name != self._original_name:
            self.named_by_user = True

        create = self.pk is None
        if create:
            # Default values for INPUT
            input_schema = self.process.input_schema  # pylint: disable=no-member
            for field_schema, fields, path in iterate_schema(
                    self.input, input_schema):
                if 'default' in field_schema and field_schema[
                        'name'] not in fields:
                    dict_dot(self.input, path, field_schema['default'])

            if not self.name:
                self._render_name()
            else:
                self.named_by_user = True

            self.checksum = get_data_checksum(self.input, self.process.slug,
                                              self.process.version)  # pylint: disable=no-member

        elif render_name:
            self._render_name()

        self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

        if self.status != Data.STATUS_ERROR:
            hydrate_size(self)

        if create:
            validate_schema(self.input, self.process.input_schema)  # pylint: disable=no-member

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
                self.descriptor_dirty = False
            except DirtyError:
                self.descriptor_dirty = True
        elif self.descriptor and self.descriptor != {}:
            raise ValueError(
                "`descriptor_schema` must be defined if `descriptor` is given")

        if self.status != Data.STATUS_ERROR:
            path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'],
                                       str(self.pk))
            output_schema = self.process.output_schema  # pylint: disable=no-member
            if self.status == Data.STATUS_DONE:
                validate_schema(self.output,
                                output_schema,
                                path_prefix=path_prefix)
            else:
                validate_schema(self.output,
                                output_schema,
                                path_prefix=path_prefix,
                                test_required=False)

        with transaction.atomic():
            super(Data, self).save(*args, **kwargs)

            # We can only save dependencies after the data object has been saved. This
            # is why a transaction block is needed and the save method must be called first.
            if create:
                self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member

        if create:
            self.create_entity()
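
The default-filling loop in save() depends on iterate_schema yielding a dotted path for fields nested in groups, which dict_dot then writes into self.input. A self-contained sketch of that mechanic alone (set_by_path is a hypothetical stand-in for dict_dot, whose real implementation is not shown here):

def set_by_path(d, path, value):
    """Write value into nested dict d at a dotted path (stand-in for dict_dot)."""
    keys = path.split('.')
    for key in keys[:-1]:
        d = d.setdefault(key, {})
    d[keys[-1]] = value

inputs = {}
set_by_path(inputs, 'options.threads', 4)
assert inputs == {'options': {'threads': 4}}
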
Example #32
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(process_input, input_schema):
        if 'default' in field_schema and field_schema['name'] not in fields:
            dict_dot(process_input, path, field_schema['default'])
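
A minimal usage sketch for the flat (ungrouped) case, where the yielded path is simply the field name; the schema fields are illustrative:

input_schema = [
    {'name': 'threads', 'type': 'basic:integer:', 'default': 1},
    {'name': 'genome', 'type': 'basic:string:'},
]
process_input = {'genome': 'hg38'}

fill_with_defaults(process_input, input_schema)
assert process_input == {'genome': 'hg38', 'threads': 1}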