Ejemplo n.º 1
0
    def make_process_node(self, process):
        """Build the docutils node tree for one process definition.

        :param dict process: process data as given from yaml.load function
        :return: process node

        """
        slug = process['slug']
        name = process['name']

        # Make process name a section title:
        section = nodes.section(ids=['process-' + slug])
        section += nodes.title(name, name)

        # Make process header:
        section += self.make_process_header(
            slug,
            process['type'],
            process['version'],
            process['source_uri'],
            process.get('description', ''),
            process.get('input', []),
        )

        def build_schema_container(title, schema):
            """Return a collapsible container listing every field in *schema*."""
            container = nodes.container(classes=['toggle'])
            header = nodes.paragraph(classes=['header'])
            header += nodes.strong(text=title)
            container += header

            body = nodes.container()
            for field_schema, _, field_path in iterate_schema({}, schema, ''):
                body += nodes.strong(text=field_path)
                body += self.make_properties_list(field_schema)
            container += body
            return container

        # Make inputs and outputs sections:
        section += build_schema_container('Input arguments', process.get('input', []))
        section += build_schema_container('Output results', process.get('output', []))

        index_node = addnodes.index(entries=[('single', name, 'process-' + slug, '', None)])
        return [section, index_node]
Ejemplo n.º 2
0
    def register_descriptors(self, descriptor_schemas, user, force=False):
        """Read and register descriptor schemas.

        :param list descriptor_schemas: descriptor schema definitions as
            loaded from the ``.yml`` files
        :param user: contributor assigned to newly created schemas
        :param bool force: if ``True``, replace an already-installed
            schema of the same version
        """
        log_descriptors = []

        for ds in descriptor_schemas:

            # Normalize field types so each one is terminated with ':'.
            # Checking the whole string with endswith (instead of indexing
            # the last character and calling endswith on it) is equivalent
            # for non-empty strings and avoids IndexError on an empty one.
            for field in ['var', 'schema']:
                for schema, _, _ in iterate_schema({}, ds.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in ds:
                ds['slug'] = slugify(ds.pop('name').replace(':', '-'))
                ds['name'] = ds.pop('label')

            if 'schema' not in ds:
                ds['schema'] = []

            # Legacy 'static' and 'var' sections are folded into 'schema'.
            if 'static' in ds:
                ds['schema'].extend(ds.pop('static'))
            if 'var' in ds:
                ds['schema'].extend(ds.pop('var'))

            if not self.valid(ds, DESCRIPTOR_SCHEMA):
                continue

            slug = ds['slug']
            version = ds.get('version', '0.0.0')
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
                continue

            descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
            if descriptor_query.exists():
                if not force:
                    self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                    continue

                descriptor_query.update(**ds)
                log_descriptors.append("Updated {}".format(slug))
            else:
                DescriptorSchema.objects.create(contributor=user, **ds)
                log_descriptors.append("Inserted {}".format(slug))

        if log_descriptors:
            self.stdout.write("Descriptor schemas Updates:")
            for log in log_descriptors:
                self.stdout.write("  {}".format(log))
Ejemplo n.º 3
0
def add_post_save_handler(sender, instance, **kwargs):
    """Add object to flow_collection.

    * Only add `Data object` to `Sample` if process has defined
      `flow_collection` field.
    * Add object to existing `Sample`, if `input objects` that
      belong to `flow collection` (but not necessary all
      `input objects`), are part of the same `Sample`.
    * If `input objects` belong to different `Samples` or do not belong
      to any `Sample`, create new `Sample`.

    Collect IDs of all `input objects`.

    """
    if not kwargs['created'] or not instance.process.flow_collection:
        return

    # Collect the Data objects referenced by the inputs.
    input_objects = []
    for schema, fields, _ in iterate_schema(
            instance.input, instance.process.input_schema, ''):
        if 'name' not in schema or 'type' not in schema:
            continue
        field_name = schema['name']
        if field_name not in fields:
            continue
        value = fields[field_name]
        if schema['type'].startswith('data:'):
            input_objects.append(value)
        if schema['type'].startswith('list:data:'):
            input_objects.extend(value)

    candidate_samples = Sample.objects.filter(
        data__id__in=input_objects).distinct()

    if candidate_samples.count() == 1:
        # All inputs agree on a single sample - reuse it.
        sample = candidate_samples.first()
    else:
        # Inputs belong to zero or several samples - create a fresh one.
        schema_obj = DescriptorSchema.objects.get(
            slug=instance.process.flow_collection)
        sample = Sample.objects.create(
            contributor=instance.contributor,
            descriptor_schema=schema_obj,
            name=instance.name,
        )

        for permission in list(zip(*sample._meta.permissions))[0]:  # pylint: disable=protected-access
            shortcuts.assign_perm(permission, sample.contributor, sample)

        # XXX: This doesn't work, because signal is triggered before Data
        #      object is added to collections.
        # for collection in Collection.objects.filter(data=instance.pk):
        #     sample.collections.add(collection)

    sample.data.add(instance)
Ejemplo n.º 4
0
    def assertFields(self, obj, path, value):  # pylint: disable=invalid-name
        """Compare object's field to the given value.

        The file size is ignored. Use assertFile to validate
        file contents.

        :param obj: object with the field to compare
        :type obj: ~resolwe.flow.models.Data

        :param str path: path to
            :class:`~resolwe.flow.models.Data` object's field

        :param str value: desired value of
            :class:`~resolwe.flow.models.Data` object's field

        """
        field_schema, field = None, None
        for field_schema, field, candidate_path in iterate_schema(obj.output, obj.process.output_schema, ''):
            if candidate_path == path:
                break
        else:
            self.fail("Field not found in path {}".format(path))

        field_value = field[field_schema['name']]

        def strip_size(file_field):
            """Remove size value from file field."""
            file_field.pop('size', None)

        # Ignore size in file and dir fields
        schema_type = field_schema['type']
        if schema_type.startswith(('basic:file:', 'basic:dir:')):
            strip_size(field_value)
            strip_size(value)

        elif schema_type.startswith(('list:basic:file:', 'list:basic:dir:')):
            for item in field_value:
                strip_size(item)
            for item in value:
                strip_size(item)

        self.assertEqual(
            field_value, value,
            msg="Field 'output.{}' mismatch: {} != {}".format(path, field_value, value) +
            self._debug_info(obj))
Ejemplo n.º 5
0
    def make_process_header(self, slug, typ, version, source_uri, description, inputs):
        """Generate a process definition header.

        :param str slug: process' slug
        :param str typ: process' type
        :param str version: process' version
        :param str source_uri: url to the process definition
        :param str description: process' description
        :param dict inputs: process' inputs

        """
        node = addnodes.desc()
        signode = addnodes.desc_signature(slug, '')
        node.append(signode)

        node['objtype'] = node['desctype'] = typ

        signode += addnodes.desc_annotation(typ, typ, classes=['process-type'])
        signode += addnodes.desc_addname('', '')
        signode += addnodes.desc_name(slug + ' ', slug + ' ')

        # One parameter node per input field: the field type (emphasized)
        # followed by the field name. Field defaults are not rendered.
        paramlist = addnodes.desc_parameterlist()

        for field_schema, _, _ in iterate_schema({}, inputs, ''):
            field_type = field_schema['type']
            field_name = field_schema['name']

            param = addnodes.desc_parameter('', '', noemph=True)
            param += nodes.emphasis(field_type, field_type, classes=['process-type'])
            # separate by non-breaking space in the output
            param += nodes.strong(text=u'\xa0\xa0' + field_name)

            paramlist += param

        signode += paramlist
        # Link the signature to the process' source at the given version.
        signode += nodes.reference('', nodes.Text('[Source: v{}]'.format(version)),
                                   refuri=source_uri, classes=['viewcode-link'])

        desc = nodes.paragraph()
        desc += nodes.Text(description, description)

        return [node, desc]
Ejemplo n.º 6
0
def add_post_save_handler(sender, instance, **kwargs):
    """Add object to flow_collection.

    * Only add `Data object` to `Sample` if process has defined
      `flow_collection` field.
    * Add object to existing `Sample`, if `input objects` that
      belong to `flow collection` (but not necessary all
      `input objects`), are part of the same `Sample`.
    * If `input objects` belong to different `Samples` or do not belong
      to any `Sample`, create new `Sample`.

    Collect IDs of all `input objects`.

    """
    if not kwargs["created"] or not instance.process.flow_collection:
        return

    # IDs of every Data object referenced by the instance's inputs.
    referenced = []
    for schema, fields, _ in iterate_schema(instance.input, instance.process.input_schema, ""):
        if "name" not in schema or "type" not in schema:
            continue
        name = schema["name"]
        if name not in fields:
            continue
        value = fields[name]
        kind = schema["type"]
        if kind.startswith("data:"):
            referenced.append(value)
        if kind.startswith("list:data:"):
            referenced.extend(value)

    candidates = Sample.objects.filter(data__id__in=referenced).distinct()

    if candidates.count() == 1:
        # All referenced inputs live in one sample - attach to it.
        sample = candidates.first()
    else:
        # No unique sample - make a new one from the process' descriptor schema.
        schema_obj = DescriptorSchema.objects.get(slug=instance.process.flow_collection)
        sample = Sample.objects.create(
            contributor=instance.contributor, descriptor_schema=schema_obj, name=instance.name
        )

        for perm in list(zip(*sample._meta.permissions))[0]:  # pylint: disable=protected-access
            shortcuts.assign_perm(perm, sample.contributor, sample)

        # XXX: This doesn't work, because signal is triggered before Data
        #      object is added to collections.
        # for collection in Collection.objects.filter(data=instance.pk):
        #     sample.collections.add(collection)

    sample.data.add(instance)
Ejemplo n.º 7
0
def add_post_save_handler(sender, instance, **kwargs):
    """Add object to flow_collection.

    * Only add `Data object` to `Sample` if process has defined
      `flow_collection` field.
    * Add object to existing `Sample`, if `input objects` that
      belong to `flow collection` (but not necessary all
      `input objects`), are part of the same `Sample`.
    * If `input objects` belong to different `Samples` or do not belong
      to any `Sample`, create new `Sample`.

    Collect IDs of all `input objects`.

    """
    if kwargs['created'] and instance.process.flow_collection:
        input_objects = []
        # The schema path is not needed here, so it is bound to `_`.
        for field_schema, fields, _ in iterate_schema(instance.input, instance.process.input_schema, ''):
            if 'name' in field_schema and 'type' in field_schema and field_schema['name'] in fields:
                field = fields[field_schema['name']]
                if field_schema['type'].startswith('data:'):
                    input_objects.append(field)
                if field_schema['type'].startswith('list:data:'):
                    input_objects.extend(field)

        sample_query = Sample.objects.filter(data__id__in=input_objects).distinct()

        if sample_query.count() == 1:
            # All inputs share a single sample - reuse it.
            sample = sample_query.first()
        else:
            # Inputs span zero or several samples - create a new one.
            des_schema = DescriptorSchema.objects.get(slug=instance.process.flow_collection)
            sample = Sample.objects.create(
                contributor=instance.contributor,
                descriptor_schema=des_schema,
                name=instance.name,
            )

            # Grant the contributor all model-level permissions on the new sample.
            for permission in list(zip(*sample._meta.permissions))[0]:  # pylint: disable=protected-access
                shortcuts.assign_perm(permission, sample.contributor, sample)

        sample.data.add(instance)
Ejemplo n.º 8
0
    def test_processor_types(self):
        """Validate output-schema consistency across process types."""
        type_registry = {}
        same_type_mismatches = set()
        subtype_violations = set()

        for proc in Process.objects.all():
            output_fields = sorted(
                '{} {}'.format(field_path, schema['type'])
                for schema, _, field_path in iterate_schema({}, proc.output_schema, 'output'))

            entry = type_registry.get(proc.type)
            if entry is None:
                type_registry[proc.type] = {'fields': output_fields, 'name': [proc.name]}
            else:
                entry['name'].append(proc.name)
                if entry['fields'] != output_fields:
                    same_type_mismatches.add(proc.type)

        if same_type_mismatches:
            self.fail(
                'Processes of the same type should have the same output fields:\n\n    {}'
                .format('\n    '.join(', '.join(type_registry[typ]['name'])
                                      for typ in same_type_mismatches)))

        # A subtype must expose every output field of each of its parent types.
        ordered_types = sorted(type_registry)
        for index, typ in enumerate(ordered_types):
            for ancestor in ordered_types[:index]:
                if not typ.startswith(ancestor):
                    continue
                missing = set(type_registry[ancestor]['fields']).difference(
                    type_registry[typ]['fields'])
                if missing:
                    subtype_violations.add('{} {}'.format(ancestor, typ))

        if subtype_violations:
            self.fail(
                'Processors should include all output fields of the parent type:\n\n    {}'
                .format('\n    '.join(subtype_violations)))
Ejemplo n.º 9
0
    def test_processor_types(self):
        """Check that output schemas are consistent within and across types."""
        by_type = {}
        equal_errors = set()
        subtype_errors = set()

        def output_signature(process):
            """Return sorted '<path> <type>' strings for all output fields."""
            return sorted('{} {}'.format(pth, schema['type'])
                          for schema, _, pth in
                          iterate_schema({}, process.output_schema, 'output'))

        for process in list(Process.objects.all()):
            signature = output_signature(process)
            if process.type in by_type:
                record = by_type[process.type]
                record['name'].append(process.name)
                if record['fields'] != signature:
                    equal_errors.add(process.type)
            else:
                by_type[process.type] = {
                    'fields': signature,
                    'name': [process.name],
                }

        if equal_errors:
            self.fail('Processes of the same type should have the same output fields:\n\n    {}'.format(
                '\n    '.join(', '.join(by_type[typ]['name']) for typ in equal_errors)))

        # Every subtype must carry all output fields of its parent types.
        all_types = sorted(by_type)
        for i, typ in enumerate(all_types):
            for parent in all_types[:i]:
                if typ.startswith(parent) and \
                        set(by_type[parent]['fields']).difference(by_type[typ]['fields']):
                    subtype_errors.add('{} {}'.format(parent, typ))

        if subtype_errors:
            self.fail('Processors should include all output fields of the parent type:\n\n    {}'.format(
                '\n    '.join(subtype_errors)))
Ejemplo n.º 10
0
    def register_processes(self, process_schemas, user, force=False):
        """Read and register processors.

        :param list process_schemas: process definitions as loaded from
            the ``.yml`` files
        :param user: contributor assigned to newly created processes
        :param bool force: if ``True``, replace an already-installed
            process of the same version
        """
        log_processors = []
        log_templates = []

        for p in process_schemas:
            # Normalize type strings so each is terminated with ':'.
            # endswith on the whole string is equivalent to comparing the
            # last character for non-empty strings and avoids IndexError
            # on an empty one.
            if not p['type'].endswith(':'):
                p['type'] += ':'

            if 'category' in p and not p['category'].endswith(':'):
                p['category'] += ':'

            # get `data_name` from `static`
            if 'static' in p:
                for schema, _, _ in iterate_schema({}, p['static']):
                    if schema['name'] == 'name' and 'default' in schema:
                        p['data_name'] = schema['default']

            # support backward compatibility
            # TODO: update .yml files and remove
            if 'slug' not in p:
                p['slug'] = slugify(p.pop('name').replace(':', '-'))
                p['name'] = p.pop('label')

                p.pop('var', None)
                p.pop('static', None)

            for field in ['input', 'output', 'var', 'static']:
                for schema, _, _ in iterate_schema({}, p.get(field, {})):
                    if not schema['type'].endswith(':'):
                        schema['type'] += ':'
            # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

            if not self.valid(p, PROCESSOR_SCHEMA):
                continue

            # Map YAML persistence labels to model constants.
            if 'persistence' in p:
                persistence_mapping = {
                    'RAW': Process.PERSISTENCE_RAW,
                    'CACHED': Process.PERSISTENCE_CACHED,
                    'TEMP': Process.PERSISTENCE_TEMP,
                }

                p['persistence'] = persistence_mapping[p['persistence']]

            # Rename YAML keys to the model field names.
            if 'input' in p:
                p['input_schema'] = p.pop('input')

            if 'output' in p:
                p['output_schema'] = p.pop('output')

            slug = p['slug']
            version = p['version']
            int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

            # `latest version` is returned as `int` so it has to be compared to `int_version`
            latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
            if latest_version is not None and latest_version > int_version:
                self.stderr.write("Skip processor {}: newer version installed".format(slug))
                continue

            process_query = Process.objects.filter(slug=slug, version=version)
            if process_query.exists():
                if not force:
                    self.stdout.write("Skip processor {}: same version installed".format(slug))
                    continue

                process_query.update(**p)
                log_processors.append("Updated {}".format(slug))
            else:
                Process.objects.create(contributor=user, **p)
                log_processors.append("Inserted {}".format(slug))

        if log_processors:
            self.stdout.write("Processor Updates:")
            for log in log_processors:
                self.stdout.write("  {}".format(log))

        # NOTE(review): nothing in this method appends to log_templates,
        # so this block never prints - confirm whether template handling
        # was removed or is still to be added.
        if log_templates:
            self.stdout.write("Default Template Updates:")
            for log in log_templates:
                self.stdout.write("  {}".format(log))
Ejemplo n.º 11
0
    def create(self, request, *args, **kwargs):
        """Create a resource.

        Validates collection and process permissions, resolves the
        process slug to a primary key (mutating ``request.data``),
        optionally returns an existing Data object when
        ``get_or_create`` is requested, then delegates creation to the
        parent class and notifies the workflow manager.
        """
        collections = request.data.get('collections', [])

        # check that user has permissions on all collections that Data
        # object will be added to
        for collection_id in collections:
            try:
                collection = Collection.objects.get(pk=collection_id)
            except Collection.DoesNotExist:
                return Response({'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                                status=status.HTTP_400_BAD_REQUEST)

            if not request.user.has_perm('add_collection', obj=collection):
                # Authenticated users get 403; anonymous users get 404 so the
                # collection's existence is not revealed to them.
                if request.user.is_authenticated():
                    raise exceptions.PermissionDenied
                else:
                    raise exceptions.NotFound

        # Translate the process' slug to an id; ordering by version and
        # taking the last entry selects the highest installed version.
        process_slug = request.data.get('process', None)
        process_query = Process.objects.filter(slug=process_slug).order_by('version')
        if not process_query.exists():
            # XXX: security - is it ok to reveal which processes (don't) exist?
            return Response({'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
                            status=status.HTTP_400_BAD_REQUEST)
        process = process_query.last()
        request.data['process'] = process.pk

        # check that user has permission on the process
        if not request.user.has_perm('view_process', obj=process):
            if request.user.is_authenticated():
                raise exceptions.PermissionDenied
            else:
                raise exceptions.NotFound

        # perform "get_or_create" if requested - return existing object
        # if found
        if kwargs.pop('get_or_create', False):
            # use default values if they are not given, so the checksum below
            # matches regardless of whether the caller spelled out defaults
            for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
                if 'default' in field_schema and field_schema['name'] not in fields:
                    dict_dot(process_input, path, field_schema['default'])

            process_input = request.data.get('input', {})

            checksum = get_data_checksum(process_input, process.slug, process.version)
            # Only cached/temp processes are eligible for reuse; the result
            # must also be visible to the requesting user.
            data_qs = Data.objects.filter(
                checksum=checksum,
                process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
            )
            data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
            if data_qs.exists():
                # Return the most recently created matching object.
                data = data_qs.order_by('created').last()
                serializer = self.get_serializer(data)
                return Response(serializer.data)

        # create the objects
        resp = super(ResolweCreateDataModelMixin, self).create(request, *args, **kwargs)

        # run manager
        manager.communicate()

        return resp