def make_process_node(self, process):
    """Fill the content of the process definition node.

    :param dict process: process data as given from the yaml.load function
    :return: process node

    """
    name = process['name']
    slug = process['slug']
    typ = process['type']
    version = process['version']
    description = process.get('description', '')
    source_uri = process['source_uri']
    inputs = process.get('input', [])
    outputs = process.get('output', [])

    # Make process name a section title:
    section = nodes.section(ids=['process-' + slug])
    section += nodes.title(name, name)

    # Make process header:
    section += self.make_process_header(slug, typ, version, source_uri, description, inputs)

    # Make inputs section:
    container_node = nodes.container(classes=['toggle'])
    container_header = nodes.paragraph(classes=['header'])
    container_header += nodes.strong(text='Input arguments')
    container_node += container_header

    container_body = nodes.container()
    for field_schema, _, path in iterate_schema({}, inputs, ''):
        container_body += nodes.strong(text=path)
        container_body += self.make_properties_list(field_schema)

    container_node += container_body
    section += container_node

    # Make outputs section:
    container_node = nodes.container(classes=['toggle'])
    container_header = nodes.paragraph(classes=['header'])
    container_header += nodes.strong(text='Output results')
    container_node += container_header

    container_body = nodes.container()
    for field_schema, _, path in iterate_schema({}, outputs, ''):
        container_body += nodes.strong(text=path)
        container_body += self.make_properties_list(field_schema)

    container_node += container_body
    section += container_node

    return [section, addnodes.index(entries=[('single', name, 'process-' + slug, '', None)])]
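Every example in this collection relies on the iterate_schema helper from resolwe.flow.utils. As a rough mental model only (not the actual Resolwe implementation), it can be sketched as a generator that walks a list of field schemas and yields (field_schema, fields, path) triples, recursing into group fields:

def iterate_schema(fields, schema, path=''):
    """Minimal sketch of the assumed iterate_schema contract.

    The real helper in resolwe.flow.utils handles more cases (e.g. a
    ``None`` path); this sketch only illustrates the yielded triples.
    """
    for field_schema in schema:
        name = field_schema['name']
        child_path = '{}.{}'.format(path, name) if path else name
        if field_schema.get('type', '').startswith('basic:group:'):
            # Recurse into group fields, descending into the matching
            # sub-dict of values when one exists.
            group_fields = fields.get(name, {}) if isinstance(fields, dict) else {}
            for item in iterate_schema(group_fields, field_schema.get('group', []), child_path):
                yield item
        else:
            yield field_schema, fields, child_path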
def register_descriptors(self, descriptor_schemas, user, force=False):
    """Read and register descriptors."""
    log_descriptors = []

    for ds in descriptor_schemas:
        for field in ['var', 'schema']:
            for schema, _, _ in iterate_schema({}, ds.get(field, {})):
                if not schema['type'].endswith(':'):
                    schema['type'] += ':'

        # support backward compatibility
        # TODO: update .yml files and remove
        if 'slug' not in ds:
            ds['slug'] = slugify(ds.pop('name').replace(':', '-'))
            ds['name'] = ds.pop('label')

            if 'schema' not in ds:
                ds['schema'] = []

            if 'static' in ds:
                ds['schema'].extend(ds.pop('static'))
            if 'var' in ds:
                ds['schema'].extend(ds.pop('var'))

        if not self.valid(ds, DESCRIPTOR_SCHEMA):
            continue

        slug = ds['slug']
        version = ds.get('version', '0.0.0')
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
        latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
            continue

        descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
        if descriptor_query.exists():
            if not force:
                self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                continue

            descriptor_query.update(**ds)
            log_descriptors.append("Updated {}".format(slug))
        else:
            DescriptorSchema.objects.create(contributor=user, **ds)
            log_descriptors.append("Inserted {}".format(slug))

    if len(log_descriptors) > 0:
        self.stdout.write("Descriptor schema updates:")
        for log in log_descriptors:
            self.stdout.write("  {}".format(log))
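Both register functions in this collection (register_descriptors here and register_processes below) compare versions via convert_version_string_to_int. A plausible sketch, assuming a "major.minor.patch" string is packed into one integer using the per-component bit widths given by VERSION_NUMBER_BITS (the widths in the comment are illustrative, not Resolwe's actual values):

def convert_version_string_to_int(version, number_bits):
    """Pack a dotted version string into a single comparable integer."""
    numbers = [int(number) for number in version.split('.')]
    result = 0
    for number, bits in zip(numbers, number_bits):
        if number >= 2 ** bits:
            raise ValueError("Version component {} does not fit in {} bits".format(number, bits))
        # Shift the accumulated value left and append the next component,
        # so more significant components end up in higher bits.
        result = (result << bits) | number
    return result

# With e.g. number_bits = (8, 10, 14), the packed integers preserve the
# expected ordering: '1.0.0' < '1.0.1' < '1.1.0'.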
def add_post_save_handler(sender, instance, **kwargs):
    """Add object to flow_collection.

    * Add a `Data object` to a `Sample` only if the process has the
      `flow_collection` field defined.
    * Add the object to an existing `Sample` if the `input objects` that
      belong to the `flow collection` (though not necessarily all
      `input objects`) are part of the same `Sample`.
    * If the `input objects` belong to different `Samples` or do not
      belong to any `Sample`, create a new `Sample`.

    """
    if kwargs['created'] and instance.process.flow_collection:
        # Collect IDs of all input objects.
        input_objects = []
        for field_schema, fields, _ in iterate_schema(
                instance.input, instance.process.input_schema, ''):
            if 'name' in field_schema and 'type' in field_schema and field_schema['name'] in fields:
                field = fields[field_schema['name']]
                if field_schema['type'].startswith('data:'):
                    input_objects.append(field)
                if field_schema['type'].startswith('list:data:'):
                    input_objects.extend(field)

        sample_query = Sample.objects.filter(data__id__in=input_objects).distinct()

        if sample_query.count() == 1:
            sample = sample_query.first()
        else:
            des_schema = DescriptorSchema.objects.get(slug=instance.process.flow_collection)
            sample = Sample.objects.create(
                contributor=instance.contributor,
                descriptor_schema=des_schema,
                name=instance.name,
            )

            for permission in list(zip(*sample._meta.permissions))[0]:  # pylint: disable=protected-access
                shortcuts.assign_perm(permission, sample.contributor, sample)

        # XXX: This doesn't work, because the signal is triggered before the
        #      Data object is added to collections.
        # for collection in Collection.objects.filter(data=instance.pk):
        #     sample.collections.add(collection)

        sample.data.add(instance)
def assertFields(self, obj, path, value):  # pylint: disable=invalid-name
    """Compare object's field to the given value.

    The file size is ignored. Use assertFile to validate file contents.

    :param obj: object with the field to compare
    :type obj: ~resolwe.flow.models.Data
    :param str path: path to :class:`~resolwe.flow.models.Data` object's field
    :param str value: desired value of :class:`~resolwe.flow.models.Data` object's field

    """
    field_schema, field = None, None
    for field_schema, field, field_path in iterate_schema(obj.output, obj.process.output_schema, ''):
        if path == field_path:
            break
    else:
        self.fail("Field not found in path {}".format(path))

    field_name = field_schema['name']
    field_value = field[field_name]

    def remove_file_size(field_value):
        """Remove size value from file field."""
        if 'size' in field_value:
            del field_value['size']

    # Ignore size in file and dir fields
    if (field_schema['type'].startswith('basic:file:') or
            field_schema['type'].startswith('basic:dir:')):
        remove_file_size(field_value)
        remove_file_size(value)
    elif (field_schema['type'].startswith('list:basic:file:') or
            field_schema['type'].startswith('list:basic:dir:')):
        for val in field_value:
            remove_file_size(val)
        for val in value:
            remove_file_size(val)

    self.assertEqual(field_value, value,
                     msg="Field 'output.{}' mismatch: {} != {}".format(path, field_value, value) +
                         self._debug_info(obj))
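A hedged usage sketch: in a Resolwe process test, assertFields compares a single output field against an expected value. The process slug, inputs, and expected values below are made up for illustration, and run_process is assumed to be the ProcessTestCase helper that executes a process and returns its Data object:

from resolwe.test import ProcessTestCase  # assumed import path; may differ by Resolwe version


class ExampleProcessTest(ProcessTestCase):
    def test_output_fields(self):
        # 'word-count' and its input/output fields are hypothetical.
        data = self.run_process('word-count', {'src': 'reads.txt'})
        self.assertFields(data, 'words', 42)
        # For file fields, the 'size' key is stripped from both sides
        # before comparison, so only the remaining metadata must match.
        self.assertFields(data, 'report', {'file': 'report.txt'})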
def make_process_header(self, slug, typ, version, source_uri, description, inputs):
    """Generate a process definition header.

    :param str slug: process' slug
    :param str typ: process' type
    :param str version: process' version
    :param str source_uri: URL of the process definition
    :param str description: process' description
    :param dict inputs: process' inputs

    """
    node = addnodes.desc()
    signode = addnodes.desc_signature(slug, '')
    node.append(signode)

    node['objtype'] = node['desctype'] = typ

    signode += addnodes.desc_annotation(typ, typ, classes=['process-type'])
    signode += addnodes.desc_addname('', '')
    signode += addnodes.desc_name(slug + ' ', slug + ' ')

    paramlist = addnodes.desc_parameterlist()

    for field_schema, _, _ in iterate_schema({}, inputs, ''):
        field_type = field_schema['type']
        field_name = field_schema['name']

        field_default = field_schema.get('default', None)
        field_default = '' if field_default is None else '={}'.format(field_default)

        param = addnodes.desc_parameter('', '', noemph=True)
        param += nodes.emphasis(field_type, field_type, classes=['process-type'])
        # separate by non-breaking space in the output
        param += nodes.strong(text=u'\xa0\xa0' + field_name)

        paramlist += param

    signode += paramlist
    signode += nodes.reference('', nodes.Text('[Source: v{}]'.format(version)),
                               refuri=source_uri, classes=['viewcode-link'])

    desc = nodes.paragraph()
    desc += nodes.Text(description, description)

    return [node, desc]
def test_processor_types(self):
    procs = list(Process.objects.all())
    types = {}
    errors_equals = set()
    errors_subtype = set()

    for p in procs:
        fields = sorted('{} {}'.format(pth, schema['type'])
                        for schema, _, pth in iterate_schema({}, p.output_schema, 'output'))

        if p.type not in types:
            types[p.type] = {'fields': fields, 'name': [p.name]}
        else:
            types[p.type]['name'].append(p.name)
            if types[p.type]['fields'] != fields:
                errors_equals.add(p.type)

    if len(errors_equals) > 0:
        self.fail(
            'Processes of the same type should have the same output fields:\n\n    {}'.format(
                '\n    '.join(', '.join(types[typ]['name']) for typ in errors_equals)))

    type_list = sorted(types)
    for i, typ in enumerate(type_list):
        for prev_typ in type_list[:i]:
            if typ.startswith(prev_typ):
                prev_typ_fields = types[prev_typ]['fields']
                typ_fields = types[typ]['fields']
                if len(set(prev_typ_fields).difference(typ_fields)) > 0:
                    errors_subtype.add('{} {}'.format(prev_typ, typ))

    if len(errors_subtype) > 0:
        self.fail(
            'Processors should include all output fields of the parent type:\n\n    {}'.format(
                '\n    '.join(errors_subtype)))
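An illustrative (entirely made-up) example of the subtype rule this test enforces: a process whose type extends a parent type must expose at least all of the parent's output fields:

# Hypothetical types and output-field signatures, in the same
# '<path> <type>' format the test builds with iterate_schema.
parent_fields = ['output.bam basic:file:']
subtype_fields = ['output.bai basic:file:', 'output.bam basic:file:']

# 'data:alignment:bam:' starts with 'data:alignment:', so every field of
# the parent type must also appear in the subtype's sorted field list.
assert set(parent_fields).issubset(subtype_fields)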
def register_processes(self, process_schemas, user, force=False):
    """Read and register processors."""
    log_processors = []
    log_templates = []

    for p in process_schemas:
        if p['type'][-1] != ':':
            p['type'] += ':'

        if 'category' in p and not p['category'].endswith(':'):
            p['category'] += ':'

        # get `data_name` from `static`
        if 'static' in p:
            for schema, _, _ in iterate_schema({}, p['static']):
                if schema['name'] == 'name' and 'default' in schema:
                    p['data_name'] = schema['default']

        # support backward compatibility
        # TODO: update .yml files and remove
        if 'slug' not in p:
            p['slug'] = slugify(p.pop('name').replace(':', '-'))
            p['name'] = p.pop('label')

            p.pop('var', None)
            p.pop('static', None)

        for field in ['input', 'output', 'var', 'static']:
            for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                if not schema['type'].endswith(':'):
                    schema['type'] += ':'

        # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

        if not self.valid(p, PROCESSOR_SCHEMA):
            continue

        if 'persistence' in p:
            persistence_mapping = {
                'RAW': Process.PERSISTENCE_RAW,
                'CACHED': Process.PERSISTENCE_CACHED,
                'TEMP': Process.PERSISTENCE_TEMP,
            }

            p['persistence'] = persistence_mapping[p['persistence']]

        if 'input' in p:
            p['input_schema'] = p.pop('input')

        if 'output' in p:
            p['output_schema'] = p.pop('output')

        slug = p['slug']
        version = p['version']
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest_version` is returned as an `int`, so it has to be compared to `int_version`
        latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip processor {}: newer version installed".format(slug))
            continue

        process_query = Process.objects.filter(slug=slug, version=version)
        if process_query.exists():
            if not force:
                self.stdout.write("Skip processor {}: same version installed".format(slug))
                continue

            process_query.update(**p)
            log_processors.append("Updated {}".format(slug))
        else:
            Process.objects.create(contributor=user, **p)
            log_processors.append("Inserted {}".format(slug))

    if len(log_processors) > 0:
        self.stdout.write("Processor updates:")
        for log in log_processors:
            self.stdout.write("  {}".format(log))

    if len(log_templates) > 0:
        self.stdout.write("Default template updates:")
        for log in log_templates:
            self.stdout.write("  {}".format(log))
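A small illustration of the backward-compat branch above, for an old-style schema that still carries 'name'/'label' instead of 'slug'. The schema values are made up; slugify is assumed to be Django's django.utils.text.slugify:

from django.utils.text import slugify  # assumed source of slugify

# Hypothetical old-style process schema.
old = {'name': 'import:upload:reads-fastq', 'label': 'Upload FASTQ reads'}

# Colons are not slug-safe, so they are replaced before slugifying.
slug = slugify(old.pop('name').replace(':', '-'))
assert slug == 'import-upload-reads-fastq'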
def create(self, request, *args, **kwargs):
    """Create a resource."""
    collections = request.data.get('collections', [])

    # check that user has permissions on all collections that Data
    # object will be added to
    for collection_id in collections:
        try:
            collection = Collection.objects.get(pk=collection_id)
        except Collection.DoesNotExist:
            return Response({'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                            status=status.HTTP_400_BAD_REQUEST)

        if not request.user.has_perm('add_collection', obj=collection):
            if request.user.is_authenticated():
                raise exceptions.PermissionDenied
            else:
                raise exceptions.NotFound

    # translate the process's slug to its id
    process_slug = request.data.get('process', None)
    process_query = Process.objects.filter(slug=process_slug).order_by('version')
    if not process_query.exists():
        # XXX: security - is it ok to reveal which processes (don't) exist?
        return Response({'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
                        status=status.HTTP_400_BAD_REQUEST)

    process = process_query.last()
    request.data['process'] = process.pk

    # check that user has permission on the process
    if not request.user.has_perm('view_process', obj=process):
        if request.user.is_authenticated():
            raise exceptions.PermissionDenied
        else:
            raise exceptions.NotFound

    # perform "get_or_create" if requested - return existing object
    # if found
    if kwargs.pop('get_or_create', False):
        process_input = request.data.get('input', {})

        # use default values if they are not given
        for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(process_input, path, field_schema['default'])

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
        if data_qs.exists():
            data = data_qs.order_by('created').last()
            serializer = self.get_serializer(data)
            return Response(serializer.data)

    # create the objects
    resp = super(ResolweCreateDataModelMixin, self).create(request, *args, **kwargs)

    # run manager
    manager.communicate()

    return resp
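The default-filling step above relies on dict_dot to write a value at a dotted path inside the (possibly nested) input dict. A minimal sketch of the assumed setter behaviour; the real resolwe.flow.utils helper also supports reading values:

def dict_dot(d, path, value):
    """Set ``value`` at dotted ``path``, creating nested dicts as needed."""
    keys = path.split('.')
    for key in keys[:-1]:
        d = d.setdefault(key, {})
    d[keys[-1]] = value

# Usage: fill a default for a nested group field that was not supplied.
process_input = {}
dict_dot(process_input, 'options.threads', 4)
assert process_input == {'options': {'threads': 4}}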