def make_process_node(self, process):
    """Fill the content of process definition node.

    :param dict process: process data as given from yaml.load function
    :return: process node

    """
    name = process['name']
    slug = process['slug']
    typ = process['type']
    version = process['version']
    description = process.get('description', '')
    source_uri = process['source_uri']
    inputs = process.get('input', [])
    outputs = process.get('output', [])

    # Make process name a section title:
    section = nodes.section(ids=['process-' + slug])
    section += nodes.title(name, name)

    # Make process header:
    section += self.make_process_header(slug, typ, version, source_uri, description, inputs)

    # Make inputs section:
    container_node = nodes.container(classes=['toggle'])
    container_header = nodes.paragraph(classes=['header'])
    container_header += nodes.strong(text='Input arguments')
    container_node += container_header
    container_body = nodes.container()
    for field_schema, _, path in iterate_schema({}, inputs, ''):
        container_body += nodes.strong(text=path)
        container_body += self.make_properties_list(field_schema)
    container_node += container_body
    section += container_node

    # Make outputs section:
    container_node = nodes.container(classes=['toggle'])
    container_header = nodes.paragraph(classes=['header'])
    container_header += nodes.strong(text='Output results')
    container_node += container_header
    container_body = nodes.container()
    for field_schema, _, path in iterate_schema({}, outputs, ''):
        container_body += nodes.strong(text=path)
        container_body += self.make_properties_list(field_schema)
    container_node += container_body
    section += container_node

    return [
        section,
        addnodes.index(entries=[('single', name, 'process-' + slug, '', None)]),
    ]
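# Note: every example in this collection consumes the same contract from
# resolwe's iterate_schema(): it yields (field_schema, fields, path) triples,
# where path is the dot-separated location of the field. The stand-in below is
# a simplified, illustrative sketch of that contract (the sample schema and the
# traversal details are assumptions, not the library's actual implementation).
def iterate_schema_sketch(fields, schema, path=''):
    """Yield (field_schema, fields, path) triples for a field schema."""
    for field_schema in schema:
        name = field_schema['name']
        child_path = '{}.{}'.format(path, name) if path else name
        if 'group' in field_schema:
            # Recurse into nested groups with the matching sub-dict of values.
            for item in iterate_schema_sketch(fields.get(name, {}), field_schema['group'], child_path):
                yield item
        else:
            yield field_schema, fields, child_path


example_schema = [
    {'name': 'genome', 'type': 'data:genome:', 'required': True},
    {'name': 'options', 'group': [
        {'name': 'threads', 'type': 'basic:integer:', 'default': 1},
    ]},
]

for field_schema, fields, path in iterate_schema_sketch({}, example_schema):
    print(path, field_schema['type'])
# genome data:genome:
# options.threads basic:integer: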
def valid(self, instance, schema):
    """Validate schema."""
    try:
        jsonschema.validate(instance, schema)
    except jsonschema.exceptions.ValidationError as ex:
        self.stderr.write(" VALIDATION ERROR: {}".format(
            instance["name"] if "name" in instance else ""))
        self.stderr.write(" path: {}".format(ex.path))
        self.stderr.write(" message: {}".format(ex.message))
        self.stderr.write(" validator: {}".format(ex.validator))
        self.stderr.write(" val. value: {}".format(ex.validator_value))
        return False

    try:
        # Check that default values fit field schema.
        for field in ["input", "output", "schema"]:
            for schema, _, path in iterate_schema({}, instance.get(field, {})):
                if "default" in schema:
                    validate_schema({schema["name"]: schema["default"]}, [schema])
    except ValidationError:
        self.stderr.write(" VALIDATION ERROR: {}".format(instance["name"]))
        self.stderr.write(" Default value of field '{}' is not valid.".format(path))
        return False

    return True
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    inputs = copy.deepcopy(data.input)
    if data.process.input_schema:
        hydrate_input_references(inputs, data.process.input_schema, hydrate_values=False)
    template_context = inputs

    # Set default values
    for field_schema, field, path in iterate_schema(
            data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            tmpl = field_schema['default']
            if field_schema['type'].startswith('list:'):
                tmpl = [
                    render_template(data.process, tmp, template_context)
                    if isinstance(tmp, six.string_types) else tmp
                    for tmp in tmpl
                ]
            elif isinstance(tmpl, six.string_types):
                tmpl = render_template(data.process, tmpl, template_context)

            dict_dot(data, path, tmpl)
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(
        process_input, input_schema, include_groups=True
    ):
        if "group" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, {})
        if "default" in field_schema and field_schema["name"] not in fields:
            dict_dot(process_input, path, field_schema["default"])
def storage_fields(self) -> Set[str]:
    """Get the names of storage fields in schema."""
    if self._storage_fields is None:
        self._storage_fields = {
            field_name
            for schema, _, field_name in iterate_schema(
                self.data.output, self.data.process.output_schema)
            if schema["type"].startswith("basic:json:")
        }
    return self._storage_fields
def assertFields(self, obj, path, value):
    """Compare object's field to the given value.

    The file size is ignored. Use assertFile to validate file contents.

    :param obj: object with the field to compare
    :type obj: ~resolwe.flow.models.Data

    :param str path: path to :class:`~resolwe.flow.models.Data` object's field

    :param str value: desired value of :class:`~resolwe.flow.models.Data` object's field

    """
    field_schema, field = None, None
    for field_schema, field, field_path in iterate_schema(
        obj.output, obj.process.output_schema, ""
    ):
        if path == field_path:
            break
    else:
        self.fail("Field not found in path {}".format(path))

    field_name = field_schema["name"]
    field_value = field[field_name]

    def remove_file_size(field_value):
        """Remove size value from file field."""
        if "size" in field_value:
            del field_value["size"]

    # Ignore size in file and dir fields
    if field_schema["type"].startswith("basic:file:") or field_schema["type"].startswith("basic:dir:"):
        remove_file_size(field_value)
        remove_file_size(value)
    elif field_schema["type"].startswith("list:basic:file:") or field_schema["type"].startswith("list:basic:dir:"):
        for val in field_value:
            remove_file_size(val)
        for val in value:
            remove_file_size(val)

    self.assertEqual(
        field_value,
        value,
        msg="Field 'output.{}' mismatch: {} != {}".format(path, field_value, value)
        + self._debug_info(obj),
    )
def register_descriptors(self, descriptor_schemas, user, force=False, verbosity=1):
    """Read and register descriptors."""
    log_descriptors = []

    for descriptor_schema in descriptor_schemas:
        for field in ['var', 'schema']:
            for schema, _, _ in iterate_schema({}, descriptor_schema.get(field, {})):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

        # support backward compatibility
        # TODO: update .yml files and remove
        if 'slug' not in descriptor_schema:
            descriptor_schema['slug'] = slugify(descriptor_schema.pop('name').replace(':', '-'))
            descriptor_schema['name'] = descriptor_schema.pop('label')

        if 'schema' not in descriptor_schema:
            descriptor_schema['schema'] = []

        if 'static' in descriptor_schema:
            descriptor_schema['schema'].extend(descriptor_schema.pop('static'))
        if 'var' in descriptor_schema:
            descriptor_schema['schema'].extend(descriptor_schema.pop('var'))

        if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
            continue

        slug = descriptor_schema['slug']
        version = descriptor_schema.get('version', '0.0.0')
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
            continue

        descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
        if descriptor_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                continue

            descriptor_query.update(**descriptor_schema)
            log_descriptors.append("Updated {}".format(slug))
        else:
            DescriptorSchema.objects.create(contributor=user, **descriptor_schema)
            log_descriptors.append("Inserted {}".format(slug))

    if len(log_descriptors) > 0 and verbosity > 0:
        self.stdout.write("Descriptor schemas Updates:")
        for log in log_descriptors:
            self.stdout.write(" {}".format(log))
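# The registration commands in this collection compare versions through
# convert_version_string_to_int(), which packs a dotted version string into a
# single integer so it can be compared with the Max('version') aggregate. A
# rough, hypothetical sketch of such a packing follows; the bit widths below
# are illustrative and not the actual VERSION_NUMBER_BITS used by resolwe.
def convert_version_string_to_int_sketch(version, bits=(8, 10, 14)):
    """Pack 'major.minor.patch' into one int so newer versions compare larger."""
    components = [int(part) for part in version.split('.')]
    components += [0] * (len(bits) - len(components))  # pad missing components

    result = 0
    for component, width in zip(components, bits):
        if component >= (1 << width):
            raise ValueError("Version component {} does not fit in {} bits".format(component, width))
        result = (result << width) | component
    return result


assert convert_version_string_to_int_sketch('1.0.0') < convert_version_string_to_int_sketch('1.0.1')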
def make_process_header(self, slug, typ, version, source_uri, description, inputs):
    """Generate a process definition header.

    :param str slug: process' slug
    :param str typ: process' type
    :param str version: process' version
    :param str source_uri: url to the process definition
    :param str description: process' description
    :param dict inputs: process' inputs

    """
    node = addnodes.desc()
    signode = addnodes.desc_signature(slug, "")
    node.append(signode)

    node["objtype"] = node["desctype"] = typ

    signode += addnodes.desc_annotation(typ, typ, classes=["process-type"])
    signode += addnodes.desc_addname("", "")
    signode += addnodes.desc_name(slug + " ", slug + " ")

    paramlist = addnodes.desc_parameterlist()

    for field_schema, _, _ in iterate_schema({}, inputs, ""):
        field_type = field_schema["type"]
        field_name = field_schema["name"]

        field_default = field_schema.get("default", None)
        field_default = "" if field_default is None else "={}".format(field_default)

        param = addnodes.desc_parameter("", "", noemph=True)
        param += nodes.emphasis(field_type, field_type, classes=["process-type"])
        # separate by non-breaking space in the output
        param += nodes.strong(text="\xa0\xa0" + field_name)

        paramlist += param

    signode += paramlist
    signode += nodes.reference(
        "",
        nodes.Text("[Source: v{}]".format(version)),
        refuri=source_uri,
        classes=["viewcode-link"],
    )

    desc = nodes.paragraph()
    desc += nodes.Text(description, description)

    return [node, desc]
def register_descriptors(self, descriptor_schemas, user, force=False, verbosity=1):
    """Read and register descriptors."""
    log_descriptors = []

    for descriptor_schema in descriptor_schemas:
        for schema, _, _ in iterate_schema({}, descriptor_schema.get('schema', {})):
            if not schema['type'][-1].endswith(':'):
                schema['type'] += ':'

        if 'schema' not in descriptor_schema:
            descriptor_schema['schema'] = []

        if not self.valid(descriptor_schema, DESCRIPTOR_SCHEMA):
            continue

        slug = descriptor_schema['slug']
        version = descriptor_schema.get('version', '0.0.0')
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = DescriptorSchema.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip descriptor schema {}: newer version installed".format(slug))
            continue

        previous_descriptor_qs = DescriptorSchema.objects.filter(slug=slug)
        if previous_descriptor_qs.exists():
            previous_descriptor = previous_descriptor_qs.latest()
        else:
            previous_descriptor = None

        descriptor_query = DescriptorSchema.objects.filter(slug=slug, version=version)
        if descriptor_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip descriptor schema {}: same version installed".format(slug))
                continue

            descriptor_query.update(**descriptor_schema)
            log_descriptors.append("Updated {}".format(slug))
        else:
            descriptor = DescriptorSchema.objects.create(contributor=user, **descriptor_schema)
            assign_contributor_permissions(descriptor)
            if previous_descriptor:
                copy_permissions(previous_descriptor, descriptor)
            log_descriptors.append("Inserted {}".format(slug))

    if log_descriptors and verbosity > 0:
        self.stdout.write("Descriptor schemas Updates:")
        for log in log_descriptors:
            self.stdout.write(" {}".format(log))
def render_descriptor(data):
    """Render data descriptor.

    The rendering is based on descriptor schema and input context.

    :param data: data instance
    :type data: :class:`resolwe.flow.models.Data` or :class:`dict`

    """
    if not data.descriptor_schema:
        return

    # Set default values
    for field_schema, field, path in iterate_schema(data.descriptor, data.descriptor_schema.schema, 'descriptor'):
        if 'default' in field_schema and field_schema['name'] not in field:
            dict_dot(data, path, field_schema['default'])
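# The effect of the default-filling loop in render_descriptor() can be seen
# with a small, made-up descriptor schema. The field names are illustrative;
# in resolwe the schema would come from a DescriptorSchema instance and the
# assignment would go through dict_dot() on a dot-separated path.
descriptor_schema = [
    {'name': 'species', 'type': 'basic:string:', 'default': 'Homo sapiens'},
    {'name': 'notes', 'type': 'basic:string:', 'required': False},
]
descriptor = {}

for field_schema in descriptor_schema:
    if 'default' in field_schema and field_schema['name'] not in descriptor:
        # Equivalent to dict_dot(data, 'descriptor.species', 'Homo sapiens')
        descriptor[field_schema['name']] = field_schema['default']

print(descriptor)  # {'species': 'Homo sapiens'}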
def test_processor_types(self):
    procs = list(Process.objects.all())

    types = {}
    errors_equals = set()
    errors_subtype = set()

    for p in procs:
        fields = sorted(
            "{} {}".format(pth, schema["type"])
            for schema, _, pth in iterate_schema({}, p.output_schema, "output")
        )

        if p.type not in types:
            types[p.type] = {"fields": fields, "name": [p.name]}
        else:
            types[p.type]["name"].append(p.name)

            if types[p.type]["fields"] != fields:
                errors_equals.add(p.type)

    if errors_equals:
        self.fail(
            "Processes of the same type should have the same output fields:\n\n {}".format(
                "\n ".join(", ".join(types[typ]["name"]) for typ in errors_equals)
            )
        )

    type_list = sorted(types)
    for i, typ in enumerate(type_list):
        for prev_typ in type_list[:i]:
            if typ.startswith(prev_typ):
                prev_typ_fields = types[prev_typ]["fields"]
                typ_fields = types[typ]["fields"]
                if set(prev_typ_fields).difference(typ_fields):
                    errors_subtype.add("{} {}".format(prev_typ, typ))

    if errors_subtype:
        self.fail(
            "Processors should include all output fields of the parent type:\n\n {}".format(
                "\n ".join(errors_subtype)
            )
        )
def validate_schema(instance, schema, test_required=True, path_prefix=None):
    """Check if DictField values are consistent with our data types.

    Perform basic JSON schema validation and our custom validations:

      * check that required fields are given (if `test_required` is set
        to ``True``)
      * check if ``basic:file:`` and ``list:basic:file`` fields match regex
        given in schema (only if ``validate_regex`` is defined in schema for
        corresponding fields) and exist (only if ``path_prefix`` is given)
      * check if directories referenced in ``basic:dir:`` and
        ``list:basic:dir`` fields exist (only if ``path_prefix`` is given)
      * check that referenced ``Data`` objects (in ``data:<data_type>`` and
        ``list:data:<data_type>`` fields) exist and are of type ``<data_type>``
      * check that referenced ``Storage`` objects (in ``basic:json`` fields)
        exist

    :param list instance: Instance to be validated
    :param list schema: Schema for validation
    :param bool test_required: Flag for testing if all required fields are
        present. It is useful if validation is run before ``Data`` object is
        finished and there are some fields still missing (default: ``False``)
    :param str path_prefix: path prefix used for checking if files and
        directories exist (default: ``None``)
    :rtype: None
    :raises ValidationError: if ``instance`` doesn't match schema defined in
        ``schema``

    """
    def validate_refs(field):
        """Validate reference paths."""
        if 'refs' in field:
            for refs_filename in field['refs']:
                refs_path = os.path.join(path_prefix, refs_filename)
                if not (os.path.isfile(refs_path) or os.path.isdir(refs_path)):
                    raise ValidationError(
                        "File referenced in `refs` ({}) does not exist".format(refs_path))

    def validate_file(field, regex):
        """Validate file name (and check that it exists)."""
        filename = field['file']

        if regex and not re.search(regex, filename):
            raise ValidationError(
                "File name {} does not match regex {}".format(filename, regex))

        if path_prefix:
            path = os.path.join(path_prefix, filename)
            if not os.path.isfile(path):
                raise ValidationError("Referenced file ({}) does not exist".format(path))

            validate_refs(field)

    def validate_dir(field):
        """Check that dirs and referenced files exist."""
        dirname = field['dir']

        if path_prefix:
            path = os.path.join(path_prefix, dirname)
            if not os.path.isdir(path):
                raise ValidationError("Referenced dir ({}) does not exist".format(path))

            validate_refs(field)

    def validate_data(data_pk, type_):
        """Check that the referenced `Data` object exists and is of the right type."""
        from .data import Data  # prevent circular import

        data_qs = Data.objects.filter(pk=data_pk).values('process__type')
        if not data_qs.exists():
            raise ValidationError(
                "Referenced `Data` object does not exist (id:{})".format(data_pk))
        data = data_qs.first()
        if not data['process__type'].startswith(type_):
            raise ValidationError(
                "Data object of type `{}` is required, but type `{}` is given. "
                "(id:{})".format(type_, data['process__type'], data_pk))

    is_dirty = False
    dirty_fields = []
    for _schema, _fields, _ in iterate_schema(instance, schema):
        name = _schema['name']
        is_required = _schema.get('required', True)

        if test_required and is_required and name not in _fields:
            is_dirty = True
            dirty_fields.append(name)

        if name in _fields:
            field = _fields[name]
            type_ = _schema.get('type', "")

            # Treat None as if the field is missing.
            if not is_required and field is None:
                continue

            try:
                jsonschema.validate([{"type": type_, "value": field}], TYPE_SCHEMA)
            except jsonschema.exceptions.ValidationError as ex:
                raise ValidationError(ex.message)

            choices = [choice['value'] for choice in _schema.get('choices', [])]
            allow_custom_choice = _schema.get('allow_custom_choice', False)
            if choices and not allow_custom_choice and field not in choices:
                raise ValidationError(
                    "Value of field '{}' must match one of predefined choices. "
                    "Current value: {}".format(name, field))

            if type_ == 'basic:file:':
                validate_file(field, _schema.get('validate_regex'))

            elif type_ == 'list:basic:file:':
                for obj in field:
                    validate_file(obj, _schema.get('validate_regex'))

            elif type_ == 'basic:dir:':
                validate_dir(field)

            elif type_ == 'list:basic:dir:':
                for obj in field:
                    validate_dir(obj)

            elif type_ == 'basic:json:' and not Storage.objects.filter(pk=field).exists():
                raise ValidationError(
                    "Referenced `Storage` object does not exist (id:{})".format(field))

            elif type_.startswith('data:'):
                validate_data(field, type_)

            elif type_.startswith('list:data:'):
                for data_id in field:
                    validate_data(data_id, type_[5:])  # remove `list:` from type

    try:
        # Check that schema definitions exist for all fields
        for _, _ in iterate_fields(instance, schema):
            pass
    except KeyError as ex:
        raise ValidationError(str(ex))

    if is_dirty:
        dirty_fields = ['"{}"'.format(field) for field in dirty_fields]
        raise DirtyError("Required fields {} not given.".format(', '.join(dirty_fields)))
def create(self, request, *args, **kwargs):
    """Create a resource."""
    collections = request.data.get('collections', [])

    # check that user has permissions on all collections that Data
    # object will be added to
    for collection_id in collections:
        try:
            collection = Collection.objects.get(pk=collection_id)
        except Collection.DoesNotExist:
            return Response(
                {'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                status=status.HTTP_400_BAD_REQUEST)

        if not request.user.has_perm('add_collection', obj=collection):
            if request.user.has_perm('view_collection', obj=collection):
                raise exceptions.PermissionDenied(
                    "You don't have `ADD` permission on collection (id: {}).".format(collection_id))
            else:
                raise exceptions.NotFound(
                    "Collection not found (id: {}).".format(collection_id))

    # translate process's slug to id
    process_slug = request.data.get('process', None)
    process_query = Process.objects.filter(slug=process_slug)
    process_query = get_objects_for_user(request.user, 'view_process', process_query)
    try:
        process = process_query.latest()
    except Process.DoesNotExist:
        return Response(
            {'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
            status=status.HTTP_400_BAD_REQUEST)
    request.data['process'] = process.pk

    # perform "get_or_create" if requested - return existing object
    # if found
    if kwargs.pop('get_or_create', False):
        process_input = request.data.get('input', {})

        # use default values if they are not given
        for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(process_input, path, field_schema['default'])

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)
        if data_qs.exists():
            data = data_qs.order_by('created').last()
            serializer = self.get_serializer(data)
            return Response(serializer.data)

    # create the objects
    resp = super(DataViewSet, self).create(request, *args, **kwargs)

    # run manager
    manager.communicate()

    return resp
def validate_schema(instance, schema, test_required=True, data_location=None,
                    skip_missing_data=False):
    """Check if DictField values are consistent with our data types.

    Perform basic JSON schema validation and our custom validations:

      * check that required fields are given (if `test_required` is set
        to ``True``)
      * check if ``basic:file:`` and ``list:basic:file`` fields match regex
        given in schema (only if ``validate_regex`` is defined in schema for
        corresponding fields) and exist (only if ``data_location`` is given)
      * check if directories referenced in ``basic:dir:`` and
        ``list:basic:dir`` fields exist (only if ``data_location`` is given)
      * check that referenced ``Data`` objects (in ``data:<data_type>`` and
        ``list:data:<data_type>`` fields) exist and are of type ``<data_type>``
      * check that referenced ``Storage`` objects (in ``basic:json`` fields)
        exist

    :param list instance: Instance to be validated
    :param list schema: Schema for validation
    :param bool test_required: Flag for testing if all required fields are
        present. It is useful if validation is run before ``Data`` object is
        finished and there are some fields still missing (default: ``False``)
    :param :class:`~resolwe.flow.models.data.DataLocation` data_location:
        data location used for checking if files and directories exist
        (default: ``None``)
    :param bool skip_missing_data: Don't raise an error if referenced ``Data``
        object does not exist
    :rtype: None
    :raises ValidationError: if ``instance`` doesn't match schema defined in
        ``schema``

    """
    from .storage import Storage  # Prevent circular import.

    path_prefix = None
    if data_location:
        path_prefix = data_location.get_path()

    def validate_refs(field):
        """Validate reference paths."""
        for ref_filename in field.get('refs', []):
            ref_path = os.path.join(path_prefix, ref_filename)
            if not os.path.exists(ref_path):
                raise ValidationError("Path referenced in `refs` ({}) does not exist.".format(ref_path))
            if not (os.path.isfile(ref_path) or os.path.isdir(ref_path)):
                raise ValidationError(
                    "Path referenced in `refs` ({}) is neither a file or directory.".format(ref_path))

    def validate_file(field, regex):
        """Validate file name (and check that it exists)."""
        filename = field['file']

        if regex and not re.search(regex, filename):
            raise ValidationError(
                "File name {} does not match regex {}".format(filename, regex))

        if path_prefix:
            path = os.path.join(path_prefix, filename)
            if not os.path.exists(path):
                raise ValidationError("Referenced path ({}) does not exist.".format(path))
            if not os.path.isfile(path):
                raise ValidationError("Referenced path ({}) is not a file.".format(path))

            validate_refs(field)

    def validate_dir(field):
        """Check that dirs and referenced files exist."""
        dirname = field['dir']

        if path_prefix:
            path = os.path.join(path_prefix, dirname)
            if not os.path.exists(path):
                raise ValidationError("Referenced path ({}) does not exist.".format(path))
            if not os.path.isdir(path):
                raise ValidationError("Referenced path ({}) is not a directory.".format(path))

            validate_refs(field)

    def validate_data(data_pk, type_):
        """Check that the referenced `Data` object exists and is of the right type."""
        from .data import Data  # prevent circular import

        data_qs = Data.objects.filter(pk=data_pk).values('process__type')
        if not data_qs.exists():
            if skip_missing_data:
                return

            raise ValidationError(
                "Referenced `Data` object does not exist (id:{})".format(data_pk))
        data = data_qs.first()
        if not data['process__type'].startswith(type_):
            raise ValidationError(
                "Data object of type `{}` is required, but type `{}` is given. "
                "(id:{})".format(type_, data['process__type'], data_pk))

    def validate_range(value, interval, name):
        """Check that given value is inside the specified range."""
        if not interval:
            return

        if value < interval[0] or value > interval[1]:
            raise ValidationError(
                "Value of field '{}' is out of range. It should be between {} and {}.".format(
                    name, interval[0], interval[1]))

    is_dirty = False
    dirty_fields = []
    for _schema, _fields, _ in iterate_schema(instance, schema):
        name = _schema['name']
        is_required = _schema.get('required', True)

        if test_required and is_required and name not in _fields:
            is_dirty = True
            dirty_fields.append(name)

        if name in _fields:
            field = _fields[name]
            type_ = _schema.get('type', "")

            # Treat None as if the field is missing.
            if not is_required and field is None:
                continue

            try:
                jsonschema.validate([{"type": type_, "value": field}], TYPE_SCHEMA)
            except jsonschema.exceptions.ValidationError as ex:
                raise ValidationError(ex.message)

            choices = [choice['value'] for choice in _schema.get('choices', [])]
            allow_custom_choice = _schema.get('allow_custom_choice', False)
            if choices and not allow_custom_choice and field not in choices:
                raise ValidationError(
                    "Value of field '{}' must match one of predefined choices. "
                    "Current value: {}".format(name, field))

            if type_ == 'basic:file:':
                validate_file(field, _schema.get('validate_regex'))

            elif type_ == 'list:basic:file:':
                for obj in field:
                    validate_file(obj, _schema.get('validate_regex'))

            elif type_ == 'basic:dir:':
                validate_dir(field)

            elif type_ == 'list:basic:dir:':
                for obj in field:
                    validate_dir(obj)

            elif type_ == 'basic:json:' and not Storage.objects.filter(pk=field).exists():
                raise ValidationError(
                    "Referenced `Storage` object does not exist (id:{})".format(field))

            elif type_.startswith('data:'):
                validate_data(field, type_)

            elif type_.startswith('list:data:'):
                for data_id in field:
                    validate_data(data_id, type_[5:])  # remove `list:` from type

            elif type_ == 'basic:integer:' or type_ == 'basic:decimal:':
                validate_range(field, _schema.get('range'), name)

            elif type_ == 'list:basic:integer:' or type_ == 'list:basic:decimal:':
                for obj in field:
                    validate_range(obj, _schema.get('range'), name)

    try:
        # Check that schema definitions exist for all fields
        for _, _ in iterate_fields(instance, schema):
            pass
    except KeyError as ex:
        raise ValidationError(str(ex))

    if is_dirty:
        dirty_fields = ['"{}"'.format(field) for field in dirty_fields]
        raise DirtyError("Required fields {} not given.".format(', '.join(dirty_fields)))
def fill_with_defaults(process_input, input_schema):
    """Fill empty optional fields in input with default values."""
    for field_schema, fields, path in iterate_schema(process_input, input_schema):
        if 'default' in field_schema and field_schema['name'] not in fields:
            dict_dot(process_input, path, field_schema['default'])
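# Minimal usage sketch for fill_with_defaults() above. The input schema and
# values are invented for illustration; dict_dot() is assumed to write the
# default at the field's dot-separated path.
example_input_schema = [
    {'name': 'reads', 'type': 'data:reads:fastq:', 'required': True},
    {'name': 'threads', 'type': 'basic:integer:', 'default': 1},
]

process_input = {'reads': 42}
fill_with_defaults(process_input, example_input_schema)
print(process_input)  # {'reads': 42, 'threads': 1}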
def register_processes(self, process_schemas, user, force=False, verbosity=1):
    """Read and register processors."""
    log_processors = []
    log_templates = []

    for p in process_schemas:
        # TODO: Remove this when all processes are migrated to the
        # new syntax.
        if 'flow_collection' in p:
            if 'entity' in p:
                self.stderr.write(
                    "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                    "allowed".format(p['slug'])
                )
                continue

            p['entity'] = {'type': p.pop('flow_collection')}

        if p['type'][-1] != ':':
            p['type'] += ':'

        if 'category' in p and not p['category'].endswith(':'):
            p['category'] += ':'

        for field in ['input', 'output']:
            for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

        # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

        if not self.valid(p, PROCESSOR_SCHEMA):
            continue

        if 'entity' in p:
            if 'type' not in p['entity']:
                self.stderr.write(
                    "Skip process {}: 'entity.type' required if 'entity' defined".format(p['slug'])
                )
                continue

            p['entity_type'] = p['entity']['type']
            p['entity_descriptor_schema'] = p['entity'].get('descriptor_schema', p['entity_type'])
            p['entity_input'] = p['entity'].get('input', None)
            p.pop('entity')

            if not DescriptorSchema.objects.filter(slug=p['entity_descriptor_schema']).exists():
                self.stderr.write(
                    "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                    "field.".format(p['slug'], p['entity_descriptor_schema'])
                )
                continue

        if 'persistence' in p:
            persistence_mapping = {
                'RAW': Process.PERSISTENCE_RAW,
                'CACHED': Process.PERSISTENCE_CACHED,
                'TEMP': Process.PERSISTENCE_TEMP,
            }

            p['persistence'] = persistence_mapping[p['persistence']]

        if 'scheduling_class' in p:
            scheduling_class_mapping = {
                'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                'batch': Process.SCHEDULING_CLASS_BATCH
            }

            p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

        if 'input' in p:
            p['input_schema'] = p.pop('input')

        if 'output' in p:
            p['output_schema'] = p.pop('output')

        slug = p['slug']

        if 'run' in p:
            # Set default language to 'bash' if not set.
            p['run'].setdefault('language', 'bash')

            # Transform output schema using the execution engine.
            try:
                execution_engine = manager.get_execution_engine(p['run']['language'])
                extra_output_schema = execution_engine.get_output_schema(p)
                if extra_output_schema:
                    p.setdefault('output_schema', []).extend(extra_output_schema)
            except InvalidEngineError:
                self.stderr.write("Skip processor {}: execution engine '{}' not supported".format(
                    slug, p['run']['language']
                ))
                continue

        # Validate if container image is allowed based on the configured pattern.
        # NOTE: This validation happens here and is not deferred to executors because the idea
        #       is that this will be moved to a "container" requirement independent of the
        #       executor.
        if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
            try:
                container_image = dict_dot(p, 'requirements.executor.docker.image')
                if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                    self.stderr.write("Skip processor {}: container image does not match '{}'".format(
                        slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE,
                    ))
                    continue
            except KeyError:
                pass

        version = p['version']
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip processor {}: newer version installed".format(slug))
            continue

        previous_process_qs = Process.objects.filter(slug=slug)
        if previous_process_qs.exists():
            previous_process = previous_process_qs.latest()
        else:
            previous_process = None

        process_query = Process.objects.filter(slug=slug, version=version)
        if process_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip processor {}: same version installed".format(slug))
                continue

            process_query.update(**p)
            log_processors.append("Updated {}".format(slug))
        else:
            process = Process.objects.create(contributor=user, **p)
            assign_contributor_permissions(process)
            if previous_process:
                copy_permissions(previous_process, process)
            log_processors.append("Inserted {}".format(slug))

    if verbosity > 0:
        if log_processors:
            self.stdout.write("Processor Updates:")
            for log in log_processors:
                self.stdout.write(" {}".format(log))

        if log_templates:
            self.stdout.write("Default Template Updates:")
            for log in log_templates:
                self.stdout.write(" {}".format(log))
def register_processes(self, process_schemas, user, force=False, verbosity=1):
    """Read and register processors."""
    log_processors = []
    log_templates = []

    for p in process_schemas:
        # TODO: Remove this when all processes are migrated to the
        # new syntax.
        if "flow_collection" in p:
            if "entity" in p:
                self.stderr.write(
                    "Skip processor {}: only one of 'flow_collection' and 'entity' fields "
                    "allowed".format(p["slug"]))
                continue

            p["entity"] = {"type": p.pop("flow_collection")}

        if p["type"][-1] != ":":
            p["type"] += ":"

        if "category" in p and not p["category"].endswith(":"):
            p["category"] += ":"

        for field in ["input", "output"]:
            for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                if not schema["type"][-1].endswith(":"):
                    schema["type"] += ":"

        # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

        if not self.valid(p, PROCESSOR_SCHEMA):
            continue

        if "entity" in p:
            if "type" not in p["entity"]:
                self.stderr.write(
                    "Skip process {}: 'entity.type' required if 'entity' defined".format(p["slug"]))
                continue

            if "input" in p["entity"] and p["entity"].get("always_create", False):
                self.stderr.write(
                    "Skip process {}: 'entity.input' will not be considered if 'entity.always_create' "
                    "is set to true.".format(p["slug"]))
                continue

            p["entity_type"] = p["entity"]["type"]
            p["entity_descriptor_schema"] = p["entity"].get("descriptor_schema", p["entity_type"])
            p["entity_input"] = p["entity"].get("input", None)
            p["entity_always_create"] = p["entity"].get("always_create", False)
            p.pop("entity")

            if not DescriptorSchema.objects.filter(slug=p["entity_descriptor_schema"]).exists():
                self.stderr.write(
                    "Skip processor {}: Unknown descriptor schema '{}' used in 'entity' "
                    "field.".format(p["slug"], p["entity_descriptor_schema"]))
                continue

        if "persistence" in p:
            persistence_mapping = {
                "RAW": Process.PERSISTENCE_RAW,
                "CACHED": Process.PERSISTENCE_CACHED,
                "TEMP": Process.PERSISTENCE_TEMP,
            }

            p["persistence"] = persistence_mapping[p["persistence"]]

        if "scheduling_class" in p:
            scheduling_class_mapping = {
                "interactive": Process.SCHEDULING_CLASS_INTERACTIVE,
                "batch": Process.SCHEDULING_CLASS_BATCH,
            }

            p["scheduling_class"] = scheduling_class_mapping[p["scheduling_class"]]

        if "input" in p:
            p["input_schema"] = p.pop("input")

        if "output" in p:
            p["output_schema"] = p.pop("output")

        slug = p["slug"]

        if "run" in p:
            # Set default language to 'bash' if not set.
            p["run"].setdefault("language", "bash")

            # Transform output schema using the execution engine.
            try:
                execution_engine = manager.get_execution_engine(p["run"]["language"])
                extra_output_schema = execution_engine.get_output_schema(p)
                if extra_output_schema:
                    p.setdefault("output_schema", []).extend(extra_output_schema)
            except InvalidEngineError:
                self.stderr.write(
                    "Skip processor {}: execution engine '{}' not supported".format(
                        slug, p["run"]["language"]))
                continue

        # Validate if container image is allowed based on the configured pattern.
        # NOTE: This validation happens here and is not deferred to executors because the idea
        #       is that this will be moved to a "container" requirement independent of the
        #       executor.
        if hasattr(settings, "FLOW_CONTAINER_VALIDATE_IMAGE"):
            try:
                container_image = dict_dot(p, "requirements.executor.docker.image")
                if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                    self.stderr.write(
                        "Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE))
                    continue
            except KeyError:
                pass

        version = p["version"]
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = Process.objects.filter(slug=slug).aggregate(Max("version"))["version__max"]
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip processor {}: newer version installed".format(slug))
            continue

        previous_process_qs = Process.objects.filter(slug=slug)
        if previous_process_qs.exists():
            previous_process = previous_process_qs.latest()
        else:
            previous_process = None

        process_query = Process.objects.filter(slug=slug, version=version)
        if process_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip processor {}: same version installed".format(slug))
                continue

            process_query.update(**p)
            log_processors.append("Updated {}".format(slug))
        else:
            process = Process.objects.create(contributor=user, **p)
            assign_contributor_permissions(process)
            if previous_process:
                copy_permissions(previous_process, process)
            log_processors.append("Inserted {}".format(slug))

    if verbosity > 0:
        if log_processors:
            self.stdout.write("Processor Updates:")
            for log in log_processors:
                self.stdout.write(" {}".format(log))

        if log_templates:
            self.stdout.write("Default Template Updates:")
            for log in log_templates:
                self.stdout.write(" {}".format(log))
def register_processes(self, process_schemas, user, force=False, verbosity=1):
    """Read and register processors."""
    log_processors = []
    log_templates = []

    for p in process_schemas:
        if p['type'][-1] != ':':
            p['type'] += ':'

        if 'category' in p and not p['category'].endswith(':'):
            p['category'] += ':'

        for field in ['input', 'output']:
            for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

        # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

        if not self.valid(p, PROCESSOR_SCHEMA):
            continue

        if 'persistence' in p:
            persistence_mapping = {
                'RAW': Process.PERSISTENCE_RAW,
                'CACHED': Process.PERSISTENCE_CACHED,
                'TEMP': Process.PERSISTENCE_TEMP,
            }

            p['persistence'] = persistence_mapping[p['persistence']]

        if 'scheduling_class' in p:
            scheduling_class_mapping = {
                'interactive': Process.SCHEDULING_CLASS_INTERACTIVE,
                'batch': Process.SCHEDULING_CLASS_BATCH
            }

            p['scheduling_class'] = scheduling_class_mapping[p['scheduling_class']]

        if 'input' in p:
            p['input_schema'] = p.pop('input')

        if 'output' in p:
            p['output_schema'] = p.pop('output')

        slug = p['slug']

        if 'run' in p:
            # Set default language to 'bash' if not set.
            p['run'].setdefault('language', 'bash')

            # Transform output schema using the execution engine.
            try:
                execution_engine = manager.get_execution_engine(p['run']['language'])
                extra_output_schema = execution_engine.get_output_schema(p)
                if extra_output_schema:
                    p.setdefault('output_schema', []).extend(extra_output_schema)
            except InvalidEngineError:
                self.stderr.write(
                    "Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']))
                continue

        # Validate if container image is allowed based on the configured pattern.
        # NOTE: This validation happens here and is not deferred to executors because the idea
        #       is that this will be moved to a "container" requirement independent of the
        #       executor.
        if hasattr(settings, 'FLOW_CONTAINER_VALIDATE_IMAGE'):
            try:
                container_image = dict_dot(p, 'requirements.executor.docker.image')
                if not re.match(settings.FLOW_CONTAINER_VALIDATE_IMAGE, container_image):
                    self.stderr.write(
                        "Skip processor {}: container image does not match '{}'".format(
                            slug, settings.FLOW_CONTAINER_VALIDATE_IMAGE))
                    continue
            except KeyError:
                pass

        version = p['version']
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip processor {}: newer version installed".format(slug))
            continue

        previous_process_qs = Process.objects.filter(slug=slug)
        if previous_process_qs.exists():
            previous_process = previous_process_qs.latest()
        else:
            previous_process = None

        process_query = Process.objects.filter(slug=slug, version=version)
        if process_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip processor {}: same version installed".format(slug))
                continue

            process_query.update(**p)
            log_processors.append("Updated {}".format(slug))
        else:
            process = Process.objects.create(contributor=user, **p)
            assign_contributor_permissions(process)
            if previous_process:
                copy_permissions(previous_process, process)
            log_processors.append("Inserted {}".format(slug))

    if verbosity > 0:
        if log_processors:
            self.stdout.write("Processor Updates:")
            for log in log_processors:
                self.stdout.write(" {}".format(log))

        if log_templates:
            self.stdout.write("Default Template Updates:")
            for log in log_templates:
                self.stdout.write(" {}".format(log))
def register_processes(self, process_schemas, user, force=False, verbosity=1):
    """Read and register processors."""
    log_processors = []
    log_templates = []

    for p in process_schemas:
        if p['type'][-1] != ':':
            p['type'] += ':'

        if 'category' in p and not p['category'].endswith(':'):
            p['category'] += ':'

        # get `data_name` from `static`
        if 'static' in p:
            for schema, _, _ in iterate_schema({}, p['static']):
                if schema['name'] == 'name' and 'default' in schema:
                    p['data_name'] = schema['default']

        # support backward compatibility
        # TODO: update .yml files and remove
        if 'slug' not in p:
            p['slug'] = slugify(p.pop('name').replace(':', '-'))
            p['name'] = p.pop('label')

            p.pop('var', None)
            p.pop('static', None)

        for field in ['input', 'output', 'var', 'static']:
            for schema, _, _ in iterate_schema({}, p[field] if field in p else {}):
                if not schema['type'][-1].endswith(':'):
                    schema['type'] += ':'

        # TODO: Check if schemas validate with our JSON meta schema and Processor model docs.

        if not self.valid(p, PROCESSOR_SCHEMA):
            continue

        if 'persistence' in p:
            persistence_mapping = {
                'RAW': Process.PERSISTENCE_RAW,
                'CACHED': Process.PERSISTENCE_CACHED,
                'TEMP': Process.PERSISTENCE_TEMP,
            }

            p['persistence'] = persistence_mapping[p['persistence']]

        if 'input' in p:
            p['input_schema'] = p.pop('input')

        if 'output' in p:
            p['output_schema'] = p.pop('output')

        slug = p['slug']

        if 'run' in p:
            # Set default language to 'bash' if not set.
            p['run'].setdefault('language', 'bash')

            # Transform output schema using the execution engine.
            try:
                execution_engine = manager.get_execution_engine(p['run']['language'])
                extra_output_schema = execution_engine.get_output_schema(p)
                if extra_output_schema:
                    p.setdefault('output_schema', []).extend(extra_output_schema)
            except InvalidEngineError:
                self.stderr.write(
                    "Skip processor {}: execution engine '{}' not supported".format(
                        slug, p['run']['language']))
                continue

        version = p['version']
        int_version = convert_version_string_to_int(version, VERSION_NUMBER_BITS)

        # `latest version` is returned as `int` so it has to be compared to `int_version`
        latest_version = Process.objects.filter(slug=slug).aggregate(Max('version'))['version__max']
        if latest_version is not None and latest_version > int_version:
            self.stderr.write("Skip processor {}: newer version installed".format(slug))
            continue

        previous_process_qs = Process.objects.filter(slug=slug)
        if previous_process_qs.exists():
            previous_process = previous_process_qs.latest()
        else:
            previous_process = None

        process_query = Process.objects.filter(slug=slug, version=version)
        if process_query.exists():
            if not force:
                if verbosity > 0:
                    self.stdout.write("Skip processor {}: same version installed".format(slug))
                continue

            process_query.update(**p)
            log_processors.append("Updated {}".format(slug))
        else:
            process = Process.objects.create(contributor=user, **p)
            if previous_process:
                copy_permissions(previous_process, process)
            log_processors.append("Inserted {}".format(slug))

    if verbosity > 0:
        if len(log_processors) > 0:
            self.stdout.write("Processor Updates:")
            for log in log_processors:
                self.stdout.write(" {}".format(log))

        if len(log_templates) > 0:
            self.stdout.write("Default Template Updates:")
            for log in log_templates:
                self.stdout.write(" {}".format(log))
def save(self, render_name=False, *args, **kwargs):
    """Save the data model."""
    # Generate the descriptor if one is not already set.
    if self.name != self._original_name:
        self.named_by_user = True

    create = self.pk is None
    if create:
        # Default values for INPUT
        input_schema = self.process.input_schema  # pylint: disable=no-member
        for field_schema, fields, path in iterate_schema(self.input, input_schema):
            if 'default' in field_schema and field_schema['name'] not in fields:
                dict_dot(self.input, path, field_schema['default'])

        if not self.name:
            self._render_name()
        else:
            self.named_by_user = True

        self.checksum = get_data_checksum(
            self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

    elif render_name:
        self._render_name()

    self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

    if self.status != Data.STATUS_ERROR:
        hydrate_size(self)

    if create:
        validate_schema(self.input, self.process.input_schema)  # pylint: disable=no-member

    render_descriptor(self)

    if self.descriptor_schema:
        try:
            validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
            self.descriptor_dirty = False
        except DirtyError:
            self.descriptor_dirty = True
    elif self.descriptor and self.descriptor != {}:
        raise ValueError("`descriptor_schema` must be defined if `descriptor` is given")

    if self.status != Data.STATUS_ERROR:
        path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'], str(self.pk))
        output_schema = self.process.output_schema  # pylint: disable=no-member
        if self.status == Data.STATUS_DONE:
            validate_schema(self.output, output_schema, path_prefix=path_prefix)
        else:
            validate_schema(self.output, output_schema, path_prefix=path_prefix, test_required=False)

    with transaction.atomic():
        super(Data, self).save(*args, **kwargs)

        # We can only save dependencies after the data object has been saved. This
        # is why a transaction block is needed and the save method must be called first.
        if create:
            self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member

    if create:
        self.create_entity()