Example #1
0
    def test_checksum_consistency(self):
        """Check that the checksum is the same for both key orders of the input."""
        expected = 'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c'
        process = Process(version='1.0.0', slug='my-process')
        data = Data()

        # The same key/value pairs, written in two different orders.
        orderings = (
            {'tss': 0, 'genome': 'HG19'},
            {'genome': 'HG19', 'tss': 0},
        )
        for ordered_input in orderings:
            data.input = ordered_input
            checksum = get_data_checksum(data.input, process.slug, process.version)
            self.assertEqual(checksum, expected)
Example #2
0
    def test_checksum_consistency(self):
        """Assert one fixed checksum for two differently ordered input dicts."""
        process = Process(version='1.0.0', slug='my-process')
        data = Data()
        expected_checksum = (
            'ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c')

        # Both dicts hold identical items; only the insertion order differs.
        first_order = {'tss': 0, 'genome': 'HG19'}
        second_order = {'genome': 'HG19', 'tss': 0}

        for candidate in (first_order, second_order):
            data.input = candidate
            checksum = get_data_checksum(
                data.input, process.slug, process.version)
            self.assertEqual(checksum, expected_checksum)
Example #3
0
    def test_checksum_consistency(self):
        """Verify the checksum stays the same when the input keys are reordered."""
        digest = "ca322c2bb48b58eea3946e624fe6cfdc53c2cc12478465b6f0ca2d722e280c4c"
        process = Process(version="1.0.0", slug="my-process")
        data = Data()

        # Identical content, two key orders.
        for reordered in (
            {"tss": 0, "genome": "HG19"},
            {"genome": "HG19", "tss": 0},
        ):
            data.input = reordered
            checksum = get_data_checksum(data.input, process.slug, process.version)
            self.assertEqual(checksum, digest)
Example #4
0
    def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
        """Save the data model.

        When the object is being created (``self.pk`` is ``None``),
        ``fill_with_defaults`` is applied to the input, the name is rendered
        if none was given, and the checksum is computed. On every save the
        output storage is saved, the size is hydrated (unless the status is
        ``STATUS_ERROR``), and the input, descriptor and output are validated
        before the object is persisted inside a transaction.

        :param render_name: when true, re-render the name of an already
            existing object
        :param args: positional arguments forwarded to ``_perform_save``
        :param kwargs: keyword arguments forwarded to ``_perform_save``; if
            ``update_fields`` is given and not ``None``, ``'size'`` is added
            to a copy of it (the caller's sequence is not modified)
        :raises ValueError: if ``descriptor`` is set but
            ``descriptor_schema`` is not
        """
        if self.name != self._original_name:
            self.named_by_user = True

        create = self.pk is None
        if create:
            fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

            if not self.name:
                self._render_name()
            else:
                self.named_by_user = True

            self.checksum = get_data_checksum(
                self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

        elif render_name:
            self._render_name()

        self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

        if self.status != Data.STATUS_ERROR:
            hydrate_size(self)
            # If only specified fields are updated (e.g. in executor), size needs to be added.
            # A new list is built instead of appending in place: the caller may pass a
            # tuple or an explicit ``None``, and its own sequence must stay untouched.
            update_fields = kwargs.get('update_fields')
            if update_fields is not None and 'size' not in update_fields:
                kwargs['update_fields'] = list(update_fields) + ['size']

        # Input Data objects are validated only upon creation as they can be deleted later.
        skip_missing_data = not create
        validate_schema(
            self.input, self.process.input_schema, skip_missing_data=skip_missing_data  # pylint: disable=no-member
        )

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
                self.descriptor_dirty = False
            except DirtyError:
                # A descriptor that fails with DirtyError is only flagged; the save continues.
                self.descriptor_dirty = True
        elif self.descriptor and self.descriptor != {}:
            raise ValueError("`descriptor_schema` must be defined if `descriptor` is given")

        if self.status != Data.STATUS_ERROR:
            output_schema = self.process.output_schema  # pylint: disable=no-member
            if self.status == Data.STATUS_DONE:
                validate_schema(
                    self.output, output_schema, data_location=self.location, skip_missing_data=True
                )
            else:
                # Not done yet: validate without the required-field test.
                validate_schema(
                    self.output, output_schema, data_location=self.location, test_required=False
                )

        with transaction.atomic():
            self._perform_save(*args, **kwargs)
Example #5
0
    def save(self, render_name=False, *args, **kwargs):
        """Save the data model.

        ``process_resources`` is validated against the ``"process_resources"``
        validation schema first. When the object is being created
        (``self.pk`` is ``None``), ``fill_with_defaults`` is applied to the
        input, the name is rendered if none was given, the checksum is
        computed, the input is validated and the size is hydrated. The
        descriptor is then rendered and validated, and the object is
        persisted inside a transaction.

        :param render_name: when true, re-render the name of an already
            existing object
        :param args: positional arguments forwarded to ``_perform_save``
        :param kwargs: keyword arguments forwarded to ``_perform_save``; on
            creation, if ``update_fields`` is given and not ``None``,
            ``"size"`` is added to a copy of it (the caller's sequence is
            not modified)
        :raises ValidationError: if ``process_resources`` does not match its
            schema
        :raises ValueError: if ``descriptor`` is set but
            ``descriptor_schema`` is not
        """
        if self.name != self._original_name:
            self.named_by_user = True

        try:
            jsonschema.validate(
                self.process_resources, validation_schema("process_resources")
            )
        except jsonschema.exceptions.ValidationError as exception:
            # Re-raise as Django ValidationError, keeping the original error as the cause.
            raise ValidationError(exception.message) from exception

        create = self.pk is None
        if create:
            fill_with_defaults(self.input, self.process.input_schema)

            if not self.name:
                self._render_name()
            else:
                self.named_by_user = True

            self.checksum = get_data_checksum(
                self.input, self.process.slug, self.process.version
            )

            validate_schema(self.input, self.process.input_schema)

            hydrate_size(self)
            # If only specified fields are updated (e.g. in executor), size needs to be added.
            # A new list is built instead of appending in place: the caller may pass a
            # tuple or an explicit ``None``, and its own sequence must stay untouched.
            update_fields = kwargs.get("update_fields")
            if update_fields is not None and "size" not in update_fields:
                kwargs["update_fields"] = list(update_fields) + ["size"]

        elif render_name:
            self._render_name()

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)
                self.descriptor_dirty = False
            except DirtyError:
                # A descriptor that fails with DirtyError is only flagged; the save continues.
                self.descriptor_dirty = True
        elif self.descriptor and self.descriptor != {}:
            raise ValueError(
                "`descriptor_schema` must be defined if `descriptor` is given"
            )

        with transaction.atomic():
            self._perform_save(*args, **kwargs)

        # Remember the output as it was at the time of this save.
        self._original_output = self.output
Example #6
0
    def perform_get_or_create(self, request, *args, **kwargs):
        """Perform "get_or_create" - return existing object if found.

        The request data is validated with the view's serializer, the
        checksum of the process input (after ``fill_with_defaults``) is
        computed, and ``Data`` objects with that checksum, a cached or temp
        process persistence and ``view_data`` permission for the requesting
        user are looked up.

        :param request: the incoming request; ``request.data`` supplies the
            ``process`` and the optional ``input``
        :return: a ``Response`` with the serialized match that sorts last by
            ``created``, or ``None`` when there is no match
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        process = serializer.validated_data.get('process')
        process_input = request.data.get('input', {})

        fill_with_defaults(process_input, process.input_schema)

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

        # One query instead of ``exists()`` followed by ``last()``: this avoids the
        # extra round trip and the window in which the row could disappear
        # between the two calls (which would serialize ``None``).
        data = data_qs.order_by('created').last()
        if data is None:
            return None

        serializer = self.get_serializer(data)
        return Response(serializer.data)
Example #7
0
    def perform_get_or_create(self, request, *args, **kwargs):
        """Perform "get_or_create" - return existing object if found.

        Validates the request data, computes the checksum of the process
        input (after ``fill_with_defaults``) and searches for ``Data``
        objects with that checksum whose process persistence is cached or
        temp and on which the requesting user has ``view_data`` permission.

        :param request: the incoming request; ``request.data`` supplies the
            ``process`` and the optional ``input``
        :return: a ``Response`` with the serialized match that sorts last by
            ``created``, or ``None`` when there is no match
        """
        serializer = self.get_serializer(data=request.data)
        serializer.is_valid(raise_exception=True)
        process = serializer.validated_data.get('process')
        process_input = request.data.get('input', {})

        fill_with_defaults(process_input, process.input_schema)

        checksum = get_data_checksum(process_input, process.slug, process.version)
        data_qs = Data.objects.filter(
            checksum=checksum,
            process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
        )
        data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

        # ``last()`` yields ``None`` for an empty queryset, so a separate
        # ``exists()`` query is unnecessary and would leave a gap in which
        # the matched row could be deleted before it is fetched.
        data = data_qs.order_by('created').last()
        if data is None:
            return None

        serializer = self.get_serializer(data)
        return Response(serializer.data)
Example #8
0
    def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
        """Save the data model.

        When the object is being created (``self.pk`` is ``None``),
        ``fill_with_defaults`` is applied to the input, the name is rendered
        if none was given, and the checksum is computed. On every save the
        output storage is saved, the size is hydrated (unless the status is
        ``STATUS_ERROR``), and the input, descriptor and output are
        validated. The object is then saved inside a transaction; on
        creation its dependencies are saved and ``create_entity`` is called
        in that same transaction.

        :param render_name: when true, re-render the name of an already
            existing object
        :param args: positional arguments forwarded to the parent ``save``
        :param kwargs: keyword arguments forwarded to the parent ``save``;
            if ``update_fields`` is given and not ``None``, ``'size'`` is
            added to a copy of it (the caller's sequence is not modified)
        :raises ValueError: if ``descriptor`` is set but
            ``descriptor_schema`` is not
        """
        # A name that differs from the one remembered in ``_original_name``
        # is treated as set by the user.
        if self.name != self._original_name:
            self.named_by_user = True

        create = self.pk is None
        if create:
            fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

            if not self.name:
                self._render_name()
            else:
                self.named_by_user = True

            self.checksum = get_data_checksum(self.input, self.process.slug,
                                              self.process.version)  # pylint: disable=no-member

        elif render_name:
            self._render_name()

        self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

        if self.status != Data.STATUS_ERROR:
            hydrate_size(self)
            # If only specified fields are updated (e.g. in executor), size needs to be added.
            # A new list is built instead of appending in place: the caller may pass a
            # tuple or an explicit ``None``, and its own sequence must stay untouched.
            update_fields = kwargs.get('update_fields')
            if update_fields is not None and 'size' not in update_fields:
                kwargs['update_fields'] = list(update_fields) + ['size']

        # Input Data objects are validated only upon creation as they can be deleted later.
        skip_missing_data = not create
        validate_schema(
            self.input,
            self.process.input_schema,
            skip_missing_data=skip_missing_data  # pylint: disable=no-member
        )

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
                self.descriptor_dirty = False
            except DirtyError:
                # A descriptor that fails with DirtyError is only flagged; the save continues.
                self.descriptor_dirty = True
        elif self.descriptor and self.descriptor != {}:
            raise ValueError(
                "`descriptor_schema` must be defined if `descriptor` is given")

        if self.status != Data.STATUS_ERROR:
            path_prefix = os.path.join(settings.FLOW_EXECUTOR['DATA_DIR'],
                                       str(self.pk))
            output_schema = self.process.output_schema  # pylint: disable=no-member
            if self.status == Data.STATUS_DONE:
                validate_schema(self.output,
                                output_schema,
                                path_prefix=path_prefix)
            else:
                # Not done yet: validate without the required-field test.
                validate_schema(self.output,
                                output_schema,
                                path_prefix=path_prefix,
                                test_required=False)

        with transaction.atomic():
            super().save(*args, **kwargs)

            # We can only save dependencies after the data object has been saved. This
            # is why a transaction block is needed and the save method must be called first.
            if create:
                self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member
                self.create_entity()
Example #9
0
    def create(self, request, *args, **kwargs):
        """Create a resource.

        Steps, in order:

        1. Check that every collection listed in ``request.data['collections']``
           exists and that the user has ``add_collection`` permission on it.
        2. Resolve the process slug in ``request.data['process']`` to the
           latest process the user may view and replace the slug with its
           primary key.
        3. If the ``get_or_create`` keyword argument is truthy, return an
           existing ``Data`` object with the same checksum when one is
           visible to the user.
        4. Otherwise delegate to the parent ``create`` and notify the manager.

        :param request: the incoming request
        :return: a ``Response``; status 400 for an unknown collection pk or
            process slug
        :raises exceptions.PermissionDenied: if the user can view but not add
            to a collection
        :raises exceptions.NotFound: if the user can neither view nor add to
            a collection
        """
        collections = request.data.get('collections', [])

        # check that user has permissions on all collections that Data
        # object will be added to
        for collection_id in collections:
            try:
                collection = Collection.objects.get(pk=collection_id)
            except Collection.DoesNotExist:
                return Response(
                    {
                        'collections': [
                            'Invalid pk "{}" - object does not exist.'.format(
                                collection_id)
                        ]
                    },
                    status=status.HTTP_400_BAD_REQUEST)

            if not request.user.has_perm('add_collection', obj=collection):
                if request.user.has_perm('view_collection', obj=collection):
                    raise exceptions.PermissionDenied(
                        "You don't have `ADD` permission on collection (id: {})."
                        .format(collection_id))
                else:
                    raise exceptions.NotFound(
                        "Collection not found (id: {}).".format(collection_id))

        # translate the process's slug to id
        process_slug = request.data.get('process', None)
        process_query = Process.objects.filter(slug=process_slug)
        process_query = get_objects_for_user(request.user, 'view_process',
                                             process_query)
        try:
            process = process_query.latest()
        except Process.DoesNotExist:
            return Response(
                {
                    'process': [
                        'Invalid process slug "{}" - object does not exist.'.
                        format(process_slug)
                    ]
                },
                status=status.HTTP_400_BAD_REQUEST)
        request.data['process'] = process.pk

        # perform "get_or_create" if requested - return existing object
        # if found
        if kwargs.pop('get_or_create', False):
            process_input = request.data.get('input', {})

            # use default values if they are not given
            for field_schema, fields, path in iterate_schema(
                    process_input, process.input_schema):
                if 'default' in field_schema and field_schema[
                        'name'] not in fields:
                    dict_dot(process_input, path, field_schema['default'])

            checksum = get_data_checksum(process_input, process.slug,
                                         process.version)
            data_qs = Data.objects.filter(
                checksum=checksum,
                process__persistence__in=[
                    Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP
                ],
            )
            data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

            # One query instead of ``exists()`` followed by ``last()``: this
            # avoids the extra round trip and the window in which the row
            # could disappear between the two calls.
            data = data_qs.order_by('created').last()
            if data is not None:
                serializer = self.get_serializer(data)
                return Response(serializer.data)

        # create the objects
        resp = super(DataViewSet, self).create(request, *args, **kwargs)

        # run manager
        manager.communicate()

        return resp
Example #10
0
    def save(self, render_name=False, *args, **kwargs):
        """Prepare, validate and persist the data object.

        For a new object (no primary key yet) missing input fields are
        populated from the schema defaults, the name is rendered when empty
        and the checksum is calculated. Afterwards the storage is saved, the
        size is hydrated and the input (new objects only), descriptor and
        output are validated. The database save and the dependency save run
        in one transaction; ``create_entity`` is called after it for new
        objects.

        :param render_name: re-render the name of an existing object
        :raises ValueError: if a descriptor is given without a descriptor
            schema
        """
        if self.name != self._original_name:
            self.named_by_user = True

        is_new = self.pk is None
        if is_new:
            # Copy schema defaults into the input for fields that are absent.
            input_schema = self.process.input_schema  # pylint: disable=no-member
            for field_schema, fields, path in iterate_schema(self.input, input_schema):
                if 'default' not in field_schema:
                    continue
                if field_schema['name'] not in fields:
                    dict_dot(self.input, path, field_schema['default'])

            if self.name:
                self.named_by_user = True
            else:
                self._render_name()

            self.checksum = get_data_checksum(
                self.input,
                self.process.slug,  # pylint: disable=no-member
                self.process.version,  # pylint: disable=no-member
            )

        elif render_name:
            self._render_name()

        self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

        if self.status != Data.STATUS_ERROR:
            hydrate_size(self)

        if is_new:
            validate_schema(self.input, self.process.input_schema)  # pylint: disable=no-member

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
            except DirtyError:
                self.descriptor_dirty = True
            else:
                self.descriptor_dirty = False
        elif self.descriptor and self.descriptor != {}:
            raise ValueError(
                "`descriptor_schema` must be defined if `descriptor` is given")

        if self.status != Data.STATUS_ERROR:
            path_prefix = os.path.join(
                settings.FLOW_EXECUTOR['DATA_DIR'], str(self.pk))
            output_schema = self.process.output_schema  # pylint: disable=no-member

            # Only a finished object is validated with the default
            # required-field test.
            output_checks = {'path_prefix': path_prefix}
            if self.status != Data.STATUS_DONE:
                output_checks['test_required'] = False
            validate_schema(self.output, output_schema, **output_checks)

        with transaction.atomic():
            super(Data, self).save(*args, **kwargs)

            # Dependencies reference the saved row, so they are stored after
            # the object itself, within the same transaction.
            if is_new:
                self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member

        if is_new:
            self.create_entity()
Example #11
0
def calculate_checksum(apps, schema_editor):
    """Recompute and save the checksum of every ``Data`` object.

    The model is obtained through ``apps.get_model`` and each object's
    checksum is derived from its input and its process's slug and version.

    :param apps: app registry used to look up the ``flow.Data`` model
    :param schema_editor: not used by this function
    """
    Data = apps.get_model("flow", "Data")
    # Join the process in the same query; otherwise every ``data.process``
    # access below issues its own query (one extra query per row).
    for data in Data.objects.select_related("process"):
        data.checksum = get_data_checksum(data.input, data.process.slug,
                                          data.process.version)
        data.save()
Example #12
0
    def create(self, request, *args, **kwargs):
        """Create a resource.

        Steps, in order:

        1. Check that every collection listed in ``request.data['collections']``
           exists and that the user has ``add_collection`` permission on it.
        2. Resolve the process slug in ``request.data['process']`` to the
           process that sorts last by ``version``, replace the slug with its
           primary key and check ``view_process`` permission.
        3. If the ``get_or_create`` keyword argument is truthy, return an
           existing ``Data`` object with the same checksum when one is
           visible to the user.
        4. Otherwise delegate to the parent ``create`` and notify the manager.

        :param request: the incoming request
        :return: a ``Response``; status 400 for an unknown collection pk or
            process slug
        :raises exceptions.PermissionDenied: if an authenticated user lacks
            the required permission
        :raises exceptions.NotFound: if an unauthenticated user lacks the
            required permission
        """
        collections = request.data.get('collections', [])

        # check that user has permissions on all collections that Data
        # object will be added to
        for collection_id in collections:
            try:
                collection = Collection.objects.get(pk=collection_id)
            except Collection.DoesNotExist:
                return Response({'collections': ['Invalid pk "{}" - object does not exist.'.format(collection_id)]},
                                status=status.HTTP_400_BAD_REQUEST)

            if not request.user.has_perm('add_collection', obj=collection):
                if request.user.is_authenticated():
                    raise exceptions.PermissionDenied
                else:
                    raise exceptions.NotFound

        # translate the process's slug to id
        process_slug = request.data.get('process', None)
        process_query = Process.objects.filter(slug=process_slug).order_by('version')
        # ``last()`` returns ``None`` for an empty queryset, so the lookup and
        # the existence check are done with a single query.
        process = process_query.last()
        if process is None:
            # XXX: security - is it ok to reveal which processes (don't) exist?
            return Response({'process': ['Invalid process slug "{}" - object does not exist.'.format(process_slug)]},
                            status=status.HTTP_400_BAD_REQUEST)
        request.data['process'] = process.pk

        # check that user has permission on the process
        if not request.user.has_perm('view_process', obj=process):
            if request.user.is_authenticated():
                raise exceptions.PermissionDenied
            else:
                raise exceptions.NotFound

        # perform "get_or_create" if requested - return existing object
        # if found
        if kwargs.pop('get_or_create', False):
            process_input = request.data.get('input', {})

            # use default values if they are not given
            for field_schema, fields, path in iterate_schema(process_input, process.input_schema):
                if 'default' in field_schema and field_schema['name'] not in fields:
                    dict_dot(process_input, path, field_schema['default'])

            checksum = get_data_checksum(process_input, process.slug, process.version)
            data_qs = Data.objects.filter(
                checksum=checksum,
                process__persistence__in=[Process.PERSISTENCE_CACHED, Process.PERSISTENCE_TEMP],
            )
            data_qs = get_objects_for_user(request.user, 'view_data', data_qs)

            # One query instead of ``exists()`` followed by ``last()``: this avoids the
            # extra round trip and the window in which the row could disappear
            # between the two calls.
            data = data_qs.order_by('created').last()
            if data is not None:
                serializer = self.get_serializer(data)
                return Response(serializer.data)

        # create the objects
        resp = super(ResolweCreateDataModelMixin, self).create(request, *args, **kwargs)

        # run manager
        manager.communicate()

        return resp
Example #13
0
    def save(self, render_name=False, *args, **kwargs):  # pylint: disable=keyword-arg-before-vararg
        """Save the data model.

        When the object is being created (``self.pk`` is ``None``),
        ``fill_with_defaults`` is applied to the input, the name is rendered
        if none was given, and the checksum is computed. On every save the
        output storage is saved, the size is hydrated (unless the status is
        ``STATUS_ERROR``), and the input, descriptor and output are
        validated. The object is then saved inside a transaction; on
        creation its dependencies are saved and ``create_entity`` is called
        in that same transaction.

        :param render_name: when true, re-render the name of an already
            existing object
        :param args: positional arguments forwarded to ``_perform_save``
        :param kwargs: keyword arguments forwarded to ``_perform_save``; if
            ``update_fields`` is given and not ``None``, ``'size'`` is added
            to a copy of it (the caller's sequence is not modified)
        :raises ValueError: if ``descriptor`` is set but
            ``descriptor_schema`` is not
        """
        if self.name != self._original_name:
            self.named_by_user = True

        create = self.pk is None
        if create:
            fill_with_defaults(self.input, self.process.input_schema)  # pylint: disable=no-member

            if not self.name:
                self._render_name()
            else:
                self.named_by_user = True

            self.checksum = get_data_checksum(
                self.input, self.process.slug, self.process.version)  # pylint: disable=no-member

        elif render_name:
            self._render_name()

        self.save_storage(self.output, self.process.output_schema)  # pylint: disable=no-member

        if self.status != Data.STATUS_ERROR:
            hydrate_size(self)
            # If only specified fields are updated (e.g. in executor), size needs to be added.
            # A new list is built instead of appending in place: the caller may pass a
            # tuple or an explicit ``None``, and its own sequence must stay untouched.
            update_fields = kwargs.get('update_fields')
            if update_fields is not None and 'size' not in update_fields:
                kwargs['update_fields'] = list(update_fields) + ['size']

        # Input Data objects are validated only upon creation as they can be deleted later.
        skip_missing_data = not create
        validate_schema(
            self.input, self.process.input_schema, skip_missing_data=skip_missing_data  # pylint: disable=no-member
        )

        render_descriptor(self)

        if self.descriptor_schema:
            try:
                validate_schema(self.descriptor, self.descriptor_schema.schema)  # pylint: disable=no-member
                self.descriptor_dirty = False
            except DirtyError:
                # A descriptor that fails with DirtyError is only flagged; the save continues.
                self.descriptor_dirty = True
        elif self.descriptor and self.descriptor != {}:
            raise ValueError("`descriptor_schema` must be defined if `descriptor` is given")

        if self.status != Data.STATUS_ERROR:
            output_schema = self.process.output_schema  # pylint: disable=no-member
            if self.status == Data.STATUS_DONE:
                validate_schema(
                    self.output, output_schema, data_location=self.location, skip_missing_data=True
                )
            else:
                # Not done yet: validate without the required-field test.
                validate_schema(
                    self.output, output_schema, data_location=self.location, test_required=False
                )

        with transaction.atomic():
            self._perform_save(*args, **kwargs)

            # We can only save dependencies after the data object has been saved. This
            # is why a transaction block is needed and the save method must be called first.
            if create:
                self.save_dependencies(self.input, self.process.input_schema)  # pylint: disable=no-member
                self.create_entity()
def calculate_checksum(apps, schema_editor):
    """Recompute and save the checksum of every ``Data`` object.

    The model is obtained through ``apps.get_model`` and each object's
    checksum is derived from its input and its process's slug and version.

    :param apps: app registry used to look up the ``flow.Data`` model
    :param schema_editor: not used by this function
    """
    Data = apps.get_model("flow", "Data")
    # Join the process in the same query; otherwise every ``data.process``
    # access below issues its own query (one extra query per row).
    for data in Data.objects.select_related("process"):
        data.checksum = get_data_checksum(data.input, data.process.slug, data.process.version)
        data.save()