def before_update(self, context, current_resource, updated_resource):
        '''Flag a resource for async validation if relevant fields changed.

        The incoming resource is first passed through
        ``_process_schema_fields``. When the update mode is ``async``, the
        resource id is recorded in ``self.resources_to_validate`` if a file
        was uploaded, or the URL, schema or (case-insensitive) format differ
        from the current resource, provided the new format is listed in
        ``settings.SUPPORTED_FORMATS``.

        :param context: action context (not used here)
        :param current_resource: the resource dict before the update
        :param updated_resource: the resource dict carrying the changes
        :returns: the processed ``updated_resource``
        '''
        updated_resource = self._process_schema_fields(updated_resource)

        if get_update_mode_from_config() != u'async':
            return updated_resource

        # Normalised once: it is needed both for the change detection and
        # for the supported-format check.
        new_format = updated_resource.get(u'format', u'').lower()

        content_changed = (
            # New file uploaded
            updated_resource.get(u'upload')
            # External URL changed
            or updated_resource.get(u'url') != current_resource.get(u'url')
            # Schema changed
            or updated_resource.get(u'schema') != current_resource.get(
                u'schema')
            # Format changed
            or new_format != current_resource.get(u'format', u'').lower()
        )

        # Only formats the validator supports are worth scheduling
        if content_changed and new_format in settings.SUPPORTED_FORMATS:
            self.resources_to_validate[updated_resource[u'id']] = True

        return updated_resource
Esempio n. 2
0
    def after_update(self, context, data_dict):
        '''Run validation after a dataset or resource has been updated.

        Does nothing unless the update mode is ``async``. A ``data_dict``
        with a non-empty ``resources`` list is treated as a dataset, anything
        else as a single resource.

        :param context: action context; the ``_validation_performed`` marker
            is consumed here when present
        :param data_dict: the updated dataset or resource dict
        '''
        if get_update_mode_from_config() != u'async':
            return

        if context.get('_validation_performed'):
            # Ugly, but needed to avoid circular loops caused by the
            # validation job calling resource_patch (which calls
            # package_update)
            context.pop('_validation_performed')
            return

        dataset_resources = data_dict.get(u'resources')
        if dataset_resources:
            # This is a dataset
            for resource in dataset_resources:
                # Resources already flagged are part of a resource_update
                # call and will be handled on the next `after_update` call;
                # everything else comes from an actual package_update call
                # and is validated if necessary.
                if resource[u'id'] not in self.resources_to_validate:
                    self._handle_validation_for_resource(resource)
            return

        # This is a resource
        resource_id = data_dict[u'id']
        if resource_id not in self.resources_to_validate:
            return

        del self.resources_to_validate[resource_id]
        _run_async_validation(resource_id)
Esempio n. 3
0
    def after_update(self, context, updated_resource):
        '''Start async validation for a resource flagged for validation.

        Does nothing unless the update mode is ``async`` and the resource id
        is present in ``self.resources_to_validate``; the id is removed from
        that mapping before the validation is started.

        :param context: action context (not used here)
        :param updated_resource: the resource dict after the update
        '''
        if get_update_mode_from_config() != u'async':
            return

        flagged_id = updated_resource[u'id']
        if flagged_id not in self.resources_to_validate:
            return

        # Unflag first so the resource is not picked up a second time
        del self.resources_to_validate[flagged_id]
        _run_async_validation(flagged_id)
Esempio n. 4
0
    def get_actions(self):
        '''Return the action functions provided by this plugin.

        The three ``resource_validation_*`` actions are always included.
        ``resource_create`` and ``resource_update`` are replaced by the
        custom versions only when the corresponding mode is ``sync``.

        :returns: dict mapping action names to action functions
        '''
        sync_mode = u'sync'

        actions = {
            u'resource_validation_run': resource_validation_run,
            u'resource_validation_show': resource_validation_show,
            u'resource_validation_delete': resource_validation_delete,
        }

        if get_create_mode_from_config() == sync_mode:
            actions.update({u'resource_create': custom_resource_create})

        if get_update_mode_from_config() == sync_mode:
            actions.update({u'resource_update': custom_resource_update})

        return actions
Esempio n. 5
0
    def after_update(self, context, data_dict):
        '''Run validation after a dataset or resource has been updated.

        Does nothing unless the update mode or the create mode is ``async``.
        Datasets have each of their not-yet-flagged resources handed to
        ``_handle_validation_for_resource``; a flagged resource gets an
        async validation unless an ``IDataValidation`` plugin vetoes it.

        :param context: action context; ``_validation_performed`` is popped
            and ``save`` is read
        :param data_dict: the updated dataset or resource dict
        '''
        is_dataset = self._data_dict_is_dataset(data_dict)

        # Need to allow create as well because resource_create calls
        # package_update
        if (get_update_mode_from_config() != u'async'
                and get_create_mode_from_config() != u'async'):
            return

        if context.pop('_validation_performed', None):
            # Ugly, but needed to avoid circular loops caused by the
            # validation job calling resource_patch (which calls
            # package_update)
            return

        if not is_dataset:
            # This is a resource
            resource_id = data_dict[u'id']
            if resource_id not in self.resources_to_validate:
                return

            # Any single plugin can veto the validation; stop asking at the
            # first refusal.
            allowed = all(
                plugin.can_validate(context, data_dict)
                for plugin in p.PluginImplementations(IDataValidation))
            if not allowed:
                log.debug('Skipping validation for resource %s',
                          data_dict['id'])
                return

            del self.resources_to_validate[resource_id]
            _run_async_validation(resource_id)
            return

        package_id = data_dict.get('id')
        if self.packages_to_skip.pop(package_id, None) or context.get(
                'save', False):
            # Either we're updating an individual resource,
            # or we're updating the package metadata via the web form;
            # in both cases, we don't need to validate every resource.
            return

        for resource in data_dict.get(u'resources', []):
            # Flagged resources are part of a resource_update call and will
            # be handled on the next `after_update` call; the others come
            # from an actual package_update call and are validated if
            # necessary.
            if resource[u'id'] not in self.resources_to_validate:
                self._handle_validation_for_resource(context, resource)
Esempio n. 6
0
    def before_update(self, context, current_resource, updated_resource):
        '''Record the owning package and flag the resource for validation.

        The incoming resource is passed through ``_process_schema_fields``
        and its package id is stored in ``self.packages_to_skip``. When the
        update mode is ``async``, the resource id is also recorded in
        ``self.resources_to_validate`` if a file was uploaded, or the URL,
        schema or (case-insensitive) format changed, provided the new format
        is listed in ``settings.SUPPORTED_FORMATS``.

        :param context: action context (not used here)
        :param current_resource: the resource dict before the update
        :param updated_resource: the resource dict carrying the changes
        :returns: the processed ``updated_resource``
        '''
        updated_resource = self._process_schema_fields(updated_resource)

        # the call originates from a resource API, so don't validate the
        # entire package
        package_id = updated_resource.get('package_id')
        if not package_id:
            # Fall back to the stored resource to find the owning package
            show_resource = t.get_action('resource_show')
            stored_resource = show_resource(
                context={'ignore_auth': True},
                data_dict={'id': updated_resource['id']})
            if stored_resource:
                package_id = stored_resource['package_id']
        self.packages_to_skip[package_id] = True

        if get_update_mode_from_config() != u'async':
            return updated_resource

        # Normalised once: it is needed both for the change detection and
        # for the supported-format check.
        new_format = updated_resource.get(u'format', u'').lower()

        content_changed = (
            # New file uploaded
            updated_resource.get(u'upload')
            # External URL changed
            or updated_resource.get(u'url') != current_resource.get(u'url')
            # Schema changed
            or updated_resource.get(u'schema') != current_resource.get(
                u'schema')
            # Format changed
            or new_format != current_resource.get(u'format', u'').lower()
        )

        # Make sure format is supported
        if content_changed and new_format in settings.SUPPORTED_FORMATS:
            self.resources_to_validate[updated_resource[u'id']] = True

        return updated_resource
    def after_update(self, context, data_dict):
        '''Run validation after a dataset or resource has been updated.

        Does nothing unless the update mode or the create mode is ``async``.
        Datasets have each of their not-yet-flagged resources handed to
        ``_handle_validation_for_resource``; a flagged resource gets an
        async validation unless an ``IDataValidation`` plugin vetoes it.

        :param context: action context; the ``_validation_performed`` marker
            is consumed here when present
        :param data_dict: the updated dataset or resource dict
        '''
        is_dataset = self._data_dict_is_dataset(data_dict)

        # Need to allow create as well because resource_create calls
        # package_update
        if (get_update_mode_from_config() != u'async'
                and get_create_mode_from_config() != u'async'):
            return

        if context.get('_validation_performed'):
            # Ugly, but needed to avoid circular loops caused by the
            # validation job calling resource_patch (which calls
            # package_update)
            context.pop('_validation_performed')
            return

        if is_dataset:
            for resource in data_dict.get(u'resources', []):
                # Flagged resources are part of a resource_update call and
                # will be handled on the next `after_update` call; the
                # others come from an actual package_update call and are
                # validated if necessary.
                if resource[u'id'] not in self.resources_to_validate:
                    self._handle_validation_for_resource(context, resource)
            return

        # This is a resource
        resource_id = data_dict[u'id']
        if resource_id not in self.resources_to_validate:
            return

        # Any single plugin can veto the validation; stop asking at the
        # first refusal.
        vetoed = any(
            not plugin.can_validate(context, data_dict)
            for plugin in p.PluginImplementations(IDataValidation))
        if vetoed:
            log.debug('Skipping validation for resource {}'.format(
                data_dict['id']))
            return

        del self.resources_to_validate[resource_id]
        _run_async_validation(resource_id)
Esempio n. 8
0
    def after_update(self, context, data_dict):
        '''Run async validation after a flagged resource has been updated.

        Does nothing unless the update mode or the create mode is ``async``.
        Dataset updates are ignored here. For a flagged resource, an async
        validation is started unless an ``IDataValidation`` plugin vetoes
        it; if the resource dict has a truthy ``validate_package`` value,
        ``resource_validation_run_batch`` is then called for its package.

        :param context: action context; the ``_validation_performed`` and
            ``_dont_validate`` markers are consumed here when present
        :param data_dict: the updated dataset or resource dict
        '''
        is_dataset = self._data_dict_is_dataset(data_dict)

        # Need to allow create as well because resource_create calls
        # package_update
        if (get_update_mode_from_config() != u'async'
                and get_create_mode_from_config() != u'async'):
            return

        if context.get('_validation_performed'):
            # Ugly, but needed to avoid circular loops caused by the
            # validation job calling resource_patch (which calls
            # package_update)
            context.pop('_validation_performed')
            return

        if is_dataset:
            # Nothing is done for dataset-level updates
            return

        if context.get('_dont_validate'):
            # Same kind of one-shot marker as above: consume it and skip
            # validation for this call.
            context.pop('_dont_validate')
            return

        # This is a resource
        resource_id = data_dict[u'id']
        if resource_id not in self.resources_to_validate:
            return

        # Any single plugin can veto the validation; stop asking at the
        # first refusal.
        vetoed = any(
            not plugin.can_validate(context, data_dict)
            for plugin in p.PluginImplementations(IDataValidation))
        if vetoed:
            log.debug('Skipping validation for resource {}'.format(
                data_dict['id']))
            return

        del self.resources_to_validate[resource_id]
        _run_async_validation(resource_id)

        if not data_dict.get('validate_package'):
            return

        run_batch = t.get_action('resource_validation_run_batch')
        run_batch(context, {'dataset_ids': data_dict.get('package_id')})
    def test_config_both_false(self):
        '''Both the update and the create mode helpers must return None.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert_equals(update_mode, None)

        create_mode = get_create_mode_from_config()
        assert_equals(create_mode, None)
Esempio n. 10
0
    def test_config_update_false_async(self):
        '''The update mode helper must return None.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert_equals(update_mode, None)
Esempio n. 11
0
    def test_config_update_true_async(self):
        '''The update mode helper must return ``'async'``.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert_equals(update_mode, 'async')
Esempio n. 12
0
    def test_config_defaults(self):
        '''Both the update and the create mode helpers must return 'async'.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert_equals(update_mode, 'async')

        create_mode = get_create_mode_from_config()
        assert_equals(create_mode, 'async')
def resource_update(context, data_dict):
    '''Update a resource.

    This is a duplicate of the CKAN core resource_update action, with just
    the addition of a synchronous data validation step.

    This is of course not ideal but it's the only way right now to hook
    reliably into the update process without overcomplicating things.
    Hopefully future versions of CKAN will incorporate more flexible hook
    points that will allow a better approach.

    :param context: action context; ``context['model']`` is always read and
        ``context['user']`` is read when the resource format changes
    :param data_dict: the new resource dict; must contain ``id``. It is
        modified in place (``url``, ``datastore_active``, ``mimetype`` and
        ``size`` may be filled in)
    :returns: the updated resource as returned by ``resource_show``
    :raises ObjectNotFound: if the resource does not exist or is not listed
        in its package
    :raises ValidationError: if ``package_update`` rejects the changes

    '''
    model = context['model']
    resource_id = t.get_or_bust(data_dict, "id")

    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(resource_id)

    # The existence check must come before any attribute access: reading
    # ``resource.format`` on a missing resource would raise AttributeError
    # instead of the ObjectNotFound that callers expect.
    if not resource:
        log.debug('Could not find resource %s', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    context["resource"] = resource
    old_resource_format = resource.format

    t.check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = t.get_action('package_show')(dict(context, return_type='dict'),
                                            {'id': package_id})

    # Locate the resource inside its package; the ``else`` branch only runs
    # when the loop finishes without hitting ``break``.
    for resource_index, pkg_resource in enumerate(pkg_dict['resources']):
        if pkg_resource['id'] == resource_id:
            break
    else:
        log.error('Could not find resource %s after all', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras and
            'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(
            context, pkg_dict['resources'][resource_index], data_dict)

    upload = uploader.get_resource_uploader(data_dict)

    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype

    if 'size' not in data_dict and 'url_type' in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'][resource_index] = data_dict

    try:
        # The commit is deferred so that it only happens after the upload
        # and the (optional) synchronous validation below.
        context['defer_commit'] = True
        context['use_cache'] = False
        updated_pkg_dict = t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        # Prefer the errors of the last resource entry; fall back to the
        # whole error dict when that entry is not available.
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)

    upload.upload(resource_id, uploader.get_max_resource_size())

    # Custom code starts

    if get_update_mode_from_config() == u'sync':

        # Every plugin is consulted; a single refusal disables validation.
        run_validation = True
        for plugin in plugins.PluginImplementations(IDataValidation):
            if not plugin.can_validate(context, data_dict):
                log.debug('Skipping validation for resource %s', resource_id)
                run_validation = False

        if run_validation:
            is_local_upload = (
                hasattr(upload, 'filename') and
                upload.filename is not None and
                isinstance(upload, uploader.ResourceUpload))
            # NOTE(review): new_resource=True is passed even though this is
            # an update -- confirm this is what _run_sync_validation expects.
            _run_sync_validation(
                resource_id, local_upload=is_local_upload, new_resource=True)

    # Custom code ends

    model.repo.commit()

    resource = t.get_action('resource_show')(context, {'id': resource_id})

    if old_resource_format != resource['format']:
        t.get_action('resource_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': updated_pkg_dict,
             'resource': resource})

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.after_update(context, resource)

    return resource
Esempio n. 14
0
    def test_config_update_true_async(self):
        '''The update mode helper must return ``'async'``.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert update_mode == 'async'
Esempio n. 15
0
    def test_config_both_false(self):
        '''Both the update and the create mode helpers must return None.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert update_mode is None

        create_mode = get_create_mode_from_config()
        assert create_mode is None
Esempio n. 16
0
    def test_config_update_false_async(self):
        '''The update mode helper must return None.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert update_mode is None
Esempio n. 17
0
    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        updated_pkg_dict = t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError, e:
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)

    upload.upload(id, uploader.get_max_resource_size())

    # Custom code starts

    if get_update_mode_from_config() == u'sync':
        is_upload = (hasattr(upload, 'filename')
                     and upload.filename is not None)
        _run_sync_validation(id, upload=is_upload)

    # Custom code ends

    model.repo.commit()

    resource = t.get_action('resource_show')(context, {'id': id})

    if old_resource_format != resource['format']:
        t.get_action('resource_create_default_resource_views')(
            {
                'model': context['model'],
                'user': context['user'],
Esempio n. 18
0
    def test_config_defaults(self):
        '''Both the update and the create mode helpers must return 'async'.

        The configuration driving this case is not visible in this block.
        '''
        update_mode = get_update_mode_from_config()
        assert update_mode == 'async'

        create_mode = get_create_mode_from_config()
        assert create_mode == 'async'