def resource_create(context, data_dict):
    '''Append a new resource to a dataset's list of resources.

    Modelled on the CKAN core resource_create action. The original author
    notes that the core action is duplicated because it was the only
    reliable way to hook into the creation process.

    The package is loaded with ``package_show``, ``data_dict`` is appended
    to its ``resources`` list and the whole package is saved through
    ``package_update`` with ``defer_commit`` set on the context.

    :param context: CKAN action context; ``defer_commit`` and ``use_cache``
        are set on it while the package is updated
    :type context: dict
    :param data_dict: the resource to add; must contain ``package_id``.
        ``url`` is defaulted to ``''`` and ``mimetype``/``size`` are filled
        from the uploader when it exposes them
    :type data_dict: dict
    :raises ValidationError: with the errors of the last resource when
        ``package_update`` rejects the package, or with the full error
        dict when no per-resource errors are present
    '''
    package_id = t.get_or_bust(data_dict, 'package_id')
    if not data_dict.get('url'):
        data_dict['url'] = ''

    pkg_dict = t.get_action('package_show')(
        dict(context, return_type='dict'), {'id': package_id})

    t.check_access('resource_create', context, data_dict)

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_create(context, data_dict)

    if 'resources' not in pkg_dict:
        pkg_dict['resources'] = []

    upload = uploader.get_resource_uploader(data_dict)

    # Only fill in values the caller did not provide explicitly.
    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype
    if 'size' not in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        # The new resource is the last one in the list, so report its
        # errors alone when they are available.
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)
def resource_validation_run(context, data_dict):
    u'''
    Start a validation job against a resource.

    The resource format must be one of ``settings.SUPPORTED_FORMATS`` and
    the resource must have either a URL or an uploaded file.

    When ``async`` is true (the default) the job is enqueued with
    ``enqueue_validation_job``; otherwise ``run_validation_job`` is called
    directly. If a job is already enqueued for the resource, an
    asynchronous request is logged and dropped, while a synchronous
    request still runs.

    :param resource_id: id of the resource to validate
    :type resource_id: string
    :param async: whether to enqueue the job instead of running it inline
    :type async: bool

    :raises ValidationError: if ``resource_id`` is missing, the format is
        not supported, or the resource has neither URL nor upload

    :rtype: None

    '''
    t.check_access(u'resource_validation_run', context, data_dict)

    resource_id = data_dict.get(u'resource_id')
    if not resource_id:
        raise t.ValidationError({u'resource_id': u'Missing value'})

    resource = t.get_action(u'resource_show')({}, {u'id': resource_id})

    # TODO: limit to sysadmins
    async_job = data_dict.get(u'async', True)

    # Ensure format is supported. ``or u''`` also covers a format key that
    # is present but set to None, which would otherwise fail on .lower().
    resource_format = (resource.get(u'format') or u'').lower()
    if resource_format not in settings.SUPPORTED_FORMATS:
        raise t.ValidationError(
            {u'format': u'Unsupported resource format. '
                        u'Must be one of {}'.format(
                            u','.join(settings.SUPPORTED_FORMATS))})

    # Ensure there is a URL or file upload
    if not resource.get(u'url') and resource.get(u'url_type') != u'upload':
        raise t.ValidationError(
            {u'url': u'Resource must have a valid URL or an uploaded file'})

    # Check if there was an existing validation for the resource
    session = context['model'].Session
    try:
        ValidationStatusHelper().createValidationJob(session, resource_id)
    except ValidationJobAlreadyEnqueued:
        if async_job:
            log.error(
                "resource_validation_run: ValidationJobAlreadyEnqueued %s",
                resource_id)
            return

    if async_job:
        package_id = resource['package_id']
        enqueue_validation_job(package_id, resource_id)
    else:
        run_validation_job(resource)
def resource_validation_delete(context, data_dict):
    u'''
    Remove the stored validation result of a resource.

    Looks up the ``Validation`` row whose ``resource_id`` matches, deletes
    it from the session and commits.

    :param resource_id: id of the resource to remove validation from
    :type resource_id: string

    :raises ValidationError: if ``resource_id`` is missing or empty
    :raises ObjectNotFound: if no validation exists for the resource

    :rtype: None

    '''
    t.check_access(u'resource_validation_delete', context, data_dict)

    resource_id = data_dict.get(u'resource_id')
    if not resource_id:
        raise t.ValidationError({u'resource_id': u'Missing value'})

    db_session = context['model'].Session
    lookup = db_session.query(Validation).filter(
        Validation.resource_id == resource_id)

    # .one() signals "no row" with an exception; turn that into None.
    try:
        record = lookup.one()
    except NoResultFound:
        record = None

    if not record:
        raise t.ObjectNotFound('No validation report exists for this resource')

    db_session.delete(record)
    db_session.commit()
def _process_schema_fields(self, data_dict):
    u'''
    Collapse the three ways of supplying a schema into the `schema` field.

    The `schema_upload`, `schema_url` and `schema_json` keys are always
    removed from the data_dict. The first source that applies, in this
    order, is stored under `schema`:

    1. `schema_upload`, when it is a `cgi.FieldStorage`: the file contents
       are read into `schema`.
    2. `schema_url`, when non-empty: it must be a string starting with
       `http` (case-insensitive) and is copied as is.
    3. `schema_json`, when non-empty: copied as is.

    If none applies, `schema` is left untouched. The data_dict still needs
    to pass validation afterwards.

    :raises ValidationError: if `schema_url` is not a string or does not
        start with `http`
    :returns: the same data_dict, modified in place
    '''
    upload_field = data_dict.pop(u'schema_upload', None)
    url_field = data_dict.pop(u'schema_url', None)
    json_field = data_dict.pop(u'schema_json', None)

    if isinstance(upload_field, cgi.FieldStorage):
        data_dict[u'schema'] = upload_field.file.read()
        return data_dict

    if url_field:
        looks_like_url = (
            isinstance(url_field, basestring) and
            url_field.lower()[:4] == u'http')
        if not looks_like_url:
            raise t.ValidationError({u'schema_url': 'Must be a valid URL'})
        data_dict[u'schema'] = url_field
        return data_dict

    if json_field:
        data_dict[u'schema'] = json_field

    return data_dict
def resource_validation_delete(context, data_dict):
    u'''
    Remove the validation job result for a particular resource.

    The job is looked up and deleted through ``ValidationStatusHelper``.

    :param resource_id: id of the resource to remove validation from
    :type resource_id: string

    :raises ValidationError: if ``resource_id`` is missing or empty
    :raises ObjectNotFound: if no validation job exists for the resource

    :rtype: None

    '''
    t.check_access(u'resource_validation_delete', context, data_dict)

    resource_id = data_dict.get(u'resource_id')
    if not resource_id:
        raise t.ValidationError({u'resource_id': u'Missing value'})

    db_session = context['model'].Session
    job = ValidationStatusHelper().getValidationJob(db_session, resource_id)
    if not job:
        raise t.ObjectNotFound(
            'No validation report exists for this resource')

    ValidationStatusHelper().deleteValidationJob(db_session, job)
def resource_validation_show(context, data_dict):
    u'''
    Return the validation job result for a particular resource.

    The result is the dictized validation object, including the validation
    report or errors and metadata such as the timestamp and current status.

    Validation status can be one of:

    * `created`: The validation job is in the processing queue
    * `running`: Validation is under way
    * `error`: There was an error while performing the validation, eg the
        file could not be downloaded or there was an error reading it
    * `success`: Validation was performed, and no issues were found
    * `failure`: Validation was performed, and there were issues found

    :param resource_id: id of the resource to look up
    :type resource_id: string

    :raises ValidationError: if ``resource_id`` is missing or empty
    :raises ObjectNotFound: if no validation job exists for the resource

    :rtype: dict

    '''
    t.check_access(u'resource_validation_show', context, data_dict)

    resource_id = data_dict.get(u'resource_id')
    if not resource_id:
        raise t.ValidationError({u'resource_id': u'Missing value'})

    db_session = context['model'].Session
    job = ValidationStatusHelper().getValidationJob(db_session, resource_id)
    if job:
        return _validation_dictize(job)

    raise t.ObjectNotFound(
        'No validation report exists for this resource')
def _run_sync_validation(resource_id, upload=False, new_resource=True,
                         user=None, local_upload=None):
    u'''
    Run a validation synchronously and undo the change if the data is invalid.

    The validation is run with ``resource_validation_run`` (``async`` set to
    False) and its result read back with ``resource_validation_show``. If
    the run itself raises a ValidationError (e.g. unsupported format), this
    is logged and the function returns without further action.

    When the report is not valid, the validation object is deleted, the
    uploaded file is removed (if there was one), the resource is deleted
    (if it is new) and a ValidationError carrying the report is raised.

    :param resource_id: id of the resource to validate
    :param upload: whether a file was uploaded for the resource
    :param new_resource: whether the resource has just been created and
        should therefore be deleted when validation fails
    :param user: user name passed in the context of ``resource_delete``
    :param local_upload: alias of ``upload``, accepted because callers in
        this module pass the flag under this name
    :raises ValidationError: ``{'validation': [report]}`` when the report
        is not valid
    '''
    has_upload = upload or bool(local_upload)

    try:
        t.get_action(u'resource_validation_run')(
            {u'ignore_auth': True},
            {u'resource_id': resource_id, u'async': False})
    except t.ValidationError as e:
        log.info(
            u'Could not run validation for resource {}: {}'.format(
                resource_id, str(e)))
        return

    validation = t.get_action(u'resource_validation_show')(
        {u'ignore_auth': True},
        {u'resource_id': resource_id})

    # NOTE(review): assumes the validation dict always carries a report
    # with a 'valid' key - confirm for jobs that finish in error.
    report = validation['report']

    if not report['valid']:

        # Delete validation object
        t.get_action(u'resource_validation_delete')(
            {u'ignore_auth': True},
            {u'resource_id': resource_id})

        # Delete uploaded file
        if has_upload:
            delete_local_uploaded_file(resource_id)

        if new_resource:
            # Delete resource
            t.get_action(u'resource_delete')(
                {u'ignore_auth': True, 'user': user},
                {u'id': resource_id})

        raise t.ValidationError({u'validation': [report]})
def check_spatial_extra(self, package):
    '''
    Store or clear the spatial extent of a package.

    Only the first extra whose key is "spatial" is considered:

    * active with a value: the value is parsed as JSON and passed to
      ``save_package_extent``;
    * active without a value, or deleted: ``save_package_extent`` is
      called with ``None`` to remove the stored extent.

    Nothing is done (apart from a warning) when the package has no id.

    :raises ValidationError: on the "spatial" field if the value cannot be
        decoded or the extent cannot be saved. When the ``DEBUG``
        environment variable is set, unexpected errors from saving are
        re-raised unchanged instead.
    '''
    from ckanext.spatial.lib import save_package_extent

    def _spatial_error(template, exc):
        # Build the ValidationError reported against the "spatial" field.
        error_dict = {'spatial': [template % six.text_type(exc)]}
        return tk.ValidationError(
            error_dict, error_summary=package_error_summary(error_dict))

    if not package.id:
        log.warning(
            'Couldn\'t store spatial extent because no id was provided for the package'
        )
        return

    # TODO: deleted extra
    for extra in package.extras_list:
        if extra.key != 'spatial':
            continue

        is_active = extra.state == 'active'
        if is_active and extra.value:
            try:
                log.debug('Received: %r' % extra.value)
                geometry = json.loads(extra.value)
            except (ValueError, TypeError) as e:
                raise _spatial_error(u'Error decoding JSON object: %s', e)

            try:
                save_package_extent(package.id, geometry)
            except ValueError as e:
                raise _spatial_error(u'Error creating geometry: %s', e)
            except Exception as e:
                if bool(os.getenv('DEBUG')):
                    raise
                raise _spatial_error(u'Error: %s', e)
        elif (is_active and not extra.value) or extra.state == 'deleted':
            # Delete extent from table
            save_package_extent(package.id, None)

        # Only the first "spatial" extra is processed.
        break
def resource_validation_run(context, data_dict):
    u'''
    Start a validation job against a resource.

    The resource format must be one of ``settings.SUPPORTED_FORMATS`` and
    the resource must have either a URL or an uploaded file.

    An existing ``Validation`` row for the resource is reset to the
    ``created`` status; otherwise a new one is added. The job is then
    enqueued when ``async`` is true (the default) or run inline otherwise.

    :param resource_id: id of the resource to validate
    :type resource_id: string
    :param async: whether to enqueue the job instead of running it inline
    :type async: bool

    :raises ValidationError: if ``resource_id`` is missing, the format is
        not supported, or the resource has neither URL nor upload

    :rtype: None

    '''
    t.check_access(u'resource_validation_run', context, data_dict)

    resource_id = data_dict.get(u'resource_id')
    if not resource_id:
        raise t.ValidationError({u'resource_id': u'Missing value'})

    resource = t.get_action(u'resource_show')({}, {u'id': resource_id})

    # TODO: limit to sysadmins
    async_job = data_dict.get(u'async', True)

    # Ensure format is supported. ``or u''`` also covers a format key that
    # is present but set to None, which would otherwise fail on .lower().
    resource_format = (resource.get(u'format') or u'').lower()
    if resource_format not in settings.SUPPORTED_FORMATS:
        raise t.ValidationError({
            u'format': u'Unsupported resource format. ' +
            u'Must be one of {}'.format(
                u','.join(settings.SUPPORTED_FORMATS))
        })

    # Ensure there is a URL or file upload
    if not resource.get(u'url') and resource.get(u'url_type') != u'upload':
        raise t.ValidationError(
            {u'url': u'Resource must have a valid URL or an uploaded file'})

    # Check if there was an existing validation for the resource
    Session = context['model'].Session
    try:
        validation = Session.query(Validation).filter(
            Validation.resource_id == resource_id).one()
    except NoResultFound:
        validation = None

    if validation:
        # Reset values
        validation.finished = None
        validation.report = None
        validation.error = None
        validation.created = datetime.datetime.utcnow()
        validation.status = u'created'
    else:
        validation = Validation(resource_id=resource['id'])

    Session.add(validation)
    Session.commit()

    if async_job:
        enqueue_job(run_validation_job, [resource])
    else:
        run_validation_job(resource)
def resource_update(context, data_dict):
    '''Update a resource.

    Modelled on the CKAN core resource_update action. The original author
    notes that the core action is duplicated because it was the only
    reliable way to hook into the update process.

    The resource is located inside its package, replaced by ``data_dict``
    and the whole package is saved through ``package_update`` with
    ``defer_commit`` set on the context.

    :param context: CKAN action context; must contain ``model``
    :type context: dict
    :param data_dict: the new resource values; must contain ``id``.
        ``url`` is defaulted to ``''``, an existing ``datastore_active``
        extra is carried over, and ``mimetype``/``size`` are filled from
        the uploader when it exposes them
    :type data_dict: dict
    :raises ObjectNotFound: if the resource does not exist or is not
        listed in its package
    :raises ValidationError: with the errors of the last resource when
        ``package_update`` rejects the package, or with the full error
        dict when no per-resource errors are present
    '''
    model = context['model']
    resource_id = t.get_or_bust(data_dict, "id")

    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(resource_id)
    context["resource"] = resource

    # Check existence before touching any attribute of the resource, so a
    # missing id yields ObjectNotFound rather than an AttributeError.
    if not resource:
        log.debug('Could not find resource %s', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    t.check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = t.get_action('package_show')(
        dict(context, return_type='dict'), {'id': package_id})

    # Find the position of the resource inside the package; ``n`` keeps
    # that index after the loop.
    for n, p in enumerate(pkg_dict['resources']):
        if p['id'] == resource_id:
            break
    else:
        log.error('Could not find resource %s after all', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras and
            'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(context, pkg_dict['resources'][n], data_dict)

    upload = uploader.get_resource_uploader(data_dict)

    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype

    if 'size' not in data_dict and 'url_type' in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'][n] = data_dict

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)
def open_datarequest(context, data_dict):
    """
    Action to (re)open a data request.

    Access rights are checked before opening the data request; if the user
    is not allowed, the access check raises. The request is marked as not
    closed, its accepted dataset and close time are cleared, and e-mails
    are sent to the creator and, when the request has an organisation, to
    that organisation's admin users.

    :param id: The ID of the data request to be opened
    :type id: string

    :raises ValidationError: if no id is given
    :raises ObjectNotFound: if the data request does not exist

    :returns: A dict with the data request (id, user_id, title,
        description, organization_id, open_time, accepted_dataset,
        close_time, closed, followers)
    :rtype: dict
    """
    model = context['model']
    session = context['session']

    request_id = data_dict.get('id', '')
    if not request_id:
        raise tk.ValidationError(tk._('Data Request ID has not been included'))

    # Init the data base
    db.init_db(model)

    # Check access
    tk.check_access(constants.OPEN_DATAREQUEST, context, data_dict)

    # Get the data request
    matches = db.DataRequest.get(id=request_id)
    if not matches:
        raise tk.ObjectNotFound(
            tk._('Data Request %s not found in the data base') % request_id)

    data_req = matches[0]

    # Undo everything that closing the request had set.
    data_req.closed = False
    data_req.accepted_dataset_id = None
    data_req.close_time = None
    # NOTE(review): the helper is read as an attribute, not called -
    # confirm it is a plain flag rather than a function.
    if tk.h.closing_circumstances_enabled:
        data_req.close_circumstance = None
        data_req.approx_publishing_date = None

    session.add(data_req)
    session.commit()

    datarequest_dict = _dictize_datarequest(data_req)

    # Creator email
    _send_mail([data_req.user_id], 'open_datarequest_creator',
               datarequest_dict, 'Data Request Opened Creator Email')

    # Admins of organisation email
    if datarequest_dict['organization']:
        org_admins = _get_admin_users_from_organisation(datarequest_dict)
        _send_mail(org_admins, 'open_datarequest_organisation',
                   datarequest_dict, 'Data Request Opened Admins Email')

    return datarequest_dict
def close_datarequest(original_action, context, data_dict):
    """
    Action to close a data request.

    Access rights are checked before closing the data request; if the user
    is not allowed, the access check raises.

    Data QLD modification
    Sends an email notification to the data request creator.

    :param original_action: the action being overridden; not used here
    :param id: The ID of the data request to be closed
    :type id: string

    :param accepted_dataset_id: The ID of the dataset accepted as solution
        for this data request
    :type accepted_dataset_id: string

    :raises ValidationError: if no id is given, the closing data is
        invalid, or the data request is already closed
    :raises ObjectNotFound: if the data request does not exist

    :returns: A dict with the data request (id, user_id, title,
        description, organization_id, open_time, accepted_dataset,
        close_time, closed, followers)
    :rtype: dict
    """
    model = context['model']
    session = context['session']

    request_id = data_dict.get('id', '')
    if not request_id:
        raise tk.ValidationError(tk._('Data Request ID has not been included'))

    # Init the data base
    db.init_db(model)

    # Check access
    tk.check_access(constants.CLOSE_DATAREQUEST, context, data_dict)

    # Get the data request
    matches = db.DataRequest.get(id=request_id)
    if not matches:
        raise tk.ObjectNotFound(
            tk._('Data Request %s not found in the data base') % request_id)

    # Validate data
    validator.validate_datarequest_closing(context, data_dict)

    data_req = matches[0]

    # Was the data request previously closed?
    if data_req.closed:
        raise tk.ValidationError([tk._('This Data Request is already closed')])

    # An empty accepted dataset id is stored as None.
    accepted_id = data_dict.get('accepted_dataset_id')
    data_req.closed = True
    data_req.accepted_dataset_id = accepted_id if accepted_id else None
    data_req.close_time = datetime.datetime.utcnow()
    _undictize_datarequest_closing_circumstances(data_req, data_dict)

    session.add(data_req)
    session.commit()

    datarequest_dict = _dictize_datarequest(data_req)

    # Mailing
    _send_mail([data_req.user_id], 'close_datarequest_creator',
               datarequest_dict, 'Data Request Closed Send Email')

    return datarequest_dict
def update_datarequest(original_action, context, data_dict):
    """
    Action to update a data request.

    Access rights are checked before updating the data request; if the
    user is not allowed, the access check raises. The parameters are
    validated and a ValidationError is raised if any of them is invalid.

    Data QLD modification
    When the organisation changes and the updated request has an
    organisation, email notifications are sent to the admin users of the
    newly assigned organisation and of the previous one, excluding the
    user performing the update.

    :param original_action: the action being overridden; not used here
    :param id: The ID of the data request to be updated
    :type id: string

    :param title: The title of the data request
    :type title: string

    :param description: A brief description for your data request
    :type description: string

    :param organization_id: The ID of the organization you want to assign
        the data request to.
    :type organization_id: string

    :raises ValidationError: if no id is given or the data is invalid
    :raises ObjectNotFound: if the data request does not exist

    :returns: A dict with the data request (id, user_id, title,
        description, organization_id, open_time, accepted_dataset,
        close_time, closed, followers)
    :rtype: dict
    """
    model = context['model']
    session = context['session']

    request_id = data_dict.get('id', '')
    if not request_id:
        raise tk.ValidationError(tk._('Data Request ID has not been included'))

    # Init the data base
    db.init_db(model)

    # Check access
    tk.check_access(constants.UPDATE_DATAREQUEST, context, data_dict)

    # Get the initial data
    matches = db.DataRequest.get(id=request_id)
    if not matches:
        raise tk.ObjectNotFound(
            tk._('Data Request %s not found in the data base') % request_id)

    data_req = matches[0]

    # Keep the validator from reporting a duplicate title when the user
    # has not changed it
    title_unchanged = data_req.title == data_dict['title']
    context['avoid_existing_title_check'] = title_unchanged

    # Validate data
    validator.validate_datarequest(context, data_dict)

    # Data QLD modification
    # Remember the organisation before it is overwritten below.
    previous_org_id = data_req.organization_id
    organisation_changed = previous_org_id != data_dict['organization_id']

    # Set the data provided by the user in the data_req
    _undictize_datarequest_basic(data_req, data_dict)

    session.add(data_req)
    session.commit()

    datarequest_dict = _dictize_datarequest(data_req)

    if not (datarequest_dict['organization'] and organisation_changed):
        return datarequest_dict

    def _mail_org_admins(org_holder, template, subject):
        # Mail the organisation's admins, except the user making the change.
        recipients = _get_admin_users_from_organisation(org_holder)
        recipients.discard(context['auth_user_obj'].id)
        _send_mail(recipients, template, datarequest_dict, subject)

    # Data QLD modification
    # Email Admin users of the assigned organisation
    _mail_org_admins(datarequest_dict, 'new_datarequest_organisation',
                     'Data Request Assigned Email')

    # Email Admin users of unassigned organisation
    _mail_org_admins(
        {'organization': _get_organization(previous_org_id)},
        'unassigned_datarequest_organisation',
        'Data Request Unassigned Email')

    return datarequest_dict
def resource_update(context, data_dict):
    '''Update a resource.

    This is a duplicate of the CKAN core resource_update action, with just
    the addition of a synchronous data validation step.

    This is of course not ideal but it's the only way right now to hook
    reliably into the update process without overcomplicating things.
    Hopefully future versions of CKAN will incorporate more flexible hook
    points that will allow a better approach.

    :param context: CKAN action context; must contain ``model`` and
        ``user``
    :type context: dict
    :param data_dict: the new resource values; must contain ``id``.
        ``url`` is defaulted to ``''``, an existing ``datastore_active``
        extra is carried over, and ``mimetype``/``size`` are filled from
        the uploader when it exposes them
    :type data_dict: dict
    :raises ObjectNotFound: if the resource does not exist or is not
        listed in its package
    :raises ValidationError: if ``package_update`` rejects the package;
        the synchronous validation step may raise it as well
    :returns: the updated resource as returned by ``resource_show``
    :rtype: dict
    '''
    model = context['model']
    resource_id = t.get_or_bust(data_dict, "id")

    if not data_dict.get('url'):
        data_dict['url'] = ''

    resource = model.Resource.get(resource_id)
    context["resource"] = resource

    if not resource:
        log.debug('Could not find resource %s', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Read only after the existence check, so a missing id yields
    # ObjectNotFound rather than an AttributeError.
    old_resource_format = resource.format

    t.check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.package.id
    pkg_dict = t.get_action('package_show')(
        dict(context, return_type='dict'), {'id': package_id})

    # Find the position of the resource inside the package; ``n`` keeps
    # that index after the loop.
    for n, p in enumerate(pkg_dict['resources']):
        if p['id'] == resource_id:
            break
    else:
        log.error('Could not find resource %s after all', resource_id)
        raise t.ObjectNotFound(t._('Resource was not found.'))

    # Persist the datastore_active extra if already present and not provided
    if ('datastore_active' in resource.extras and
            'datastore_active' not in data_dict):
        data_dict['datastore_active'] = resource.extras['datastore_active']

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.before_update(context, pkg_dict['resources'][n], data_dict)

    upload = uploader.get_resource_uploader(data_dict)

    if 'mimetype' not in data_dict:
        if hasattr(upload, 'mimetype'):
            data_dict['mimetype'] = upload.mimetype

    if 'size' not in data_dict and 'url_type' in data_dict:
        if hasattr(upload, 'filesize'):
            data_dict['size'] = upload.filesize

    pkg_dict['resources'][n] = data_dict

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        updated_pkg_dict = t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)

    upload.upload(resource_id, uploader.get_max_resource_size())

    # Custom code starts

    if get_update_mode_from_config() == u'sync':

        # Every plugin is consulted; a single refusal skips validation.
        run_validation = True
        for plugin in plugins.PluginImplementations(IDataValidation):
            if not plugin.can_validate(context, data_dict):
                log.debug('Skipping validation for resource %s', resource_id)
                run_validation = False

        if run_validation:
            is_local_upload = (
                hasattr(upload, 'filename') and
                upload.filename is not None and
                isinstance(upload, uploader.ResourceUpload))
            # The resource already existed before this update, so it must
            # not be flagged as new when it is handed to validation.
            _run_sync_validation(
                resource_id, local_upload=is_local_upload,
                new_resource=False)

    # Custom code ends

    model.repo.commit()

    resource = t.get_action('resource_show')(context, {'id': resource_id})

    # A changed format may call for different default views.
    if old_resource_format != resource['format']:
        t.get_action('resource_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': updated_pkg_dict,
             'resource': resource})

    for plugin in plugins.PluginImplementations(plugins.IResourceController):
        plugin.after_update(context, resource)

    return resource
def resource_create(context, data_dict):
    '''Append a new resource to a dataset's list of resources.

    This is a duplicate of the CKAN core resource_create action, with just
    the addition of a synchronous data validation step.

    This is of course not ideal but it's the only way right now to hook
    reliably into the creation process without overcomplicating things.
    Hopefully future versions of CKAN will incorporate more flexible hook
    points that will allow a better approach.

    :param context: CKAN action context; must contain ``model`` and
        ``user``
    :type context: dict
    :param data_dict: the resource to add; must contain ``package_id``.
        ``url`` is defaulted to ``''`` and ``mimetype``/``size`` are filled
        from the uploader when it exposes them
    :type data_dict: dict
    :raises ValidationError: if ``package_update`` rejects the package;
        the synchronous validation step may raise it as well
    :returns: the last resource of the package after the commit
    :rtype: dict
    '''
    model = context['model']

    package_id = t.get_or_bust(data_dict, 'package_id')
    if not data_dict.get('url'):
        data_dict['url'] = ''

    package_show = t.get_action('package_show')
    pkg_dict = package_show(
        dict(context, return_type='dict'), {'id': package_id})

    t.check_access('resource_create', context, data_dict)

    for controller in plugins.PluginImplementations(
            plugins.IResourceController):
        controller.before_create(context, data_dict)

    resources = pkg_dict.setdefault('resources', [])

    upload = uploader.get_resource_uploader(data_dict)

    # Only fill in values the caller did not provide explicitly.
    if 'mimetype' not in data_dict and hasattr(upload, 'mimetype'):
        data_dict['mimetype'] = upload.mimetype
    if 'size' not in data_dict and hasattr(upload, 'filesize'):
        data_dict['size'] = upload.filesize

    resources.append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        t.get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except t.ValidationError as e:
        # The new resource is the last one in the list, so report its
        # errors alone when they are available.
        try:
            raise t.ValidationError(e.error_dict['resources'][-1])
        except (KeyError, IndexError):
            raise t.ValidationError(e.error_dict)

    # Get out resource_id resource from model as it will not appear in
    # package_show until after commit
    new_resource_id = context['package'].resources[-1].id
    upload.upload(new_resource_id, uploader.get_max_resource_size())

    # Custom code starts

    if get_create_mode_from_config() == u'sync':

        # Every plugin is consulted; a single refusal skips validation.
        should_validate = True
        for validation_plugin in plugins.PluginImplementations(
                IDataValidation):
            if not validation_plugin.can_validate(context, data_dict):
                log.debug('Skipping validation for resource %s',
                          new_resource_id)
                should_validate = False

        if should_validate:
            is_local_upload = (
                getattr(upload, 'filename', None) is not None and
                isinstance(upload, uploader.ResourceUpload))
            _run_sync_validation(
                new_resource_id, local_upload=is_local_upload,
                new_resource=True)

    # Custom code ends

    model.repo.commit()

    # Run package show again to get out actual last_resource
    updated_pkg_dict = t.get_action('package_show')(
        context, {'id': package_id})
    resource = updated_pkg_dict['resources'][-1]

    # Add the default views to the new resource
    views_context = {
        'model': context['model'],
        'user': context['user'],
        'ignore_auth': True,
    }
    t.get_action('resource_create_default_resource_views')(
        views_context,
        {'resource': resource, 'package': updated_pkg_dict})

    for controller in plugins.PluginImplementations(
            plugins.IResourceController):
        controller.after_create(context, resource)

    return resource