def owner_org_validator_publisher(key, data, errors, context):
    """Validate the owning organization, letting publishers use any org.

    Variant of ``owner_org_validator``: a user for whom
    ``may_publish_datasets`` is true may assign a dataset to any existing
    organization; other users must be a member of the organization.

    :param key: flattened key of the field being validated
    :param data: flattened data dict; ``data[key]`` is replaced by the
        organization id on success
    :param errors: flattened errors dict (unused here)
    :param context: must provide ``'model'`` and ``'user'``
    :raises Invalid: if no organization is supplied and unowned datasets
        are not permitted, if the organization does not exist, or if the
        user may not add datasets to it
    :raises StopOnError: if no organization is supplied and unowned
        datasets are permitted (the key is removed from ``data`` first)
    """
    org_ref = data.get(key)

    if org_ref is missing or not org_ref:
        unowned_allowed = ckan.new_authz.check_config_permission(
            'create_unowned_dataset')
        if not unowned_allowed:
            raise Invalid(_('A organization must be supplied'))
        # Nothing supplied and that is acceptable: drop the key and halt
        # the validator chain for this field.
        data.pop(key, None)
        raise StopOnError

    model = context['model']
    organization = model.Group.get(org_ref)
    if not organization:
        raise Invalid(_('Organization does not exist'))

    organization_id = organization.id
    user_obj = model.User.get(context['user'])
    allowed = (may_publish_datasets(user_obj)
               or user_obj.is_in_group(organization_id))
    if not allowed:
        raise Invalid(_('You cannot add a dataset to this organization'))

    # Store the canonical id, whatever form the reference was given in.
    data[key] = organization_id
def ngds_tag_length_validator(value, context):
    """Check that a tag's length lies within the configured bounds.

    :param value: the tag text
    :param context: validation context (unused)
    :returns: ``value`` unchanged when its length is acceptable
    :raises Invalid: if the tag is shorter than ``MIN_TAG_LENGTH`` or
        longer than ``MAX_TAG_LENGTH``
    """
    tag_length = len(value)

    if tag_length < MIN_TAG_LENGTH:
        too_short = 'Tag "%s" length is less than minimum %s'
        raise Invalid(too_short % (value, MIN_TAG_LENGTH))

    if tag_length > MAX_TAG_LENGTH:
        too_long = 'Tag "%s" length is more than maximum %i'
        raise Invalid(too_long % (value, MAX_TAG_LENGTH))

    return value
def package_name_validator(key, data, errors, context):
    """Check that a package name is unused and within the length limits.

    A name already taken by a *different* package is reported by appending
    a message to ``errors[key]``; validation then continues with the length
    checks, which raise instead.

    :param key: flattened key of the name field
    :param data: flattened data dict; ``data[key]`` is the candidate name
    :param errors: flattened errors dict; ``errors[key]`` receives the
        "already in use" message
    :param context: must provide ``'model'`` and ``'session'``; an optional
        ``'package'`` identifies the package being edited
    :raises Invalid: if the name is shorter than
        ``PACKAGE_NAME_MIN_LENGTH`` or longer than
        ``PACKAGE_NAME_MAX_LENGTH``
    """
    model = context["model"]
    session = context["session"]
    package = context.get("package")

    query = session.query(model.Package.name).filter_by(name=data[key])

    # Work out which package (if any) is being edited so that its own
    # current name does not count as a clash.
    if package:
        package_id = package.id
    else:
        package_id = data.get(key[:-1] + ("id",))
    if package_id and package_id is not missing:
        # `!=` replaces the original `<>`, which Python 3 rejects outright;
        # both spell the same inequality.
        query = query.filter(model.Package.id != package_id)

    if query.first():
        errors[key].append(_('That URL is already in use.'))

    value = data[key]
    if len(value) < PACKAGE_NAME_MIN_LENGTH:
        raise Invalid(
            _('Name "%s" length is less than minimum %s') %
            (value, PACKAGE_NAME_MIN_LENGTH))
    if len(value) > PACKAGE_NAME_MAX_LENGTH:
        raise Invalid(
            _('Name "%s" length is more than maximum %s') %
            (value, PACKAGE_NAME_MAX_LENGTH))
def kata_owner_org_validator(key, data, errors, context):
    '''
    Modified version of CKAN's owner_org_validator that performs no
    membership check: any existing organisation is accepted.

    The supplied value is first looked up as given. If that finds nothing,
    a slug derived from the value is looked up instead. On success the
    organisation id is written to ``data[key]``.

    :param key: key
    :param data: data
    :param errors: errors
    :param context: context (must provide ``'model'``)
    :return: nothing
    :raises Invalid: if no value is supplied or no organisation matches
    '''
    value = data.get(key)
    if value is missing or not value:
        raise Invalid(_(
            u"An organization must be supplied. If you do not find a suitable organization, please choose the default organization "
            u"'Ei linkitetä organisaatioon - do not link to an organization'."))

    model = context['model']
    organization = model.Group.get(value)

    if not organization:
        # Second attempt: slugify, squash every run of non-alphanumerics
        # into a single dash, lowercase, and strip one trailing dash.
        slug = re.sub(r'[^a-zA-Z0-9]+', '-', utils.slugify(value)).lower()
        slug = re.sub(r'-$', '', slug)
        organization = model.Group.get(slug)
        if not organization:
            raise Invalid(_(
                u'The provided organization does not exist. Please contact Etsin administration using our contact form at http://openscience.fi/contact-form'
            ))

    data[key] = organization.id
def harvest_object_extras_validator(value, context):
    """Ensure harvest object extras are a dict whose values are strings.

    :param value: the extras to check
    :param context: validation context (unused)
    :returns: ``value`` unchanged
    :raises Invalid: if ``value`` is not a dict, or any of its values is
        not a ``basestring``
    """
    if not isinstance(value, dict):
        raise Invalid('extras must be a dict')

    if not all(isinstance(item, basestring) for item in value.values()):
        raise Invalid('extras must be a dict of strings')

    return value
def service_charge_validator(key, data, errors, context):
    """Validate the service charge field and its dependent pricing fields.

    A service charge value is mandatory. When it is ``'yes'``, at least one
    of the pricing information URL and the service price description must
    also be filled in.

    :param key: flattened key of the service charge field
    :param data: flattened data dict (keys are tuples)
    :param errors: flattened errors dict (unused)
    :param context: validation context (unused)
    :returns: the service charge value
    :raises Invalid: if the service charge is not supplied, or if it is
        ``'yes'`` and both pricing fields are empty
    """

    def _is_blank(candidate):
        # "Not provided" covers the `missing` marker, None and ''.
        return candidate is missing or candidate is None or candidate == ''

    service_charge_value = data.get(key)

    if _is_blank(service_charge_value):
        raise Invalid(_('Service charge must be supplied'))

    if service_charge_value == 'yes':
        pricing_url_value = data.get(('pricing_information_url',))
        service_price_value = data.get(('service_price_description',))
        if _is_blank(pricing_url_value) and _is_blank(service_price_value):
            raise Invalid(
                _('If there is a service charge, you must supply either the pricing information web address for this service or a description of '
                  + 'the service pricing or both'))

    return service_charge_value
def user_name_sanitize(key, data, errors, context):
    """Reject user names that fail sanitisation or start with "admin".

    :param key: flattened key of the field
    :param data: flattened data dict; ``data[key]`` is the name to check
    :param errors: flattened errors dict (unused)
    :param context: validation context (unused)
    :raises Invalid: if ``is_input_valid`` returns ``False`` for the value,
        or the value begins with "admin" in any letter case
    """
    value = data[key]

    # `re.match` anchors at the start, so only an "admin" prefix is refused.
    rejected = is_input_valid(value) is False or (
        value and re.match('admin', value, re.IGNORECASE))

    if rejected:
        raise Invalid(_('Input Contains Invalid Text'))
def name_validator(val, context):
    """Apply the basic textual rules for a name.

    :param val: the candidate name
    :param context: validation context (unused)
    :returns: ``val`` unchanged when it passes
    :raises Invalid: if ``val`` has fewer than two characters or does not
        match the ``name_match`` pattern
    """
    minimum_length = 2
    if len(val) < minimum_length:
        raise Invalid(
            _('Name must be at least %s characters long') % minimum_length)

    if name_match.match(val):
        return val

    raise Invalid(
        _('Name must be purely lowercase alphanumeric '
          '(ascii) characters and these symbols: -_'))
def name_validator(val, context):
    """Apply the basic textual rules for a name, including a length cap.

    :param val: the candidate name
    :param context: validation context (unused)
    :returns: ``val`` unchanged when it passes
    :raises Invalid: if ``val`` has fewer than two characters, more than
        ``PACKAGE_NAME_MAX_LENGTH`` characters, or does not match the
        ``name_match`` pattern
    """
    minimum_length = 2
    name_length = len(val)

    if name_length < minimum_length:
        raise Invalid(
            _('Name must be at least %s characters long') % minimum_length)

    if name_length > PACKAGE_NAME_MAX_LENGTH:
        raise Invalid(
            _('Name must be a maximum of %i characters long') %
            PACKAGE_NAME_MAX_LENGTH)

    if name_match.match(val):
        return val

    raise Invalid(_('Url must be purely lowercase alphanumeric '
                    '(ascii) characters and these symbols: -_'))
def convert(key, data, errors, context): value = data.get(key) #logger.debug('Processing field %s for editing (%r)', key[0], value) ser = ext_metadata.serializer_for_field(field) # Not supposed to handle missing inputs here assert not value is missing # Convert from input/db or initialize to defaults if not value: # Determine default value and initialize if field.default is not None: value = field.default elif field.defaultFactory is not None: value = field.defaultFactory() else: # Convert from input or db if ser and isinstance(value, basestring): try: value = ser.loads(value) except Exception as ex: raise Invalid(u'Invalid input (%s)' % (ex.message)) # Ignore empty values (act exactly as the `ignore_empty` validator). # Note If a field is marked as required, the check is postponed until # the dataset is validated at dataset level. if not value: data.pop(key) raise StopOnError # Validate if not 'skip_validation' in context: try: # Invoke the zope.schema validator field.validate(value) except zope.schema.ValidationError as ex: # Map this exception to the one expected by CKAN raise Invalid(u'Invalid (%s)' % (type(ex).__name__)) # Convert to a properly formatted string (for db storage) if ser: value = ser.dumps(value) data[key] = value return
def tag_name_validator(value, context):
    """Check that a tag name contains only permitted characters.

    Permitted characters are word characters (letters, digits and
    underscore, Unicode-aware), spaces, hyphens and full stops. The empty
    string is accepted.

    :param value: the tag name
    :param context: validation context (unused)
    :returns: ``value`` unchanged when it is acceptable
    :raises Invalid: if ``value`` contains any other character
    """
    # Raw string: in a plain literal `\w` and `\-` are invalid escape
    # sequences, which modern Python warns about. The pattern is unchanged.
    tagname_match = re.compile(r'[\w \-.]*$', re.UNICODE)
    if not tagname_match.match(value):
        raise Invalid(_('Tag "%s" must be alphanumeric '
                        'characters or symbols: -_.') % (value))
    return value
def country_code(value, context):
    """Upper-case a country code and check that it is a known one.

    :param value: the country code, in any letter case
    :param context: validation context (unused)
    :returns: the upper-cased code
    :raises Invalid: if the upper-cased code is not the first element of
        any entry in ``COUNTRIES``
    """
    code = value.upper()
    known_codes = [entry[0] for entry in COUNTRIES]
    if code in known_codes:
        return code
    raise Invalid('Unknown country code "%s"' % code)
def yes_no(value, context):
    """Normalize a yes/no answer to lower case.

    :param value: the answer, in any letter case
    :param context: validation context (unused)
    :returns: ``'yes'`` or ``'no'``
    :raises Invalid: if the lower-cased value is neither
    """
    answer = value.lower()
    if answer in ('yes', 'no'):
        return answer
    raise Invalid('Value must be one of [yes, no]')
def iati_resource_url_mandatory(value, context):
    """Run ``iati_resource_url`` and additionally require a non-empty URL.

    :param value: the resource URL
    :param context: validation context, passed to ``iati_resource_url``
    :returns: the value returned by ``iati_resource_url``
    :raises Invalid: if that value is empty or only whitespace
    """
    url = iati_resource_url(value, context)
    if not (url and url.strip()):
        raise Invalid('URL cannot be empty')
    return url
def validate_primary_pid_uniqueness(key, data, errors, context):
    '''
    Validate that a dataset's primary pid is not already the primary pid
    of another active dataset.

    Nothing is checked unless the pid's sibling ``type`` field (same key
    with its third element replaced by ``'type'``) equals ``u'primary'``.

    :param key: key of the pid ``id`` field
    :param data: data
    :param errors: errors (unused)
    :param context: context (unused)
    :raises Invalid: if another active dataset has the same primary pid
    '''
    key_parts = list(key)
    key_parts[2] = 'type'
    type_key = tuple(key_parts)

    if data.get(type_key) != u'primary':
        return

    exam_primary_pid = data.get(key)
    exam_package_id = data.get(('id',))

    # Every "pids_<n>_id" extra of an active package holding the same value.
    same_value_pids = model.Session.query(model.PackageExtra) \
        .filter(model.PackageExtra.key.like('pids_%_id')) \
        .filter(model.PackageExtra.value == exam_primary_pid) \
        .join(model.Package) \
        .filter(model.Package.state == 'active') \
        .values('package_id', 'key', 'value')

    for package_id, pid_id_key, pid_id_value in same_value_pids:
        if package_id == exam_package_id:
            # The dataset under validation may hold the pid itself.
            continue

        # "pids_<n>_id" -> "pids_<n>_type": keep the text between the
        # first and the last underscore.
        middle = pid_id_key[pid_id_key.find('_') + 1:pid_id_key.rfind('_')]
        other_type_key = 'pids_' + middle + '_type'

        is_primary_elsewhere = model.Session.query(model.PackageExtra) \
            .filter(and_(model.PackageExtra.package_id == package_id,
                         model.PackageExtra.key == other_type_key,
                         model.PackageExtra.value == u'primary')) \
            .first()

        if is_primary_elsewhere:
            raise Invalid(
                _('Primary identifier {pid} exists in another dataset {id}'
                  ).format(pid=exam_primary_pid, id=package_id))
def protect_portal_release_date(key, data, errors, context):
    """Ensure portal_release_date is not changed by an unauthorized user.

    Sysadmins and users for whom ``may_publish_datasets`` is true may set
    any value. For everyone else an unchanged value is accepted, an empty
    value is silently replaced by the stored one, and any other change is
    rejected.

    :param key: flattened key of the field
    :param data: flattened data dict; may be modified in place
    :param errors: flattened errors dict (unused)
    :param context: must provide ``'user'``; an optional ``'package'``
        supplies the currently stored value
    :raises Invalid: if an unauthorized user attempts to change the value
    """
    if is_sysadmin(context['user']):
        return

    package = context.get('package')
    original = package.extras.get('portal_release_date', '') if package else ''

    value = data.get(key, '')
    if original == value:
        return

    user = model.User.get(context['user'])
    if may_publish_datasets(user):
        return

    if value == '':
        # silently replace with the old value when none is sent
        data[key] = original
        return

    raise Invalid('Cannot change value of key from %s to %s. '
                  'This key is read-only' % (original, value))
def user_name_exists(user_name, context):
    """Check that a user with the given name exists.

    :param user_name: the name to look up
    :param context: must provide ``'model'`` and ``'session'``
    :returns: the ``name`` attribute of the matching user record
    :raises Invalid: if no user has that name
    """
    model = context['model']
    session = context['session']

    user = session.query(model.User).filter_by(name=user_name).first()
    if user:
        return user.name

    raise Invalid('%s: %s' % (_('Not found'), _('User')))
def end_time_validator(key, data, errors, context):
    """Raise Invalid if the end time is earlier than the start time.

    The comparison is skipped when either value is absent or empty, or
    when ``date_validator`` records an error for either field.

    Partial dates are completed before comparing: fields missing from the
    start time default to those of ``datetime(1, 1, 1)``, and fields
    missing from the end time default to December 1st of the current
    year. An end time exactly seven characters long is treated as lacking
    a day of month and is moved to the last day of its month.

    :param key: flattened key that triggered the validator (unused; the
        ``('start_time',)`` and ``('end_time',)`` entries are always read)
    :param data: flattened data dict
    :param errors: flattened errors dict, passed on to ``date_validator``
    :param context: validation context, passed on to ``date_validator``
    :raises Invalid: if the end time precedes the start time
    """
    start_time = data.get(('start_time',))
    end_time = data.get(('end_time',))
    if not start_time or not end_time:
        return

    # Let the per-field date validator run first; if either field is
    # already in error there is nothing meaningful to compare.
    date_validator(('start_time',), data, errors, context)
    date_validator(('end_time',), data, errors, context)
    if errors.get(('start_time',)) or errors.get(('end_time',)):
        return

    start_time_p = parse(start_time, default=datetime(1, 1, 1))
    end_time_p = parse(end_time, default=datetime(date.today().year, 12, 1))

    if len(end_time) == 7:
        # If the day of month is missing
        last_day = monthrange(end_time_p.year, end_time_p.month)[1]
        end_time_p = end_time_p.replace(day=last_day)

    if end_time_p < start_time_p:
        # Single-line literal: the original continued the string with a
        # backslash, which put the source indentation into the message.
        raise Invalid(
            _('End time should be greater than or equal to start time'))
def package_id_not_changed(value, context):
    """Refuse any attempt to change the id of an existing package.

    :param value: the submitted package id
    :param context: validation context; an optional ``'package'`` is the
        package being edited
    :returns: ``value`` unchanged
    :raises Invalid: if a package is in the context and its id differs
        from ``value``
    """
    package = context.get('package')
    unchanged = not package or value == package.id
    if unchanged:
        return value
    raise Invalid('Cannot change value of key from %s to %s. '
                  'This key is read-only' % (package.id, value))
def callable(key, data, errors, context):
    """Append the tags found at ``data[key]`` to the flattened tag list.

    ``vocab`` and ``model`` are not defined in this function; they are
    read from an enclosing scope (NOTE(review): presumably this is the
    inner function of a converter factory -- confirm). The name shadows
    the ``callable`` builtin wherever it is bound.

    Every tag is validated before any is written. New entries are numbered
    after the highest existing ``('tags', n, ...)`` index in ``data``.

    :param key: flattened key holding a tag string or a list of tags
    :param data: flattened data dict, extended in place
    :param errors: flattened errors dict (unused)
    :param context: validation context; ``'vocabulary'`` is set on it
    :raises Invalid: if the vocabulary does not exist (the per-tag
        validators may raise as well)
    """
    new_tags = data.get(key)
    if not new_tags:
        return
    if isinstance(new_tags, basestring):
        new_tags = [new_tags]

    # First unused index among the existing ('tags', n, ...) entries.
    first_free = max(
        [0] + [k[1] + 1 for k in data.keys() if k[0] == 'tags'])

    vocabulary = model.Vocabulary.get(vocab)
    if not vocabulary:
        raise Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = vocabulary

    for tag in new_tags:
        tag_length_validator(tag, context)
        tag_name_validator(tag, context)
        tag_in_vocabulary_validator(tag, context)

    for position, tag in enumerate(new_tags, first_free):
        data[('tags', position, 'name')] = tag
        data[('tags', position, 'vocabulary_id')] = vocabulary.id
def harvest_job_exists(value, context):
    '''Check if a harvest job exists and return the model if it does.

    :param value: id of the harvest job
    :param context: validation context (unused)
    :returns: the object returned by ``HarvestJob.get`` (not the id)
    :raises Invalid: if no such harvest job is found
    '''
    job = HarvestJob.get(value)
    if job:
        return job
    raise Invalid('Harvest Job with id %r does not exist.' % str(value))
def harvest_source_url_validator(key, data, errors, context):
    """Reject a harvest source URL already used by another source.

    URLs are compared after ``_normalize_url``. When a package id is known
    (from the context or from the sibling ``id`` field), that package is
    left out of the comparison.

    :param key: flattened key of the URL field
    :param data: flattened data dict; ``data[key]`` is the new URL
    :param errors: flattened errors dict (unused)
    :param context: validation context; an optional ``'package'`` is the
        source being edited
    :returns: ``data[key]`` unchanged
    :raises Invalid: if another harvest source has the same normalized URL
    """
    package = context.get("package")
    package_id = package.id if package else data.get(key[:-1] + ("id",))

    new_url = _normalize_url(data[key])

    query = model.Session.query(model.Package.url, model.Package.state) \
        .filter(model.Package.type == DATASET_TYPE_NAME)
    if package_id:
        # When editing a source we need to avoid its own URL
        query = query.filter(model.Package.id != package_id)

    for existing_url, _state in query.all():
        if _normalize_url(existing_url) == new_url:
            raise Invalid(
                'There already is a Harvest Source for this URL: %s'
                % data[key])

    return data[key]
def harvest_source_id_exists(value, context):
    """Check that a harvest source with the given id exists.

    :param value: id of the harvest source
    :param context: validation context (unused)
    :returns: ``value`` unchanged
    :raises Invalid: if ``HarvestSource.get`` finds nothing
    """
    if HarvestSource.get(value):
        return value
    raise Invalid('Harvest Source with id %r does not exist.' % str(value))
def vocabulary_id_exists(value, context):
    """Check that a tag vocabulary with the given id exists.

    :param value: id of the vocabulary
    :param context: must provide ``'model'`` and ``'session'``
    :returns: ``value`` unchanged
    :raises Invalid: if the session finds no vocabulary with that id
    """
    model = context['model']
    session = context['session']

    vocabulary = session.query(model.Vocabulary).get(value)
    if vocabulary:
        return value

    raise Invalid(_('Tag vocabulary was not found.'))
def preprocess_dataset_for_edit(key, data, errors, context):
    """Flatten a nested, dataset-type-keyed dict found among the extras.

    If the input carries a ``dataset_type`` that is listed in
    ``ext_metadata.dataset_types`` and ``__extras`` holds a dict under that
    same name, the dict is flattened into dotted keys prefixed with the
    dataset type and merged into ``data``. Otherwise ``data`` is left
    untouched.

    :param key: flattened key; its first element must be ``'__before'``
    :param data: flattened data dict, updated in place
    :param errors: flattened errors dict (unused)
    :param context: validation context (unused)
    :raises Invalid: if both a nested dict and flat keys starting with the
        dataset type are supplied
    """
    assert key[0] == '__before', \
        'This validator can only be invoked in the __before stage'

    def debug(msg):
        logger.debug('Pre-processing dataset for editing: %s' % (msg))

    # Only entries that actually carry a value are considered.
    received_data = {k: v for k, v in data.iteritems() if not (v is missing)}
    unexpected_data = received_data.get(('__extras', ), {})

    #debug('Received data: %r' %(received_data))
    #debug('Received (but unexpected) data: %r' %(unexpected_data))

    # Figure out if a nested dict is supplied (instead of a flat one).
    # Note This "nested" input format is intended to be used by the action api,
    # as it is far more natural to the JSON format. Still, this format option is
    # not restricted to api requests (it is possible to be used even by form-based
    # requests).

    key_prefix = dtype = received_data.get(('dataset_type', ))
    r = unexpected_data.get(dtype) if dtype else None
    if isinstance(r, dict) and (dtype in ext_metadata.dataset_types):
        # Looks like a nested dict keyed at key_prefix
        debug('Trying to flatten input at %s' % (key_prefix))
        # Mixing the nested form with flat keys of the same prefix is refused.
        if any([k[0].startswith(key_prefix) for k in received_data]):
            raise Invalid('Not supported: Found both nested/flat dicts')
        # Convert to expected flat fields.
        # The list concatenation below relies on `map` returning a list
        # (Python 2 behaviour).
        key_converter = lambda k: '.'.join([key_prefix] + map(str, k))
        r = dictization.flatten(r, key_converter)
        # Flattened names become 1-tuple keys in `data`.
        data.update({(k, ): v for k, v in r.iteritems()})

    #raise Breakpoint('preprocess_dataset_for_edit')
    pass
def vocabulary_id_not_changed(value, context):
    """Refuse any attempt to change the id of an existing vocabulary.

    :param value: the submitted vocabulary id
    :param context: validation context; an optional ``'vocabulary'`` is
        the vocabulary being edited
    :returns: ``value`` unchanged
    :raises Invalid: if a vocabulary is in the context and its id differs
        from ``value``
    """
    vocabulary = context.get('vocabulary')
    unchanged = not vocabulary or value == vocabulary.id
    if unchanged:
        return value
    raise Invalid(
        _('Cannot change value of key from %s to %s. '
          'This key is read-only') % (vocabulary.id, value))
def user_about_validator(value, context):
    """Reject "about" texts that contain an http or https link.

    :param value: the user's description text
    :param context: validation context (unused)
    :returns: ``value`` unchanged when it contains no link
    :raises Invalid: if ``value`` contains ``http://`` or ``https://``
    """
    link_markers = ('http://', 'https://')
    if any(marker in value for marker in link_markers):
        raise Invalid(
            _('Edit not allowed as it looks like spam. Please avoid links in your description.'
              ))
    return value
def no_http(value, context):
    """Reject log messages that contain an http or https link.

    The original tested only for ``'http:'``, so ``https:`` links were let
    through; both schemes are refused now. The unused ``model`` and
    ``session`` lookups are gone, so ``context`` no longer has to carry
    those keys.

    :param value: the log message text
    :param context: validation context (unused)
    :returns: ``value`` unchanged when it contains no link
    :raises Invalid: if ``value`` contains ``http:`` or ``https:``
    """
    if 'http:' in value or 'https:' in value:
        raise Invalid(_('No links are allowed in the log_message.'))
    return value
def vocabulary_name_validator(name, context):
    """Check a vocabulary name for length and uniqueness.

    :param name: the candidate vocabulary name
    :param context: must provide ``'model'`` and ``'session'``
    :returns: ``name`` unchanged
    :raises Invalid: if the name is shorter than
        ``VOCABULARY_NAME_MIN_LENGTH``, longer than
        ``VOCABULARY_NAME_MAX_LENGTH``, or already used by a vocabulary
    """
    model = context['model']
    session = context['session']

    name_length = len(name)
    if name_length < VOCABULARY_NAME_MIN_LENGTH:
        raise Invalid(
            _('Name must be at least %s characters long') %
            VOCABULARY_NAME_MIN_LENGTH)
    if name_length > VOCABULARY_NAME_MAX_LENGTH:
        raise Invalid(
            _('Name must be a maximum of %i characters long') %
            VOCABULARY_NAME_MAX_LENGTH)

    existing = session.query(model.Vocabulary.name) \
        .filter_by(name=name) \
        .first()
    if existing:
        raise Invalid(_('That vocabulary name is already in use.'))

    return name
def iati_publisher_name_validator(value, context):
    """Run the stock ``name_validator`` with a publisher-specific message.

    :param value: the candidate publisher name
    :param context: validation context, passed to ``name_validator``
    :returns: whatever ``name_validator`` returns
    :raises Invalid: with guidance text for publishers, replacing any
        ``Invalid`` raised by ``name_validator``
    """
    guidance = (
        "This will be the unique identifier for the publisher. "
        "Where possible use a short abbreviation of your organisation's name. "
        "For example: 'dfid' or 'worldbank' Must be at least two characters long and lower case. "
        "Can include letters, numbers and also - (dash) and _ (underscore).")
    try:
        base_validator = p.toolkit.get_validator('name_validator')
        return base_validator(value, context)
    except Invalid:
        raise Invalid(guidance)