def list_of_strings_or_lists(key, data, errors, context):
    """Validate that ``data[key]`` is a list whose items are each either a
    string or a (nested) list.

    Raises df.Invalid otherwise; returns nothing on success.
    """
    value = data.get(key)
    if not isinstance(value, list):
        raise df.Invalid('Not a list')
    for item in value:
        # accept strings and nested lists, reject everything else
        if isinstance(item, (basestring, list)):
            continue
        raise df.Invalid('%s: %s' % ('Neither a string nor a list', item))
def validator(key, data, errors, context):
    """Validate "see also" related-dataset references submitted via the form.

    Each referenced package must exist and be of type 'dataset'.  On success
    the validated references are stored at ``data[key]`` as a JSON string of
    ``{'dataset_identifier': ...}`` dicts; if nothing was submitted and no
    JSON data is already set for the key, an empty JSON object is stored.

    :raises: df.Invalid if a referenced package is missing or not a dataset
    """
    extras = data.get(FORM_EXTRAS)
    see_alsos_validated = []
    if extras:
        see_alsos_from_form = get_see_alsos_from_form(extras)
        if see_alsos_from_form:
            # NOTE(review): presumably a fresh context is used to drop any
            # request-specific auth state before the lookups — confirm.
            context = {}
            for package_name in see_alsos_from_form:
                try:
                    package = get_action('package_show')(context, { 'id': package_name })  # noqa
                    if not package.get('type') == 'dataset':
                        raise df.Invalid(
                            _('{} can not be chosen since it is a {}.'.
                              format(package_name, package.get('type'))))
                    see_alsos_validated.append(
                        {'dataset_identifier': package.get('identifier')})
                except NotFound:
                    # BUG FIX: removed the stray space before the period in
                    # the user-facing error message ('found .' -> 'found.').
                    raise df.Invalid(
                        _('Dataset {} could not be found.'.format(
                            package_name)))
    if see_alsos_validated:
        data[key] = json.dumps(see_alsos_validated)
    elif not _jsondata_for_key_is_set(data, key):
        data[key] = '{}'
def list_of_strings_or_lists(key: FlattenKey, data: FlattenDataDict,
                             errors: FlattenErrorDict, context: Context):
    """Validate that ``data[key]`` is a list whose items are each either a
    string or a (nested) list.

    Raises df.Invalid otherwise; returns nothing on success.
    """
    value = data.get(key)
    if not isinstance(value, list):
        raise df.Invalid('Not a list')
    for item in value:
        # accept strings and nested lists, reject everything else
        if isinstance(item, (str, list)):
            continue
        raise df.Invalid('%s: %s' % ('Neither a string nor a list', item))
def ecportal_description_validator(val, context):
    '''
    The main description of the dataset.

    It is often displayed with the package title.  In particular, it should
    start with a short sentence that describes the data set succinctly,
    because the first few words alone may be used in some views of the
    data sets.

    :raises: df.Invalid if the value is not a string or is empty
    '''
    is_string = isinstance(val, basestring)
    if not is_string:
        raise df.Invalid(_('Description must be strings'))
    if not val:
        raise df.Invalid(_('Enter a Description'))
    return val
def ecportal_uri_validator(val, context):
    '''
    This identifier is the universal and main identification of the dataset
    maintained by the publisher, e.g. http://ec.europa.eu/id/uri-1

    :raises: df.Invalid if the value is empty or is a reserved word
    '''
    if not val:
        raise df.Invalid(_('URI must be set'))
    # these words are reserved for CKAN routing and may not be used as URIs
    if val in ['new', 'edit', 'search']:
        raise df.Invalid(_('That URI cannot be used'))
    return val
def __validate_corpus(self):
    """Check that the training data is long enough for both the configured
    corpus length and the RNN sequence length.

    :returns: self (allows chaining)
    :raises: df.Invalid if the corpus is shorter than either minimum
    """
    if self.corpus_length > len(self.training_data):
        msg = ('The minimum length of the corpus is {}, '
               'current corpus has {}').format(int(self.corpus_length),
                                               len(self.training_data))
        self.logger.debug(msg)
        raise df.Invalid(msg)
    if len(self.training_data) <= self.sequence_length:
        # BUG FIX: `sequence_length` was referenced as a bare name, which
        # raised NameError instead of the intended validation error; it is
        # an instance attribute.
        msg = ('The length of the RNN sequence %d, given %d'
               % (int(self.sequence_length), len(self.training_data)))
        self.logger.debug(msg)
        raise df.Invalid(msg)
    return self
def stringToTags(key, data, error, context):
    """Map a form field key to its tag vocabulary, split the submitted tag
    string on the field-specific separator, and convert every entry into a
    vocabulary tag via convertToTags.

    :raises: df.Invalid if no vocabulary is mapped for the field
    """
    tag_string = data.get(key)
    vocab = "None"
    separator = ","
    field = key[0]
    # Project vocabularies
    if field == "ILRI_prjsubjects":
        vocab = "ILRI_vocsubjects"
    # Study vocabularies
    if field == "ILRI_actyregions":
        vocab = "ILRI_vocregions"
    if field == "ILRI_actycountries":
        vocab = "ILRI_voccountries"
        separator = "+"
    if field == "ILRI_actyspecies":
        vocab = "ILRI_vocspecies"
    # At a certain point in the creation of the dataset the value may
    # already be a list, so only split when it is still a string.
    if type(tag_string) is list:
        tags = tag_string
    else:
        tags = tag_string.split(separator)
    if vocab == "None":
        raise df.Invalid(
            _('Tag vocabulary for key "%s" does not exist') % field)
    for tag in tags:
        convertToTags(vocab, fixTag(tag), data, error, context)
def convertToTags(vocab, newtag, data, error, context):
    """Append ``newtag`` (a tag name or list of names) to the flattened data
    dict as tags of vocabulary ``vocab``, validating each against the
    vocabulary first.

    :raises: df.Invalid if the vocabulary does not exist
    """
    new_tags = newtag
    if not new_tags:
        return
    if isinstance(new_tags, basestring):
        new_tags = [new_tags]
    # find the next free index after the tags already in the flattened dict
    offset = 0
    for flat_key in data.keys():
        if flat_key[0] == 'tags':
            offset = max(offset, flat_key[1] + 1)
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = v
    for tag in new_tags:
        validators.tag_in_vocabulary_validator(tag, context)
    for idx, tag in enumerate(new_tags, start=offset):
        data[('tags', idx, 'name')] = tag
        data[('tags', idx, 'vocabulary_id')] = v.id
def func(key: FlattenKey, data: FlattenDataDict,
         errors: FlattenErrorDict, context: Context):
    """Append the tags found at ``data[key]`` to the flattened data dict as
    tags of the (closure-bound) vocabulary ``vocab``, validating each
    against the vocabulary first.

    :raises: df.Invalid if the vocabulary does not exist
    """
    new_tags = data.get(key)
    if not new_tags:
        return
    if isinstance(new_tags, str):
        new_tags = [new_tags]
    # find the next free index after the tags already in the flattened dict
    offset = 0
    for flat_key in data.keys():
        if flat_key[0] == 'tags':
            offset = max(offset, flat_key[1] + 1)
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = v
    for tag in new_tags:
        validators.tag_in_vocabulary_validator(tag, context)
    for idx, tag in enumerate(new_tags, start=offset):
        data[('tags', idx, 'name')] = tag
        data[('tags', idx, 'vocabulary_id')] = v.id
def convert_package_name_or_id_to_id(package_name_or_id: Any,
                                     context: Context) -> Any:
    '''Return the package id for the given package name or id.

    The point of this function is to convert package names to ids. If you
    have something that may be a package name or id you can pass it into this
    function and get the id out either way.

    Also validates that a package with the given name or id exists.

    :returns: the id of the package with the given name or id
    :rtype: string

    :raises: ckan.lib.navl.dictization_functions.Invalid if there is no
        package with the given name or id

    '''
    session = context['session']
    package_query = session.query(model.Package)
    # try the id column first, then fall back to the name column
    match = package_query.filter_by(id=package_name_or_id).first()
    if match is None:
        match = package_query.filter_by(name=package_name_or_id).first()
    if match is None:
        raise df.Invalid('%s: %s' % (_('Not found'), _('Dataset')))
    return match.id
def update_frequency_vocab(value, context):
    """Ensure ``value`` is one of the allowed update frequencies declared on
    DatasetForm.UPDATE_FREQUENCIES; return it unchanged when valid.

    :raises: df.Invalid for any other value
    """
    if value not in DatasetForm.UPDATE_FREQUENCIES:
        allowed = ', '.join(DatasetForm.UPDATE_FREQUENCIES)
        raise df.Invalid(
            _('update_frequency should be set to one of following values: '
              + allowed))
    return value
def validator(key, data, errors, context):
    # Validate and normalise temporal-coverage entries, storing the result
    # at data[key] as a JSON string (a list of dicts with date values run
    # through ogdch_date_validator).
    if key not in data:
        # nothing submitted for this key at all: store an empty JSON list
        data[key] = '[]'
    else:
        temporals = []
        if not data.get(key):
            # no direct value: fall back to the raw form extras
            extras = data.get(FORM_EXTRAS)
            if extras:
                temporals = get_temporals_from_form(extras)
                for temporal in temporals:
                    # NOTE(review): due to operator precedence this only
                    # rejects entries that have an end date but NO start
                    # date; a start date without an end date passes.
                    # Confirm whether open-ended ranges are intended here.
                    if not temporal['start_date'] and temporal['end_date']:
                        raise df.Invalid(
                            _('A valid temporal must have both start and end date'
                              )  # noqa
                        )
        else:
            # a direct value was submitted; it may arrive either as a list
            # or as a JSON-encoded string
            temporals = data[key]
            if not isinstance(temporals, list):
                temporals = json.loads(temporals)
        # normalise every date value in every temporal entry
        cleaned_temporals = []
        for temporal in temporals:
            cleaned_temporal = {}
            for k, v in temporal.items():
                cleaned_temporal[k] = ogdch_date_validator(v)
            cleaned_temporals.append(cleaned_temporal)
        data[key] = json.dumps(cleaned_temporals)
def callable(key, data, errors, context):
    """Convert the tags at ``data[key]`` into tags of the (closure-bound)
    vocabulary ``vocab``, creating any tag that does not yet exist in the
    vocabulary, then append them to the flattened data dict.

    :raises: df.Invalid if the vocabulary does not exist
    """
    new_tags = data.get(key)
    if not new_tags:
        return
    if isinstance(new_tags, basestring):
        new_tags = [new_tags]
    # get current number of tags
    n = 0
    for k in data.keys():
        if k[0] == 'tags':
            n = max(n, k[1] + 1)
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = v
    for tag in new_tags:
        if tag_in_vocabulary(tag, v.id, context) is None:
            log.debug(
                u'Tag "{}" does not exist in vocabulary "{}", will create one'
                .format(tag, v.name))
            # BUG FIX: the tag_create payload used to be assigned to `data`,
            # clobbering the flattened-data parameter, so the tuple-key
            # writes below went into the wrong dict. Use a separate name.
            tag_data = {'name': tag, 'vocabulary_id': v.id}
            created_tag = toolkit.get_action('tag_create')(context, tag_data)
            log.debug(u'Created tag "{}" in vocabulary "{}"'.format(
                created_tag, v.name))
        validators.tag_in_vocabulary_validator(tag, context)
    for num, tag in enumerate(new_tags):
        data[('tags', num + n, 'name')] = tag
        data[('tags', num + n, 'vocabulary_id')] = v.id
def is_positive_integer(value, context):
    """Validate ``value`` as an integer >= 1 and return it as an int.

    :raises: df.Invalid if the value is not an integer or is < 1
    """
    as_int = validators.int_validator(value, context)
    if as_int >= 1:
        return as_int
    raise df.Invalid(_('Must be a positive integer'))
def convert_to_tags(key, data, context, vocab):
    """Convert the tag names at ``data[key]`` into active tag dicts of
    vocabulary ``vocab`` appended to ``data['tags']``; returns the data
    dict (or None when there is nothing to convert).

    :raises: df.Invalid if the vocabulary does not exist
    """
    new_tags = data[key]
    if not new_tags:
        return
    if isinstance(new_tags, basestring):
        new_tags = [new_tags]
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = v
    for tag in new_tags:
        validators.tag_in_vocabulary_validator(tag, context)
    for tag in new_tags:
        # initialise the tag list if it is empty/None
        if not data['tags']:
            data['tags'] = []
        data['tags'].append({
            'name': tag,
            'vocabulary_id': v.id,
            'state': 'active',
        })
    return data
def convert_to_json_if_string(value, context):
    """Parse a string value as JSON and return the result; any non-string
    value is returned unchanged.

    :raises: df.Invalid if the string is not valid JSON
    """
    if not isinstance(value, basestring):
        return value
    try:
        return json.loads(value)
    except ValueError:
        raise df.Invalid(_('Could not parse as valid JSON'))
def ecportal_date_to_db(value, context):
    """Check that ``value`` parses as an EC Portal date in 'db' format.

    Empty values are accepted (returns None).

    :raises: df.Invalid if the value cannot be parsed
    """
    if not value:
        return
    try:
        # NOTE(review): the parse result is discarded and nothing is
        # returned on success — confirm whether `value` (or the parsed
        # dict) should be returned by this validator.
        timedate_dict = ECPortalDateType.parse_timedate(value, 'db')
    except field_types.DateConvertError as e:
        # BUG FIX: replaced the Python-2-only `except E, e` syntax (a
        # SyntaxError on Python 3) with the `except E as e` form, which is
        # valid on Python 2.6+ and Python 3.
        # Cannot parse
        raise df.Invalid(str(e))
def json_validator(value, context):
    """Return ``value`` unchanged when it is already a dict or list;
    otherwise parse it as a JSON string and return the parsed object.

    :raises: df.Invalid if the string cannot be parsed as JSON
    """
    if isinstance(value, (dict, list)):
        return value
    try:
        parsed = json.loads(value)
    except ValueError:
        raise df.Invalid('Cannot parse JSON')
    return parsed
def valid_resource_url(key, flattened_data, errors, context):
    """
    Check whether the resource URL is permitted.

    This requires either an uploaded file, or passing any configured
    whitelist/blacklist checks.

    Raises df.Invalid when the URL is malformed, matches a blacklist
    entry, or (when a whitelist is configured) matches no whitelist entry.
    """
    # run the generic URL validator from the chain first
    valid_url(key, flattened_data, errors, context)
    value = flattened_data[key]
    if not value or _is_upload(key, flattened_data):
        LOG.debug("No resource URL found, or file is uploaded; skipping check")
        return
    if not RESOURCE_WHITELIST and not RESOURCE_BLACKLIST:
        LOG.debug("No whitelist or blacklist found; skipping URL check")
        return
    # parse our URL so we can extract the domain
    resource_url = urlparse.urlparse(value)
    if not resource_url:
        LOG.warn("Invalid resource URL")
        raise df.Invalid(_('Must be a valid URL'))
    LOG.debug("Requested resource domain is %s", resource_url.hostname)
    # a URL without a hostname cannot be checked against domain lists
    if not resource_url.hostname:
        raise df.Invalid(_('Must be a valid URL'))
    # resolve once and reuse for every _domain_match call below
    address_resolution = _resolve_address(resource_url.hostname)
    # reject the URL if it matches any blacklist entry
    if RESOURCE_BLACKLIST:
        for domain in RESOURCE_BLACKLIST:
            if _domain_match(resource_url.hostname, domain, address_resolution):
                raise df.Invalid(_('Domain is blocked'))
    # require the URL to match a whitelist entry, if applicable
    if RESOURCE_WHITELIST:
        for domain in RESOURCE_WHITELIST:
            if _domain_match(resource_url.hostname, domain, address_resolution):
                return
        raise df.Invalid(
            _('Must be from an allowed domain: {}').format(RESOURCE_WHITELIST))
    return
def verify_value_in_list(key, data, errors, context):
    """Require ``data[key]`` to be one of the (closure-bound) ``value_list``
    entries; optionally drop a "not selected" placeholder silently.

    :raises: df.StopOnError when the placeholder is dropped,
        df.Invalid when the value is missing or not in the list
    """
    value = data.get(key)
    if allow_not_selected and value == not_selected_value:
        del data[key]
        # Don't go further in the validation chain.
        # Ex: convert to extras doesn't need to be called
        raise df.StopOnError
    if value and value in value_list:
        return
    raise df.Invalid(_('needs to be a value from the list'))
def convert_to_json_if_string(value: Any, context: Context) -> Any:
    """Parse string value as a JSON object.

    Non-string values are returned unchanged; raises df.Invalid when the
    string is not valid JSON.
    """
    if not isinstance(value, str):
        return value
    try:
        return json.loads(value)
    except ValueError:
        raise df.Invalid(_('Could not parse as valid JSON'))
def publisher_exists(publisher_name, context):
    '''
    Raises Invalid if the given publisher_name does not exist in the model
    given in the context, otherwise returns the given publisher_name.
    '''
    show_group = logic.get_action('group_show')
    try:
        show_group(context, {'id': publisher_name})
    except logic.NotFound:
        message = '%s: %s' % (_('Publisher not found'), publisher_name)
        raise df.Invalid(message)
    return publisher_name
def convert_date_to_db(value, context):
    """Convert a date value for DB storage.

    Without an API version in the context, defer to the generic converter;
    otherwise clean the value and require it to match the
    year/month/day pattern.

    :raises: df.Invalid (UTF-8 encoded message) when the check fails
    """
    if context.get('api_version') is None:
        return converters.date_to_db(value, context)
    # build the check pipeline: must be a string, cleaned, matching the
    # year-or-month-or-day pattern
    checker = conv.pipe(
        conv.test_isinstance(basestring),
        conv.cleanup_line,
        conv.test(year_or_month_or_day_re.match),
    )
    value, error = checker(value, state=conv.default_state)
    if error is not None:
        raise df.Invalid(unicode(error).encode('utf-8'))
    return value
def supplier_id_validator(key, data, errors, context):
    """Resolve the supplier value at ``data[key]`` to an organization id.

    Missing/empty values are removed from the data and stop the chain;
    unknown organizations raise Invalid.
    """
    value = data.get(key)
    if value is df.missing or not value:
        # nothing supplied: drop the key and stop validating this field
        data.pop(key, None)
        raise df.StopOnError
    model = context['model']
    group = model.Group.get(value)
    if not group:
        raise df.Invalid(tk._('Organization does not exist'))
    # store the canonical organization id instead of the submitted value
    data[key] = group.id
def validator(key, data, errors, context):
    # Validate a dataset identifier of the form <id>@<org-slug>:
    # the slug must match the owning organization and the identifier must
    # be unique across datasets. On success the identifier is written back
    # to data[key].
    identifier = data.get(key[:-1] + ('identifier', ))
    dataset_id = data.get(key[:-1] + ('id', ))
    dataset_owner_org = data.get(key[:-1] + ('owner_org', ))
    if not identifier:
        raise df.Invalid(_('Identifier of the dataset is missing.'))
    identifier_parts = identifier.split('@')
    if len(identifier_parts) == 1:
        # no '@' present at all
        raise df.Invalid(
            _('Identifier must be of the form <id>@<slug> where slug is the url of the organization.'
              )  # noqa
        )
    identifier_org_slug = identifier_parts[1]
    try:
        # the org slug in the identifier must match the owning organization.
        # NOTE: the df.Invalid raised here is not caught by the NotFound
        # handler below and propagates to the caller as intended.
        dataset_organization = get_action('organization_show')(
            {}, {
                'id': dataset_owner_org
            })
        if dataset_organization['name'] != identifier_org_slug:
            raise df.Invalid(
                _('The identifier "{}" does not end with the organisation slug "{}" of the organization it belongs to.'  # noqa
                  .format(identifier, dataset_organization['name']))  # noqa
            )
    except NotFound:
        raise df.Invalid(
            _('The selected organization was not found.')  # noqa
        )
    try:
        # uniqueness check: NotFound means no other dataset uses this
        # identifier, which is the success case.
        dataset_for_identifier = get_action('ogdch_dataset_by_identifier')(
            {}, {
                'identifier': identifier
            })
        if dataset_id != dataset_for_identifier['id']:
            raise df.Invalid(
                _('Identifier is already in use, it must be unique.'))
    except NotFound:
        pass
    data[key] = identifier
def ecportal_name_validator(val, context):
    '''
    Names must be alphanumeric characters or the symbols '-' and '_'.

    Unlike CKAN core, names in the EC Portal can contain capital letters.
    '''
    if not isinstance(val, basestring):
        raise df.Invalid(_('Names must be strings'))
    # reserved words used by CKAN routing
    if val in ('new', 'edit', 'search'):
        raise df.Invalid(_('That name cannot be used'))
    min_length = 2
    max_length = model.PACKAGE_NAME_MAX_LENGTH
    if len(val) < min_length:
        raise df.Invalid(_('Name must be at least %s characters long')
                         % min_length)
    if len(val) > max_length:
        raise df.Invalid(
            _('Name must be a maximum of %i characters long') % max_length)
    if not name_match.match(val):
        raise df.Invalid(
            _('Name must be alphanumeric '
              '(ascii) characters and these symbols: -_'))
    return val
def callable(key, data, errors, context):
    """Collect the names (preferring display_name) of all tags in the
    flattened data that belong to the (closure-bound) vocabulary ``vocab``
    and store the list at ``data[key]``.

    :raises: df.Invalid if the vocabulary does not exist
    """
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    tags = [
        data[k].get('display_name', data[k]['name'])
        for k in data.keys()
        if k[0] == 'tags' and data[k].get('vocabulary_id') == v.id
    ]
    data[key] = tags
def valid_resources(private, context):
    """When a package is being switched from private to public, refuse the
    change if any of its resources is marked with invalid content.

    Returns the ``private`` value unchanged otherwise.
    """
    package = context.get('package')
    if not package:
        return private
    changed = get('private', package) != private
    becoming_public = private is False or private == u'False'
    if changed and becoming_public:
        for resource in get('resources', package):
            if get('extras', resource).get('valid_content') == 'no':
                raise df.Invalid(_("Package contains invalid resources"))
    return private
def func(key: FlattenKey, data: FlattenDataDict,
         errors: FlattenErrorDict, context: Context):
    """Collect the names (preferring display_name) of all tags in the
    flattened data that belong to the (closure-bound) vocabulary ``vocab``
    and store the list at ``data[key]``.

    :raises: df.Invalid if the vocabulary does not exist
    """
    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    tags = [
        data[k].get('display_name', data[k]['name'])
        for k in data.keys()
        if k[0] == 'tags' and data[k].get('vocabulary_id') == v.id
    ]
    data[key] = tags
def validator(key, data, errors, context):
    """Ensure the dataset identifier is unique: if another dataset already
    uses it (NotFound means it is free), raise Invalid.
    """
    dataset_id = data.get(key[:-1] + ('id', ))
    identifier = data.get(key[:-1] + ('identifier', ))
    try:
        existing = get_action('osed_dataset_by_identifier')(
            {}, {'identifier': identifier})
    except NotFound:
        # no dataset uses this identifier: nothing to do
        return
    if dataset_id != existing['id']:
        raise df.Invalid(_('Identifier is already in use, it must be unique.'))