Ejemplo n.º 1
0
def list_of_strings_or_lists(key, data, errors, context):
    """Check that ``data[key]`` is a list made of strings and/or lists.

    :raises df.Invalid: if the value is not a list, or if one of its items
        is neither a string nor a list.
    """
    candidate = data.get(key)
    if not isinstance(candidate, list):
        raise df.Invalid('Not a list')
    for item in candidate:
        if isinstance(item, (basestring, list)):
            continue
        raise df.Invalid('%s: %s' % ('Neither a string nor a list', item))
Ejemplo n.º 2
0
 def validator(key, data, errors, context):
     """Validate the "see also" datasets found in the form extras.

     Every package name returned by ``get_see_alsos_from_form`` is looked up
     with the ``package_show`` action and must be of type ``'dataset'``.
     The identifiers of the accepted packages are stored in ``data[key]`` as
     a JSON list of ``{'dataset_identifier': ...}`` objects. When nothing was
     accepted and ``_jsondata_for_key_is_set(data, key)`` is falsy,
     ``data[key]`` is set to ``'{}'``.

     :raises df.Invalid: if a package cannot be found or is not a dataset
     """
     extras = data.get(FORM_EXTRAS)
     see_alsos_validated = []
     package_names = get_see_alsos_from_form(extras) if extras else None

     # The lookups run with their own empty context (as before); it gets a
     # distinct name so the ``context`` parameter is no longer shadowed.
     lookup_context = {}
     for package_name in package_names or []:
         try:
             package = get_action('package_show')(
                 lookup_context, {'id': package_name})
         except NotFound:
             # Translate the template first, then fill it in: formatting
             # before the lookup produces a msgid no catalogue contains.
             raise df.Invalid(
                 _('Dataset {} could not be found .').format(package_name))
         package_type = package.get('type')
         if package_type != 'dataset':
             raise df.Invalid(
                 _('{} can not be chosen since it is a {}.').format(
                     package_name, package_type))
         see_alsos_validated.append(
             {'dataset_identifier': package.get('identifier')})

     if see_alsos_validated:
         data[key] = json.dumps(see_alsos_validated)
     elif not _jsondata_for_key_is_set(data, key):
         data[key] = '{}'
Ejemplo n.º 3
0
def list_of_strings_or_lists(key: FlattenKey, data: FlattenDataDict,
                             errors: FlattenErrorDict, context: Context):
    """Check that ``data[key]`` is a list made of strings and/or lists.

    :raises df.Invalid: if the value is not a list, or if one of its items
        is neither a string nor a list.
    """
    candidate = data.get(key)
    if not isinstance(candidate, list):
        raise df.Invalid('Not a list')
    for item in candidate:
        if isinstance(item, (str, list)):
            continue
        raise df.Invalid('%s: %s' % ('Neither a string nor a list', item))
Ejemplo n.º 4
0
def ecportal_description_validator(val, context):
    '''
    Validate the main description of the dataset.

    The description is often displayed with the package title. It should
    start with a short sentence that describes the data set succinctly,
    because the first few words alone may be used in some views.

    :returns: ``val`` unchanged when it is a non-empty string
    :raises df.Invalid: if ``val`` is not a string or is empty
    '''
    if isinstance(val, basestring):
        if val:
            return val
        raise df.Invalid(_('Enter a Description'))
    raise df.Invalid(_('Description must be strings'))
Ejemplo n.º 5
0
def ecportal_uri_validator(val, context):
    '''
    Validate the URI that identifies the dataset.

    This identifier is the universal and main identification of the dataset
    maintained by the publisher, e.g. http://ec.europa.eu/id/uri-1

    :returns: ``val`` unchanged when it is accepted
    :raises df.Invalid: if ``val`` is empty or one of the reserved words
        'new', 'edit' or 'search'
    '''
    if not val:
        raise df.Invalid(_('URI must be set'))

    reserved_words = ('new', 'edit', 'search')
    if val not in reserved_words:
        return val

    raise df.Invalid(_('That URI cannot be used'))
Ejemplo n.º 6
0
    def __validate_corpus(self):
        """Check that the training data is long enough.

        Two conditions are enforced: the training data must contain at
        least ``self.corpus_length`` items, and it must be strictly longer
        than ``self.sequence_length``. Each failure is logged at debug level
        before being raised.

        Returns:
            ``self`` when both checks pass.

        Raises:
            df.Invalid: if either length requirement is not met.
        """
        corpus_size = len(self.training_data)

        if self.corpus_length > corpus_size:
            msg = ('The minimum length of the corpus is {}, '
                   'current corpus has {}').format(int(self.corpus_length),
                                                   corpus_size)
            self.logger.debug(msg)
            raise df.Invalid(msg)

        if corpus_size <= self.sequence_length:
            # The attribute must be read from ``self``; the bare name
            # ``sequence_length`` is undefined here and raised NameError
            # instead of the intended validation error.
            msg = ('The length of the RNN sequence %d, given %d' %
                   (int(self.sequence_length), corpus_size))
            self.logger.debug(msg)
            raise df.Invalid(msg)

        return self
Ejemplo n.º 7
0
def stringToTags(key, data, error, context):
    """Turn the value stored under ``key`` into vocabulary tags.

    The vocabulary and the separator used to split a string value are
    chosen from ``key[0]``. Every resulting tag is passed through ``fixTag``
    and handed to ``convertToTags``.

    :raises df.Invalid: if ``key[0]`` has no associated vocabulary
    """
    # key[0] -> (vocabulary name, separator used in the tag string)
    field_settings = {
        # Project vocabularies
        "ILRI_prjsubjects": ("ILRI_vocsubjects", ","),
        # Study vocabularies
        "ILRI_actyregions": ("ILRI_vocregions", ","),
        "ILRI_actycountries": ("ILRI_voccountries", "+"),
        "ILRI_actyspecies": ("ILRI_vocspecies", ","),
    }

    raw_value = data.get(key)
    vocab, separator = field_settings.get(key[0], ("None", ","))

    # At a certain point in the creation of the dataset the value is already
    # a list, so it is only split when it is not one.
    tags = raw_value if type(raw_value) is list else raw_value.split(separator)

    if vocab == "None":
        raise df.Invalid(
            _('Tag vocabulary for key "%s" does not exist') % key[0])

    for tag in tags:
        convertToTags(vocab, fixTag(tag), data, error, context)
Ejemplo n.º 8
0
def convertToTags(vocab, newtag, data, error, context):
    """Validate tag(s) against a vocabulary and append them to ``data``.

    ``newtag`` may be a single string or an iterable of tag names. Nothing
    happens when it is empty. All tags are validated before any of them is
    written, and they are stored under ``('tags', index, ...)`` keys
    starting after the highest tag index already present in ``data``.

    :raises df.Invalid: if the vocabulary ``vocab`` does not exist
    """
    if not newtag:
        return
    tag_names = [newtag] if isinstance(newtag, basestring) else newtag

    # First free index after the tags already present in the flattened data.
    offset = max([0] + [k[1] + 1 for k in data.keys() if k[0] == 'tags'])

    vocabulary = model.Vocabulary.get(vocab)
    if not vocabulary:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = vocabulary

    for name in tag_names:
        validators.tag_in_vocabulary_validator(name, context)

    for index, name in enumerate(tag_names, offset):
        data[('tags', index, 'name')] = name
        data[('tags', index, 'vocabulary_id')] = vocabulary.id
Ejemplo n.º 9
0
    def func(key: FlattenKey, data: FlattenDataDict, errors: FlattenErrorDict,
             context: Context):
        """Validate the tag(s) in ``data[key]`` and append them to ``data``.

        The value may be a single string or an iterable of tag names.
        Nothing happens when it is empty. All tags are validated against the
        vocabulary named by the enclosing ``vocab`` before any is written.
        They are stored under ``('tags', index, ...)`` keys starting after
        the highest tag index already present.

        :raises df.Invalid: if the vocabulary does not exist
        """
        submitted = data.get(key)
        if not submitted:
            return
        tag_names = [submitted] if isinstance(submitted, str) else submitted

        # First free index after the tags already in the flattened data.
        offset = max([0] + [k[1] + 1 for k in data.keys() if k[0] == 'tags'])

        vocabulary = model.Vocabulary.get(vocab)
        if not vocabulary:
            raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
        context['vocabulary'] = vocabulary

        for name in tag_names:
            validators.tag_in_vocabulary_validator(name, context)

        for index, name in enumerate(tag_names, offset):
            data[('tags', index, 'name')] = name
            data[('tags', index, 'vocabulary_id')] = vocabulary.id
Ejemplo n.º 10
0
def convert_package_name_or_id_to_id(package_name_or_id: Any,
                                     context: Context) -> Any:
    '''Return the id of the package that has the given name or id.

    Use this when a value may be either a package name or a package id and
    the id is needed in both cases. The value is first matched against
    package ids and, if nothing is found, against package names, so the
    existence of the package is validated as well.

    :returns: the id of the package with the given name or id
    :rtype: string
    :raises: ckan.lib.navl.dictization_functions.Invalid if there is no
        package with the given name or id

    '''
    session = context['session']
    for column in ('id', 'name'):
        match = session.query(model.Package).filter_by(
            **{column: package_name_or_id}).first()
        if match:
            return match.id
    raise df.Invalid('%s: %s' % (_('Not found'), _('Dataset')))
Ejemplo n.º 11
0
def update_frequency_vocab(value, context):
    """Check that ``value`` is one of ``DatasetForm.UPDATE_FREQUENCIES``.

    :returns: ``value`` unchanged when it is an allowed frequency
    :raises df.Invalid: otherwise; the message lists the allowed values
    """
    if value not in DatasetForm.UPDATE_FREQUENCIES:
        # Only the constant part of the message is translated. Passing the
        # concatenated string to ``_`` would look up a msgid that contains
        # the runtime values and therefore never matches the catalogue.
        raise df.Invalid(
            _('update_frequency should be set to one of following values: ')
            + ', '.join(DatasetForm.UPDATE_FREQUENCIES))

    return value
    def validator(key, data, errors, context):
        """Normalise the temporals stored under ``key`` to a JSON string.

        When ``key`` is absent, ``data[key]`` becomes ``'[]'``. Otherwise
        the temporals come from ``data[key]`` or, when that is empty, from
        the form extras. A non-list value is decoded with ``json.loads``.
        Every value of every temporal is passed through
        ``ogdch_date_validator`` and the result is stored as JSON.

        :raises df.Invalid: if a temporal from the form has an end date but
            no start date
        """
        if key not in data:
            data[key] = '[]'
            return

        stored = data.get(key)
        if stored:
            temporals = stored
        else:
            temporals = []
            extras = data.get(FORM_EXTRAS)
            if extras:
                temporals = get_temporals_from_form(extras)
                for entry in temporals:
                    # NOTE(review): this only rejects "end date without
                    # start date"; the message suggests both are required.
                    # Confirm whether ``not (start and end)`` was intended.
                    if not entry['start_date'] and entry['end_date']:
                        raise df.Invalid(
                            _('A valid temporal must have both start and end date'
                              )  # noqa
                        )

        if not isinstance(temporals, list):
            temporals = json.loads(temporals)

        data[key] = json.dumps([
            {name: ogdch_date_validator(raw) for name, raw in entry.items()}
            for entry in temporals
        ])
Ejemplo n.º 13
0
    def callable(key, data, errors, context):
        """Convert ``data[key]`` to vocabulary tags, creating missing ones.

        The value may be a single string or a list of tag names. Nothing
        happens when it is empty. Tags that ``tag_in_vocabulary`` does not
        find in the vocabulary named by the enclosing ``vocab`` are created
        through the ``tag_create`` action. Every tag is then validated and
        finally written to ``data`` under ``('tags', index, ...)`` keys
        following the tags already present.

        :raises df.Invalid: if the vocabulary does not exist
        """
        new_tags = data.get(key)
        if not new_tags:
            return
        if isinstance(new_tags, basestring):
            new_tags = [new_tags]

        # get current number of tags
        n = 0
        for k in data.keys():
            if k[0] == 'tags':
                n = max(n, k[1] + 1)

        v = model.Vocabulary.get(vocab)
        if not v:
            raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
        context['vocabulary'] = v

        for tag in new_tags:
            if tag_in_vocabulary(tag, v.id, context) is None:
                log.debug(
                    u'Tag "{}" does not exist in vocabulary "{}", will create one'
                    .format(tag, v.name))
                # Keep the action payload in its own variable. Rebinding
                # ``data`` here made the writes at the end of this function
                # land in this throwaway dict instead of the flattened data.
                tag_dict = {'name': tag, 'vocabulary_id': v.id}
                created_tag = toolkit.get_action('tag_create')(
                    context, tag_dict)
                log.debug(u'Created tag "{}" in vocabulary "{}"'.format(
                    created_tag, v.name))

            validators.tag_in_vocabulary_validator(tag, context)

        for num, tag in enumerate(new_tags):
            data[('tags', num + n, 'name')] = tag
            data[('tags', num + n, 'vocabulary_id')] = v.id
Ejemplo n.º 14
0
def is_positive_integer(value, context):
    """Return ``value`` converted by ``validators.int_validator``.

    :raises df.Invalid: if the converted value is smaller than 1
    """
    number = validators.int_validator(value, context)
    if number < 1:
        message = _('Must be a positive integer')
        raise df.Invalid(message)
    return number
Ejemplo n.º 15
0
def convert_to_tags(key, data, context, vocab):
    """Validate the tag(s) in ``data[key]`` and append them to ``data['tags']``.

    The value may be a single string or a list of tag names. Each accepted
    tag is appended as a dict with its name, the vocabulary id and the
    state ``'active'``.

    :returns: ``None`` when ``data[key]`` is empty, otherwise ``data``
    :raises df.Invalid: if the vocabulary ``vocab`` does not exist
    """
    new_tags = data[key]
    if not new_tags:
        return
    if isinstance(new_tags, basestring):
        new_tags = [new_tags]

    v = model.Vocabulary.get(vocab)
    if not v:
        raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)
    context['vocabulary'] = v

    for tag in new_tags:
        validators.tag_in_vocabulary_validator(tag, context)

    # Initialise the tag list once, and tolerate a missing 'tags' key
    # (plain indexing raised KeyError in that case).
    if not data.get('tags'):
        data['tags'] = []

    data['tags'].extend(
        {'name': tag, 'vocabulary_id': v.id, 'state': 'active'}
        for tag in new_tags)

    return data
Ejemplo n.º 16
0
def convert_to_json_if_string(value, context):
    """Decode ``value`` as JSON when it is a string.

    :returns: the decoded object, or ``value`` itself when it is not a
        string
    :raises df.Invalid: if the string is not valid JSON
    """
    if not isinstance(value, basestring):
        return value
    try:
        decoded = json.loads(value)
    except ValueError:
        raise df.Invalid(_('Could not parse as valid JSON'))
    return decoded
Ejemplo n.º 17
0
def ecportal_date_to_db(value, context):
    """Check that ``value`` can be parsed by ``ECPortalDateType``.

    Empty values are accepted without parsing.

    :raises df.Invalid: if ``ECPortalDateType.parse_timedate`` raises
        ``field_types.DateConvertError``
    """
    if not value:
        return
    try:
        # NOTE(review): the parsed result is neither used nor returned in
        # the code visible here, so the function always returns None.
        # Confirm whether a conversion/return step is missing.
        timedate_dict = ECPortalDateType.parse_timedate(value, 'db')
    except field_types.DateConvertError as e:
        # Cannot parse. ``as`` replaces the Python-2-only ``except X, e``
        # form and is valid on Python 2.6+ and Python 3.
        raise df.Invalid(str(e))
Ejemplo n.º 18
0
def json_validator(value, context):
    """Return ``value`` as a decoded JSON object.

    Dicts and lists are returned unchanged; anything else is decoded with
    ``json.loads``.

    :raises df.Invalid: if ``json.loads`` raises ``ValueError``
    """
    if isinstance(value, (dict, list)):
        return value
    try:
        parsed = json.loads(value)
    except ValueError:
        raise df.Invalid('Cannot parse JSON')
    return parsed
Ejemplo n.º 19
0
def valid_resource_url(key, flattened_data, errors, context):
    """ Check whether the resource URL is permitted.

    The URL is first checked with ``valid_url``. The domain checks are
    skipped when there is no URL, when the resource is an upload, or when
    neither a whitelist nor a blacklist is configured. Otherwise the URL's
    hostname must not match any blacklist entry and, if a whitelist is
    configured, must match one of its entries.

    :raises df.Invalid: if the URL has no hostname, is blacklisted, or is
        not covered by the whitelist
    """
    valid_url(key, flattened_data, errors, context)
    url = flattened_data[key]
    if not url or _is_upload(key, flattened_data):
        LOG.debug("No resource URL found, or file is uploaded; skipping check")
        return

    if not (RESOURCE_WHITELIST or RESOURCE_BLACKLIST):
        LOG.debug("No whitelist or blacklist found; skipping URL check")
        return

    # parse the URL so the domain can be extracted
    parsed = urlparse.urlparse(url)
    if not parsed:
        LOG.warn("Invalid resource URL")
        raise df.Invalid(_('Must be a valid URL'))

    hostname = parsed.hostname
    LOG.debug("Requested resource domain is %s", hostname)
    if not hostname:
        raise df.Invalid(_('Must be a valid URL'))

    resolved = _resolve_address(hostname)

    def matches(domain):
        return _domain_match(hostname, domain, resolved)

    # reject the URL if it matches any blacklist entry
    if RESOURCE_BLACKLIST and any(
            matches(domain) for domain in RESOURCE_BLACKLIST):
        raise df.Invalid(_('Domain is blocked'))

    # require the URL to match a whitelist entry, if applicable
    if not RESOURCE_WHITELIST:
        return
    if any(matches(domain) for domain in RESOURCE_WHITELIST):
        return
    raise df.Invalid(
        _('Must be from an allowed domain: {}').format(RESOURCE_WHITELIST))
Ejemplo n.º 20
0
    def verify_value_in_list(key, data, errors, context):
        """Check that ``data[key]`` is one of the enclosing ``value_list``.

        When ``allow_not_selected`` is set and the value equals
        ``not_selected_value``, the key is removed from ``data`` and
        ``df.StopOnError`` is raised.

        :raises df.StopOnError: for an allowed "not selected" value
        :raises df.Invalid: if the value is empty or not in the list
        """
        selected = data.get(key)

        if allow_not_selected and selected == not_selected_value:
            del data[key]
            # Halt the validation chain here; later steps (e.g. convert to
            # extras) don't need to be called.
            raise df.StopOnError

        if selected and selected in value_list:
            return
        raise df.Invalid(_('needs to be a value from the list'))
Ejemplo n.º 21
0
def convert_to_json_if_string(value: Any, context: Context) -> Any:
    """Decode ``value`` as JSON when it is a string.

    Non-string values are returned unchanged. A string that is not valid
    JSON raises ``df.Invalid``.
    """
    if not isinstance(value, str):
        return value
    try:
        decoded = json.loads(value)
    except ValueError:
        raise df.Invalid(_('Could not parse as valid JSON'))
    return decoded
Ejemplo n.º 22
0
def publisher_exists(publisher_name, context):
    '''
    Return ``publisher_name`` if the ``group_show`` action finds it.

    :raises df.Invalid: if ``group_show`` raises ``logic.NotFound`` for the
        given publisher_name
    '''
    try:
        logic.get_action('group_show')(context, {'id': publisher_name})
    except logic.NotFound:
        message = '%s: %s' % (_('Publisher not found'), publisher_name)
        raise df.Invalid(message)
    else:
        return publisher_name
Ejemplo n.º 23
0
def convert_date_to_db(value, context):
    """Convert a date value for storage.

    Without an ``api_version`` in ``context`` the work is delegated to
    ``converters.date_to_db``. Otherwise the value must be a string that,
    after ``conv.cleanup_line``, matches ``year_or_month_or_day_re``.

    :returns: the converted value
    :raises df.Invalid: if the converter pipeline reports an error
    """
    if context.get('api_version') is None:
        return converters.date_to_db(value, context)

    check = conv.pipe(
        conv.test_isinstance(basestring),
        conv.cleanup_line,
        conv.test(year_or_month_or_day_re.match),
    )
    value, error = check(value, state=conv.default_state)
    if error is None:
        return value
    raise df.Invalid(unicode(error).encode('utf-8'))
Ejemplo n.º 24
0
def supplier_id_validator(key, data, errors, context):
    """Replace the supplier reference in ``data[key]`` with the group id.

    A missing or empty value removes the key and stops further validation.
    Otherwise the value is looked up with ``Group.get`` on the model taken
    from ``context``.

    :raises df.StopOnError: if the value is missing or empty
    :raises df.Invalid: if no group is found for the value
    """
    supplier = data.get(key)
    if supplier is df.missing or not supplier:
        data.pop(key, None)
        raise df.StopOnError

    organization = context['model'].Group.get(supplier)
    if not organization:
        raise df.Invalid(tk._('Organization does not exist'))
    data[key] = organization.id
Ejemplo n.º 25
0
    def validator(key, data, errors, context):
        """Validate the dataset identifier and store it under ``key``.

        The identifier, dataset id and owner organization are read from the
        sibling keys of ``key``. The identifier must contain an ``@``, the
        part after the first ``@`` must equal the name of the owner
        organization, and no other dataset may already use the identifier.

        :raises df.Invalid: if the identifier is missing or malformed, the
            organization is not found or does not match, or the identifier
            is already used by a different dataset
        """
        identifier = data.get(key[:-1] + ('identifier', ))
        dataset_id = data.get(key[:-1] + ('id', ))
        dataset_owner_org = data.get(key[:-1] + ('owner_org', ))
        if not identifier:
            raise df.Invalid(_('Identifier of the dataset is missing.'))
        identifier_parts = identifier.split('@')
        if len(identifier_parts) == 1:
            raise df.Invalid(
                _('Identifier must be of the form <id>@<slug> where slug is the url of the organization.'
                  )  # noqa
            )
        identifier_org_slug = identifier_parts[1]

        try:
            dataset_organization = get_action('organization_show')(
                {}, {
                    'id': dataset_owner_org
                })
        except NotFound:
            raise df.Invalid(
                _('The selected organization was not found.')  # noqa
            )
        if dataset_organization['name'] != identifier_org_slug:
            # Translate the template first, then fill it in: formatting
            # before the lookup produces a msgid no catalogue contains.
            raise df.Invalid(
                _('The identifier "{}" does not end with the organisation slug "{}" of the organization it belongs to.')  # noqa
                .format(identifier, dataset_organization['name'])
            )

        try:
            dataset_for_identifier = get_action('ogdch_dataset_by_identifier')(
                {}, {
                    'identifier': identifier
                })
        except NotFound:
            # No dataset uses this identifier yet, so it is free.
            pass
        else:
            if dataset_id != dataset_for_identifier['id']:
                raise df.Invalid(
                    _('Identifier is already in use, it must be unique.'))

        data[key] = identifier
Ejemplo n.º 26
0
def ecportal_name_validator(val, context):
    '''
    Validate a name.

    Names must be alphanumeric characters or the symbols '-' and '_'.
    Unlike CKAN core, names in the EC Portal can contain capital letters.

    :returns: ``val`` unchanged when it is accepted
    :raises df.Invalid: if ``val`` is not a string, is a reserved word, is
        shorter than 2 characters, is longer than
        ``model.PACKAGE_NAME_MAX_LENGTH``, or does not match ``name_match``
    '''
    if not isinstance(val, basestring):
        raise df.Invalid(_('Names must be strings'))

    reserved_words = ('new', 'edit', 'search')
    if val in reserved_words:
        raise df.Invalid(_('That name cannot be used'))

    length = len(val)
    if length < 2:
        raise df.Invalid(_('Name must be at least %s characters long') % 2)

    longest_allowed = model.PACKAGE_NAME_MAX_LENGTH
    if length > longest_allowed:
        raise df.Invalid(
            _('Name must be a maximum of %i characters long') %
            longest_allowed)

    if name_match.match(val):
        return val
    raise df.Invalid(
        _('Name must be alphanumeric '
          '(ascii) characters and these symbols: -_'))
Ejemplo n.º 27
0
    def callable(key, data, errors, context):
        """Collect the names of the tags of one vocabulary into ``data[key]``.

        Every entry of ``data`` whose key starts with ``'tags'`` and whose
        ``vocabulary_id`` equals the id of the vocabulary named by the
        enclosing ``vocab`` contributes its ``display_name``, falling back
        to its ``name``.

        :raises df.Invalid: if the vocabulary does not exist
        """
        vocabulary = model.Vocabulary.get(vocab)
        if not vocabulary:
            raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)

        data[key] = [
            data[k].get('display_name', data[k]['name'])
            for k in data.keys()
            if k[0] == 'tags'
            and data[k].get('vocabulary_id') == vocabulary.id
        ]
Ejemplo n.º 28
0
def valid_resources(private, context):
    """Refuse to make a package public while it has invalid resources.

    The check only runs when ``context`` holds a package, the ``private``
    value differs from the package's current one, and the new value is
    ``False`` or ``u'False'``.

    :returns: ``private`` unchanged
    :raises df.Invalid: if a resource's extras have ``valid_content`` set
        to ``'no'``
    """
    package = context.get('package')
    if not package:
        return private

    visibility_changed = get('private', package) != private
    becomes_public = private is False or private == u'False'
    if not (visibility_changed and becomes_public):
        return private

    for resource in get('resources', package):
        extras = get('extras', resource)
        if extras.get('valid_content') == 'no':
            raise df.Invalid(_("Package contains invalid resources"))
    return private
Ejemplo n.º 29
0
    def func(key: FlattenKey, data: FlattenDataDict, errors: FlattenErrorDict,
             context: Context):
        """Collect the names of the tags of one vocabulary into ``data[key]``.

        Every entry of ``data`` whose key starts with ``'tags'`` and whose
        ``vocabulary_id`` equals the id of the vocabulary named by the
        enclosing ``vocab`` contributes its ``display_name``, falling back
        to its ``name``.

        :raises df.Invalid: if the vocabulary does not exist
        """
        vocabulary = model.Vocabulary.get(vocab)
        if not vocabulary:
            raise df.Invalid(_('Tag vocabulary "%s" does not exist') % vocab)

        data[key] = [
            data[k].get('display_name', data[k]['name'])
            for k in data.keys()
            if k[0] == 'tags'
            and data[k].get('vocabulary_id') == vocabulary.id
        ]
Ejemplo n.º 30
0
 def validator(key, data, errors, context):
     """Check that the dataset identifier is not used by another dataset.

     The dataset id and identifier are read from the sibling keys of
     ``key``. The identifier is looked up with the
     ``osed_dataset_by_identifier`` action; a ``NotFound`` result is
     accepted.

     :raises df.Invalid: if the lookup returns a dataset with another id
     """
     parent = key[:-1]
     dataset_id = data.get(parent + ('id', ))
     identifier = data.get(parent + ('identifier', ))
     lookup = get_action('osed_dataset_by_identifier')
     try:
         existing = lookup({}, {'identifier': identifier})
     except NotFound:
         # No dataset carries this identifier.
         pass
     else:
         if dataset_id != existing['id']:
             raise df.Invalid(
                 _('Identifier is already in use, it must be unique.'))