Example 1
def validate_string_length(value, context):
    if isinstance(value, str) and len(value) == 0:
        raise tk.ValidationError({'constraints': ['Input required']})
    if len(value) > MAX_STRING_LENGTH:
        raise tk.ValidationError(
            {'constraints': ['Input exceeds {} character limit'.format(MAX_STRING_LENGTH)]})
    return value
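A minimal usage sketch, assuming tk is ckan.plugins.toolkit and MAX_STRING_LENGTH is 350 (both names come from the surrounding module; the value is an assumption):

import ckan.plugins.toolkit as tk

MAX_STRING_LENGTH = 350  # assumed value for this sketch

try:
    validate_string_length('x' * 400, context={})
except tk.ValidationError as e:
    # CKAN's ValidationError keeps the dict passed to it on .error_dict
    print(e.error_dict)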
Example 2
def showcase_admin_add(context, data_dict):
    '''Add a user to the list of showcase admins.

    :param username: name of the user to add to the showcase admin list
    :type username: string
    '''

    toolkit.check_access('ckanext_showcase_admin_add', context, data_dict)

    # validate the incoming data_dict
    validated_data_dict, errors = validate(data_dict,
                                           showcase_admin_add_schema(),
                                           context)

    username = toolkit.get_or_bust(validated_data_dict, 'username')
    try:
        user_id = convert_user_name_or_id_to_id(username, context)
    except toolkit.Invalid:
        raise toolkit.ObjectNotFound

    if errors:
        raise toolkit.ValidationError(errors)

    if ShowcaseAdmin.exists(user_id=user_id):
        raise toolkit.ValidationError(
            "ShowcaseAdmin with user_id '{0}' already exists.".format(user_id),
            error_summary=u"User '{0}' is already a Showcase Admin.".format(
                username))

    # create showcase admin entry
    return ShowcaseAdmin.create(user_id=user_id)
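A sketch of calling this through the action layer, assuming ckanext-showcase registers the function under the name checked by check_access above; the user names are hypothetical:

import ckan.plugins.toolkit as toolkit

admin = toolkit.get_action('ckanext_showcase_admin_add')(
    {'user': 'sysadmin'}, {'username': 'some-user'})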
Example 3
def validate_datarequest_closing(context, request_data):
    if tk.h.closing_circumstances_enabled:
        close_circumstance = request_data.get('close_circumstance', None)
        if not close_circumstance:
            raise tk.ValidationError({tk._('Circumstances'): [tk._('Circumstances cannot be empty')]})
        condition = request_data.get('condition', None)
        if condition:
            if condition == 'nominate_dataset' and request_data.get('accepted_dataset_id', '') == '':
                raise tk.ValidationError({tk._('Accepted dataset'): [tk._('Accepted dataset cannot be empty')]})
            elif condition == 'nominate_approximate_date':
                if request_data.get('approx_publishing_date', '') == '':
                    raise tk.ValidationError({tk._('Approximate publishing date'): [tk._('Approximate publishing date cannot be empty')]})
                try:
                    # This validation is required for the Safari browser as the date type input is not supported and falls back to using a text type input
                    # SQLAlchemy throws an error if the date value is not in the format yyyy-mm-dd
                    datetime.datetime.strptime(request_data.get('approx_publishing_date', ''), '%Y-%m-%d')
                except ValueError:
                    raise tk.ValidationError({tk._('Approximate publishing date'): [tk._('Approximate publishing date must be in format yyyy-mm-dd')]})

    accepted_dataset_id = request_data.get('accepted_dataset_id', '')
    if accepted_dataset_id:
        try:
            tk.get_validator('package_name_exists')(accepted_dataset_id, context)
        except Exception:
            raise tk.ValidationError({tk._('Accepted dataset'): [tk._('Dataset not found')]})
Example 4
    def update_kobo_details(self, resource_dict, user_name, new_kobo_details):
        """ Update just details to a KoBo resource """

        context = {'user': user_name, 'job': True}
        if not resource_dict:
            raise toolkit.ValidationError(
                {'resource': ["empty resource to update"]})
        kobo_details = resource_dict.get('kobo_details')
        if not kobo_details:
            raise toolkit.ValidationError({
                'kobo_details': [
                    "kobo_details is missing from resource {}".format(
                        resource_dict)
                ]
            })

        kobo_details.update(new_kobo_details)
        kobo_details['kobo_last_updated'] = datetime.datetime.utcnow().isoformat()

        resource = toolkit.get_action('resource_patch')(
            context, {
                'id': resource_dict['id'],
                'kobo_details': kobo_details,
            })
        return resource
Example 5
def validate_comment(context, request_data):
    comment = request_data.get('comment', '')

    # Check if the data request exists
    try:
        datarequest = tk.get_action(constants.SHOW_DATAREQUEST)(
            context, {
                'id': request_data['datarequest_id']
            })
    except Exception:
        raise tk.ValidationError(
            {tk._('Data Request'): [tk._('Data Request not found')]})

    if not comment or len(comment) <= 0:
        raise tk.ValidationError({
            tk._('Comment'):
            [tk._('Comments must be a minimum of 1 character long')]
        })

    if len(comment) > constants.COMMENT_MAX_LENGTH:
        raise tk.ValidationError({
            tk._('Comment'): [
                tk._('Comments must be a maximum of %d characters long') %
                constants.COMMENT_MAX_LENGTH
            ]
        })

    return datarequest
Example 6
def validate_comment(context, request_data):
    comment = request_data.get("comment", "")

    # Check if the data request exists
    try:
        datarequest = tk.get_action(constants.SHOW_DATAREQUEST)(
            context, {
                "id": request_data["datarequest_id"]
            })
    except Exception:
        raise tk.ValidationError(
            {tk._("Data Request"): [tk._("Data Request not found")]})

    if not comment or len(comment) <= 0:
        raise tk.ValidationError({
            tk._("Comment"):
            [tk._("Comments must be a minimum of 1 character long")]
        })

    if len(comment) > constants.COMMENT_MAX_LENGTH:
        raise tk.ValidationError({
            tk._("Comment"): [
                tk._("Comments must be a maximum of {max_len} characters long"
                     ).format(max_len=constants.COMMENT_MAX_LENGTH)
            ]
        })

    return datarequest
Example 7
def resource_schema_pkey_create(context, data_dict):
    '''Add a primary key to a resource's schema.

    :param resource_id: the ID of the resource
    :type resource_id: string

    :param pkey: the primary key, either the name of one of the fields or a
        list of field names from the resource's schema
    :type pkey: string or iterable of strings

    :returns: the primary key that was created
    :rtype: string or list of strings

    '''
    # Fail if the resource already has a primary key.
    resource_id = toolkit.get_or_bust(data_dict, 'resource_id')
    try:
        pkey = toolkit.get_action('resource_schema_pkey_show')(context,
            {'resource_id': resource_id})
    except exceptions.InvalidResourceIDException:
        raise toolkit.ValidationError(toolkit._("Invalid resource_id"))
    if pkey is not None:
        raise toolkit.ValidationError(toolkit._("The resource already has a "
                                                "primary key"))

    # Otherwise create is the same as update.
    return toolkit.get_action('resource_schema_pkey_update')(context,
                                                             data_dict)
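A sketch of a call, assuming the function is registered as the 'resource_schema_pkey_create' action; the resource id and field names are made up:

import ckan.plugins.toolkit as toolkit

# pkey may be a single field name or an iterable of field names
pkey = toolkit.get_action('resource_schema_pkey_create')(
    {'user': 'admin'},
    {'resource_id': 'resource-123', 'pkey': ['region', 'date']})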
Example 8
def datastore_reindex(resource_id, context):
    '''
    Reindexes the data in the given resource. This involves recreating all versions of each record
    in elasticsearch using the data in mongo. The reindexing work is done by an rq background job
    and therefore this is an async action.

    :param resource_id: the resource id to reindex
    :param context: the context dict from the action call
    :return: a dict containing info about the background job that is doing the reindexing
    '''
    if is_resource_read_only(resource_id):
        raise toolkit.ValidationError(
            u'This resource has been marked as read only')

    last_ingested_version = stats.get_last_ingest(resource_id)
    if last_ingested_version is None:
        raise toolkit.ValidationError(
            u'There is no ingested data for this resource')

    resource = toolkit.get_action(u'resource_show')(context, {
        u'id': resource_id
    })
    job = queue_index(resource, None, last_ingested_version.version)

    return {
        u'queued_at': job.enqueued_at.isoformat(),
        u'job_id': job.id,
    }
Example 9
def metadata_standard_index_show(original_action, context, data_dict):
    """
    Get the document structure of a metadata search index.

    :param id: the id or name of the metadata standard
    :type id: string

    :returns: dictionary, or None if the index does not exist
    """
    original_action(context, data_dict)

    id_ = tk.get_or_bust(data_dict, 'id')
    metadata_standard = ckanext_model.MetadataStandard.get(id_)
    if metadata_standard is None:
        raise tk.ObjectNotFound('%s: %s' %
                                (_('Not found'), _('Metadata Standard')))

    indexes = client.get_indexes()
    if not indexes['success']:
        raise tk.ValidationError(indexes['msg'])

    if metadata_standard.name in indexes['indexes']:
        result = client.get_index_mapping(metadata_standard.name)
        if not result['success']:
            raise tk.ValidationError(result['msg'])
        return result['mapping']
Example 10
    def validate(cls, data_dict):
        data_dict = super(AgentQuery, cls).validate(data_dict)
        valid_agent_types = ['person', 'org', 'other']
        agent_type = toolkit.get_or_bust(data_dict, 'agent_type')
        if agent_type not in valid_agent_types:
            raise toolkit.Invalid('Agent type must be one of {0}'.format(
                ', '.join(valid_agent_types)))

        valid_params = {
            'person':
            dict(required=['family_name', 'given_names'],
                 optional=['given_names_first']),
            'org':
            dict(required=['name'], optional=['location']),
            'other':
            dict(required=[], optional=[])
        }
        required = ['agent_type'] + valid_params[agent_type]['required']
        optional = ['user_id', 'external_id', 'external_id_scheme'
                    ] + valid_params[agent_type]['optional']
        for k in required:
            if k not in data_dict:
                raise toolkit.ValidationError(
                    '{0} is a required field.'.format(k))
        if 'external_id' in data_dict and 'external_id_scheme' not in data_dict:
            raise toolkit.ValidationError(
                'external_id_scheme is a required field when external_id is set.'
            )
        all_fields = required + optional
        for k in data_dict:
            if k not in all_fields:
                data_dict[k] = None
        return data_dict
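A hypothetical call illustrating the per-type dispatch above: 'person' requires family_name and given_names on top of agent_type, and unrecognised keys are nulled out rather than rejected (field values are made up):

data = AgentQuery.validate({
    'agent_type': 'person',
    'family_name': 'Lovelace',
    'given_names': 'Ada',
})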
Example 11
    def check_spatial_extra(self, package):
        '''For a given package, looks at the spatial extent (as given in the
        extra "spatial" in GeoJSON format) and records it in PostGIS.

        :param package: the package model object

        '''
        from ckanext.spatial.lib import save_package_extent

        if not package.id:
            log.warning(
                u'Couldn\'t store spatial extent because no id was provided for the '
                u'package')
            return

        # TODO: deleted extra
        for extra in package.extras_list:
            if extra.key == u'spatial':
                if extra.state == u'active' and extra.value:
                    try:
                        log.debug(u'Received: %r' % extra.value)
                        geometry = json.loads(extra.value)
                    except (ValueError, TypeError) as e:
                        error_dict = {
                            u'spatial':
                            [u'Error decoding JSON object: %s' % str(e)]
                        }
                        raise toolkit.ValidationError(
                            error_dict,
                            error_summary=package_error_summary(error_dict))

                    try:
                        save_package_extent(package.id, geometry)

                    except ValueError as e:
                        error_dict = {
                            u'spatial':
                            [u'Error creating geometry: %s' % str(e)]
                        }
                        raise toolkit.ValidationError(
                            error_dict,
                            error_summary=package_error_summary(error_dict))
                    except Exception as e:
                        if bool(os.getenv(u'DEBUG')):
                            raise
                        error_dict = {u'spatial': [u'Error: %s' % str(e)]}
                        raise toolkit.ValidationError(
                            error_dict,
                            error_summary=package_error_summary(error_dict))
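For illustration, a value the 'spatial' extra could hold: any GeoJSON geometry that json.loads accepts and save_package_extent can store (the coordinates here are made up):

import json

spatial_value = json.dumps({
    'type': 'Polygon',
    'coordinates': [[[-180, -90], [180, -90], [180, 90], [-180, 90], [-180, -90]]],
})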
Example 12
def follow_datarequest(context, data_dict):
    """
    Action to follow a data request. Access rights will be checked before
    following a datarequest and a NotAuthorized exception will be raised if the
    user is not allowed to follow the given datarequest. ValidationError will
    be raised if the datarequest ID is not included or if the user is already
    following the datarequest. ObjectNotFound will be raised if the given
    datarequest does not exist.

    :param id: The ID of the datarequest to be followed
    :type id: string

    :returns: True
    :rtype: bool
    """

    model = context["model"]
    session = context["session"]
    datarequest_id = data_dict.get("id", "")

    if not datarequest_id:
        raise tk.ValidationError(
            [tk._("Data Request ID has not been included")])

    # Init the database
    db.init_db(model)

    # Check access
    tk.check_access(constants.FOLLOW_DATAREQUEST, context, data_dict)

    # Get the data request
    result = db.DataRequest.get(id=datarequest_id)
    if not result:
        raise tk.ObjectNotFound(
            tk._("Data Request {datarequest_id} not found in the data base").
            format(datarequest_id=datarequest_id))

    # Is already following?
    user_id = context["auth_user_obj"].id
    result = db.DataRequestFollower.get(datarequest_id=datarequest_id,
                                        user_id=user_id)
    if result:
        raise tk.ValidationError(
            [tk._("The user is already following the given Data Request")])

    # Store the data
    follower = db.DataRequestFollower()
    follower.datarequest_id = datarequest_id
    follower.user_id = user_id
    follower.time = datetime.datetime.now()

    session.add(follower)
    session.commit()

    return True
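A sketch of invoking the action through the toolkit, assuming it is registered under constants.FOLLOW_DATAREQUEST; the user and id are hypothetical:

import ckan.plugins.toolkit as tk

# Raises ValidationError / ObjectNotFound as described in the docstring.
tk.get_action(constants.FOLLOW_DATAREQUEST)(
    {'user': 'alice'}, {'id': 'datarequest-123'})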
Example 13
def agent_update(context, data_dict):
    '''
    Action for updating an :class:`~ckanext.attribution.model.agent.Agent` record. Different
    fields are required by different agent types.

    :param id: ID of the record to update
    :type id: str
    :param agent_type: broad type of agent; usually 'person' or 'org'
    :type agent_type: str, optional
    :param family_name: family name of a person [person only]
    :type family_name: str, optional
    :param given_names: given name(s) or initials of a person [person only]
    :type given_names: str, optional
    :param given_names_first: whether given names should be displayed before the family name
                              (default True) [person only]
    :type given_names_first: bool, optional
    :param user_id: the ID for a registered user of this CKAN instance associated with this agent
                    [person only]
    :type user_id: str, optional
    :param name: name of an organisation [org only]
    :type name: str, optional
    :param context:
    :param data_dict:
    :returns: The updated agent record.
    :rtype: dict

    '''
    toolkit.check_access('agent_update', context, data_dict)
    item_id = data_dict.get('id')
    if item_id is None:
        raise toolkit.ValidationError('Record ID must be provided.')
    current_record = AgentQuery.read(item_id)
    old_citation_name = current_record.citation_name
    if 'agent_type' not in data_dict:
        agent_type = current_record.agent_type
    else:
        agent_type = data_dict.get('agent_type')
    data_dict['agent_type'] = agent_type
    data_dict = AgentQuery.validate(data_dict)
    new_agent = AgentQuery.update(item_id, **data_dict)
    if new_agent is None:
        raise toolkit.ValidationError('Unable to update agent. Check the fields are valid.')
    if new_agent.citation_name != old_citation_name:
        # if the name has been updated, the author strings need to be updated everywhere else too
        agent_id_column = AgentContributionActivityQuery.m.agent_id
        contrib_activities = AgentContributionActivityQuery.search(agent_id_column == item_id)
        packages = list(set([c.contribution_activity.package.id for c in contrib_activities]))
        for p in packages:
            author_string = get_author_string(package_id=p)
            toolkit.get_action('package_revise')({}, {'match': {'id': p},
                                                      'update': {'author': author_string}})
    return new_agent.as_dict()
Example 14
def news_subscription_create(context, data_dict):
    '''Create a subscription.

        :param notify_by_mail: Setting if user should be notified by mail for news.
        :type notify_by_mail: boolean, default False

        :returns: the newly created subscription object
        :rtype: dictionary

        '''

    l.check_access('news_subscription', context, data_dict)
    news_subscription_schema = schema.news_subscription_schema()
    data, errors = df.validate(data_dict, news_subscription_schema, context)

    if errors:
        raise t.ValidationError(errors)

    model = context['model']
    user = context['user']
    user_obj = model.User.get(user)

    notify_by_mail = data_dict.get('notify_by_mail', False)

    is_subscribed = ckanextNewsSubscriptions.get_subscription(user_obj.id)

    if is_subscribed:
        raise t.ValidationError('Subscription for user %s already exists!' %
                                user_obj.name)

    subscription = ckanextNewsSubscriptions(subscriber_id=user_obj.id,
                                            notify_by_mail=notify_by_mail)
    subscription.save()
    out = subscription.as_dict()
    send_email_condition = config.get('testing', False)
    if not send_email_condition:
        # Notify the user via email
        # TODO: E-mail notifications should be sent asynchronous using celery tasks
        # TODO: Setup email server for testing mode
        vars = {
            'site_title_dk': config_option_show('ckan.site_title', 'da_DK'),
            'site_title_en': config_option_show('ckan.site_title', 'en'),
            'site_url': config.get('ckan.site_url'),
            'user_name': user_obj.name
        }
        msg_body = render_jinja2('emails/news_subscriptions.txt', vars)
        msg_subject = render_jinja2('emails/news_subscriptions_subject.txt',
                                    vars)
        send_email(msg_body, user_obj.email, msg_subject)

    return out
Example 15
    def parse_notification(self, request_data):
        my_host = request.host

        fields = ['customer_name', 'resources']

        for field in fields:
            if field not in request_data:
                raise tk.ValidationError(
                    {'message': '%s not found in the request' % field})

        # Parse the body
        resources = request_data['resources']
        user_name = request_data['customer_name']
        datasets = []

        if not isinstance(user_name, six.string_types):
            raise tk.ValidationError(
                {'message': 'Invalid customer_name format'})

        if not isinstance(resources, list):
            raise tk.ValidationError({'message': 'Invalid resources format'})

        for resource in resources:
            if isinstance(resource, dict) and 'url' in resource:
                parsed_url = urlparse(resource['url'])
                dataset_name = re.findall('^/dataset/([^/]+).*$',
                                          parsed_url.path)

                resource_url = parsed_url.netloc
                if ':' in my_host and ':' not in resource_url:
                    # Add the default port depending on the protocol
                    default_port = '80' if parsed_url.protocol == 'http' else '443'
                    resource_url = resource_url + default_port

                if len(dataset_name) == 1:
                    if resource_url == my_host:
                        datasets.append(dataset_name[0])
                    else:
                        raise tk.ValidationError({
                            'message':
                            'Dataset %s is associated with the CKAN instance located at %s, expected %s'
                            % (dataset_name[0], resource_url, my_host)
                        })
            else:
                raise tk.ValidationError(
                    {'message': 'Invalid resource format'})

        return {'users_datasets': [{'user': user_name, 'datasets': datasets}]}
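A hypothetical payload in the shape parse_notification expects; the host and dataset URL are illustrative only:

request_data = {
    'customer_name': 'alice',
    'resources': [
        {'url': 'http://localhost:5000/dataset/example-dataset'},
    ],
}
# => {'users_datasets': [{'user': 'alice', 'datasets': ['example-dataset']}]}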
Example 16
def user_extra_update(context, data_dict):
    '''Update a user extra parameter.

        :param key: Key of the parameter you want to update.
        :type key: string

        :param value: The new value of the parameter.
        :type value: string

        '''

    logic.check_access('user_extra', context, data_dict)
    data, errors = df.validate(data_dict, user_extra_schema(), context)

    if errors:
        raise toolkit.ValidationError(errors)

    model = context.get('model')
    user = context.get('user')
    user_obj = model.User.get(user)
    user_id = user_obj.id
    key = data.get('key')
    value = data.get('value')

    user_extra = UserExtra.get(user_id, key)
    if user_extra is None:
        raise logic.NotFound

    user_extra.key = key
    user_extra.value = value
    user_extra.save()

    return _table_dictize(user_extra, context)
Example 17
    def upload(self, max_size=2):
        """Actually upload the file.

        max_size is the maximum file size to accept in megabytes
        (note that not all backends will support this limitation).
        """
        if self._uploaded_file is not None:
            _log.debug("Initiating file upload for %s, storage is %s", self._filename, self._storage)
            size = get_uploaded_size(self._uploaded_file)
            _log.debug("Detected file size: %s", size)
            if size and max_size and size > max_size * MB:
                raise toolkit.ValidationError({'upload': ['File upload too large']})

            mimetype = get_uploaded_mimetype(self._uploaded_file)
            _log.debug("Detected file MIME type: %s", mimetype)
            stored = self._storage.upload(_get_underlying_file(self._uploaded_file),
                                          self._filename,
                                          self._object_type,
                                          mimetype=mimetype)
            _log.debug("Finished uploading file %s, %d bytes written to storage", self._filename, stored)
            self._clear = True

        if self._clear \
                and self._old_filename \
                and not is_absolute_http_url(self._old_filename):
            _log.debug("Clearing old asset file: %s", self._old_filename)
            self._storage.delete(self._old_filename)
Example 18
def add_multipolygon_filter(search, coordinates):
    '''
    Adds a multipolygon filter query to the search object and returns a new search object. Only the
    first group in each polygon grouping will be used, as elasticsearch doesn't support this type of
    query yet (this kind of query is used for vacating space inside a polygon, like a donut for
    example).

    If more than one group is included then they are included as an or with a minimum must match of
    1.

    :param search: the current elasticsearch DSL object
    :param coordinates: a list of a list of a list of a list of at least 3 lon/lat pairs (i.e.
                        [[[[-16, 44], [-13.1, 34.8], [15.99, 35], [5, 49]]]])
    :return: a search object
    '''
    filters = []
    for group in coordinates:
        points = group[0]
        if len(points) < 3:
            raise toolkit.ValidationError(
                'Not enough points in the polygon, must be 3 or more')

        options = {
            FIELD: {
                # format the polygon point data appropriately
                'points': [{
                    'lat': float(point[1]),
                    'lon': float(point[0])
                } for point in points],
            },
        }
        filters.append(Q('geo_polygon', **options))
    # add the filter to the search as an or
    return search.filter(Q('bool', should=filters, minimum_should_match=1))
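The coordinates argument uses GeoJSON MultiPolygon nesting, as in the docstring; a sketch with a single polygon (outer ring only), assuming search and FIELD are already set up by the surrounding module:

coordinates = [[[[-16, 44], [-13.1, 34.8], [15.99, 35], [5, 49]]]]
search = add_multipolygon_filter(search, coordinates)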
Example 19
def package_to_tabular_data_format(context, data_dict):
    '''Return the given CKAN package in Tabular Data Format.

    This returns just the data package metadata in JSON format (what would be
    the contents of the datapackage.json file); it does not return the whole
    multi-file package including datapackage.json file and additional data
    files.

    If a zipstream.ZipFile object is provided with key "pkg_zipstream" in the
    context dict, then a datapackage.json file and data files for each of the
    package's resources (if the resource has a file uploaded to the FileStore)
    will be added into the zipstream.

    :param package_id: the ID of the package
    :type package_id: string

    :returns: the data package metadata
    :rtype: JSON

    '''
    try:
        package_id = data_dict['id']
    except KeyError:
        raise toolkit.ValidationError({'id': 'missing id'})

    pkg_dict = toolkit.get_action('package_show')(context,
                                                  {'name_or_id': package_id})
    return tdf.convert_to_tdf(pkg_dict, pkg_zipstream=context.get('pkg_zipstream'))
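A sketch of streaming the whole package as a zip, assuming the function is registered as the 'package_to_tabular_data_format' action and the zipstream package is available; the dataset id is hypothetical:

import zipstream
import ckan.plugins.toolkit as toolkit

z = zipstream.ZipFile()
tdf = toolkit.get_action('package_to_tabular_data_format')(
    {'pkg_zipstream': z}, {'id': 'my-dataset'})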
Example 20
def create_geom_columns(context, data_dict):
    """Add geom column to the given resource, and optionally populate them.

    @param context: Current context
    @param data_dict: Parameters:
      - resource_id: The resource for which to create geom columns; REQUIRED
      - latitude_field: The existing latitude field in the column, optional unless populate is true
      - longitude_field: The existing longitude field in the column, optional unless populate is true
      - populate: If true then pre-populate the geom fields using the latitude
                  and longitude fields. Defaults to true.
      - index: If true then create an index on the created columns.
               Defaults to true.
    """
    try:
        resource_id = data_dict['resource_id']
    except KeyError:
        raise toolkit.ValidationError({
            'resource_id': 'A Resource id is required'
        })
    populate = data_dict.get('populate', True)
    index = data_dict.get('index', True)

    with get_connection(write=True) as connection:
        create_postgis_columns(resource_id, connection)
        if index:
            create_postgis_index(resource_id, connection)

    if populate:
        update_geom_columns(context, data_dict)
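An illustrative data_dict covering the parameters above (the resource id and field names are made up):

create_geom_columns({}, {
    'resource_id': 'resource-123',
    'latitude_field': 'lat',   # needed because populate defaults to True
    'longitude_field': 'long',
    'populate': True,
    'index': True,
})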
Example 21
def show_datarequest_comment(context, data_dict):
    """
    Action to retrieve a comment. Access rights will be checked before getting
    the comment and a NotAuthorized exception will be raised if the user is not
    allowed to get the comment.

    :param id: The ID of the comment to be retrieved
    :type id: string

    :returns: A dict with the following fields: id, user_id, datarequest_id,
        time and comment
    :rtype: dict
    """

    model = context["model"]
    comment_id = data_dict.get("id", "")

    # Check id
    if not comment_id:
        raise tk.ValidationError([tk._("Comment ID has not been included")])

    # Init the database
    db.init_db(model)

    # Check access
    tk.check_access(constants.SHOW_DATAREQUEST_COMMENT, context, data_dict)

    # Get comments
    result = db.Comment.get(id=comment_id)
    if not result:
        raise tk.ObjectNotFound(
            tk._("Comment {comment_id} not found in the data base").format(
                comment_id=comment_id))

    return _dictize_comment(result[0])
Example 22
def package_showcase_list(context, data_dict):
    '''List showcases associated with a package.

    :param package_id: id or name of the package
    :type package_id: string

    :rtype: list of dictionaries
    '''

    toolkit.check_access('ckanext_package_showcase_list', context, data_dict)

    # validate the incoming data_dict
    validated_data_dict, errors = validate(data_dict,
                                           package_showcase_list_schema(),
                                           context)

    if errors:
        raise toolkit.ValidationError(errors)

    # get a list of showcase ids associated with the package id
    showcase_id_list = ShowcasePackageAssociation.get_showcase_ids_for_package(
        validated_data_dict['package_id'])

    showcase_list = []
    if showcase_id_list is not None:
        for showcase_id in showcase_id_list:
            showcase = toolkit.get_action('package_show')(context, {
                'id': showcase_id
            })
            showcase_list.append(showcase)

    return showcase_list
Example 23
def dataset_show_as_dataset(context, data_dict):
    '''Return the given CKAN dataset as a zipped Data Package.

    This builds a zip archive containing a dataset.json file with the dataset
    metadata, plus a download of each of the dataset's resources that has a
    mimetype set.

    :param id: the ID of the dataset
    :type id: string

    :returns: the zip archive contents
    :rtype: bytes

    '''
    try:
        dataset_id = data_dict['id']
    except KeyError:
        raise toolkit.ValidationError({'id': 'missing id'})

    dataset_dict = toolkit.get_action('package_show')(context, {
        'id': dataset_id
    })

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED, False) as zf:
        for resource in dataset_dict["resources"]:
            if resource["mimetype"] is not None:
                r = requests.get(resource["url"], verify=False)
                file_name = resource["url"].split("/")
                zf.writestr(file_name[-1], r.content)
        zf.writestr("dataset.json", json.dumps(dataset_dict, indent=2))
    return zip_buffer.getvalue()
Example 24
def package_add_source(context, data_dict):
    '''Add a source to a package.

     :param package_id: id or name of the package
     :type package_id: string

     :returns: the newly created source record
     '''

    toolkit.check_access('package_update', context, data_dict)

    # validate the incoming data_dict
    validated_data_dict, errors = validate(data_dict,
                                           package_add_source_schema(),
                                           context)

    if errors:
        raise toolkit.ValidationError(errors)

    package_id, source_link, source_title = toolkit.get_or_bust(
        validated_data_dict, ['package_id', 'source_link', 'source_title'])
    if not source_link.startswith('http'):
        source_dict = toolkit.get_action('package_show')(
            context, {
                'name_or_id': source_link
            })
        return DatasetSourceModel.create(package_id=package_id,
                                         source_link=source_dict['id'],
                                         source_title=source_dict['title'])

    return DatasetSourceModel.create(package_id=package_id,
                                     source_link=source_link,
                                     source_title=source_title)
Example 25
    def update_entry(cls, **kwargs):
        """
        Update name or long_url for the existing entry
        :param kwargs:
        :return:
        """
        _id = kwargs.get(u'id', u'')
        rec = cls.get_entry(id=_id)
        if not rec:
            raise toolkit.ObjectNotFound(u"Given record not found")

        if u'name' in kwargs:
            rec.name = convert_to_name(kwargs[u'name'])

        if u'long_url' in kwargs:
            validators.validate_url(kwargs[u'long_url'])
            rec.long_url = kwargs[u'long_url']

        try:
            model.Session.commit()
        except orm_exceptions.IntegrityError as e:
            log.error(e)
            model.Session.flush()
            raise toolkit.ValidationError(u"Another Record with same name or url exists")

        return rec
Example 26
def all_issues(get_query_dict):
    query, errors = toolkit.navl_validate(
        dict(get_query_dict), schema.issue_dataset_controller_schema())
    if errors:
        raise toolkit.ValidationError(errors)
    query.pop('__extras', None)
    return _search_issues(include_datasets=True, **query)
Example 27
def issues_for_dataset(dataset_id, get_query_dict):
    query, errors = toolkit.navl_validate(
        dict(get_query_dict), schema.issue_dataset_controller_schema())
    if errors:
        raise toolkit.ValidationError(errors)
    query.pop('__extras', None)
    return _search_issues(dataset_id=dataset_id, **query)
Example 28
def validate_datarequest(context, request_data):

    errors = {}

    # Check name
    if len(request_data['title']) > constants.NAME_MAX_LENGTH:
        errors[tk._('Title')] = [tk._('Title must be a maximum of %d characters long') % constants.NAME_MAX_LENGTH]

    if not request_data['title']:
        errors[tk._('Title')] = [tk._('Title cannot be empty')]

    # Title is only checked in the database when it's correct
    avoid_existing_title_check = context.get('avoid_existing_title_check', False)

    if tk._('Title') not in errors and not avoid_existing_title_check:
        if db.DataRequest.datarequest_exists(request_data['title']):
            errors[tk._('Title')] = [tk._('That title is already in use')]

    # Check description
    if len(request_data['description']) > constants.DESCRIPTION_MAX_LENGTH:
        errors[tk._('Description')] = [tk._('Description must be a maximum of %d characters long') % constants.DESCRIPTION_MAX_LENGTH]

    # Check organization
    if request_data['organization_id']:
        try:
            tk.get_validator('group_id_exists')(request_data['organization_id'], context)
        except Exception:
            errors[tk._('Organization')] = [tk._('Organization is not valid')]

    if len(errors) > 0:
        raise tk.ValidationError(errors)
Example 29
def harvest_send_job_to_gather_queue(context, data_dict):
    '''
    Sends a harvest job to the gather queue.

    :param id: the id of the harvest job
    :type id: string
    '''
    log.info('Send job to gather queue: %r', data_dict)

    job_id = logic.get_or_bust(data_dict, 'id')
    job = toolkit.get_action('harvest_job_show')(context, {'id': job_id})

    check_access('harvest_send_job_to_gather_queue', context, job)

    # gather queue
    publisher = get_gather_publisher()

    # Check the source is active
    source = harvest_source_show(context, {'id': job['source_id']})
    if not source['active']:
        raise toolkit.ValidationError('Source is not active')

    job_obj = HarvestJob.get(job['id'])
    job_obj.status = job['status'] = u'Running'
    job_obj.save()
    publisher.send({'harvest_job_id': job['id']})
    log.info('Sent job %s to the gather queue', job['id'])

    return harvest_job_dictize(job_obj, context)
Example 30
    def create_entry(cls, **kwargs):
        """
        Create entry
        :return:
        """
        long_url = kwargs.get(u'long_url', u'')
        name = kwargs.get(u'name', u'')
        validators.validate_url(long_url)

        rec = cls.get_entry(id=None, url=long_url)

        if not rec:
            _uuid = get_uuid()
            rec = cls()
            rec.id = _uuid
            rec.name = convert_to_name(name or _uuid)
            rec.long_url = long_url
            model.Session.add(rec)
            try:
                model.Session.commit()
            except orm_exceptions.IntegrityError as e:
                log.error(e)
                model.Session.flush()
                raise toolkit.ValidationError(u"Something wrong while saving the record")

        return rec