Beispiel #1
0
    def export_datasets(self, out_folder):
        '''
        Export every active dataset on this CKAN instance as RDF.

        Each dataset is fetched from ``<site_url>/dataset/<name>.rdf`` and
        written to ``<out_folder>/<name>.rdf``.

        :param out_folder: directory to write the .rdf files to; created
            if it does not already exist.
        '''
        import urlparse
        import urllib2
        import pylons.config as config
        import ckan.model as model
        import ckan.logic as logic
        import ckan.lib.helpers as h

        # Create output folder if not exists
        if not os.path.isdir(out_folder):
            os.makedirs(out_folder)

        fetch_url = config['ckan.site_url']
        # Act as the site user so dataset visibility matches the site's own.
        user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})
        context = {'model': model, 'session': model.Session,
                   'user': user['name']}
        dataset_names = logic.get_action('package_list')(context, {})
        for dataset_name in dataset_names:
            dd = logic.get_action('package_show')(context,
                                                  {'id': dataset_name})
            if dd['state'] != 'active':
                continue

            url = h.url_for(controller='package', action='read',
                            id=dd['name'])
            # url_for returns a site-relative path ('/...'); drop the
            # leading slash so urljoin appends it to fetch_url.
            url = urlparse.urljoin(fetch_url, url[1:]) + '.rdf'
            try:
                fname = os.path.join(out_folder, dd['name']) + ".rdf"
                r = urllib2.urlopen(url).read()
                with open(fname, 'wb') as f:
                    f.write(r)
            except IOError as ioe:
                # Best effort: report the failure and keep exporting.
                sys.stderr.write(str(ioe) + "\n")
    def validate_config(self, config):
        '''
        Validate a harvest source's config string (a JSON document).

        Returns the config unchanged when it is empty or valid; raises
        ValueError describing the first problem found otherwise.

        NOTE(review): the original block ended with a ``try:`` that had no
        handler (a syntax error, presumably lost in a paste); the closing
        ``except ValueError`` and final ``return config`` were restored to
        match the checks above.
        '''
        print('VALIDATE CONFIG')
        if not config:
            return config

        try:
            config_obj = json.loads(config)

            if 'api_version' in config_obj:
                try:
                    int(config_obj['api_version'])
                except ValueError:
                    raise ValueError('api_version must be an integer')

            if 'default_tags' in config_obj:
                if not isinstance(config_obj['default_tags'], list):
                    raise ValueError('default_tags must be a list')

            if 'default_groups' in config_obj:
                if not isinstance(config_obj['default_groups'], list):
                    raise ValueError('default_groups must be a list')

                # Check that every default group actually exists.
                context = {'model': model, 'user': c.user}
                for group_name in config_obj['default_groups']:
                    try:
                        get_action('group_show')(context, {'id': group_name})
                    except NotFound:
                        raise ValueError('Default group not found')

            if 'default_extras' in config_obj:
                if not isinstance(config_obj['default_extras'], dict):
                    raise ValueError('default_extras must be a dictionary')

            if 'from' in config_obj:
                try:
                    datetime.strptime(config_obj['from'],
                                      '%Y-%m-%dT%H:%M:%SZ')
                except ValueError:
                    raise ValueError(
                        "Incorrect date format, should be yyyy-mm-ddThh:mm:ssZ ")

            if 'until' in config_obj:
                try:
                    datetime.strptime(config_obj['until'],
                                      '%Y-%m-%dT%H:%M:%SZ')
                except ValueError:
                    raise ValueError(
                        "Incorrect date format, should be yyyy-mm-ddThh:mm:ssZ ")

            if 'user' in config_obj:
                # Check that the named user exists.
                context = {'model': model, 'user': c.user}
                try:
                    get_action('user_show')(context,
                                            {'id': config_obj.get('user')})
                except NotFound:
                    raise ValueError('User not found')
        except ValueError as e:
            # json.loads raises ValueError for malformed JSON too, so this
            # also converts parse errors into the expected exception type.
            raise e

        return config
 def _delete_vocab(self, vocab_name):
     '''Remove the named vocabulary together with every tag it contains.'''
     log.info('Deleting vocabulary "%s"' % vocab_name)
     ctx = {'model': model, 'session': model.Session,
            'user': self.user_name}
     vocabulary = logic.get_action('vocabulary_show')(ctx, {'id': vocab_name})
     # Tags must go first; a vocabulary with tags cannot be deleted.
     for tag_dict in vocabulary.get('tags'):
         logic.get_action('tag_delete')(ctx, {'id': tag_dict['id']})
     logic.get_action('vocabulary_delete')(ctx, {'id': vocabulary['id']})
Beispiel #4
0
def organization_import(data):
    """ Import organizations """
    _load_config()
    context = _create_context()
    configuration = simplejson.loads(data)
    data_url = configuration.get('url')
    public_organization = configuration.get('public_organization', False)

    with closing(urllib2.urlopen(data_url)) as source:
        for entry in simplejson.load(source):
            values = {}
            if isinstance(entry, basestring):
                # A bare string is just the organization title.
                title = entry.strip()
                values['title'] = title
                values['name'] = munge_title_to_name(title).lower()
            else:
                values['name'] = entry.pop('name')
                values['title'] = entry.pop('title')
                values['description'] = entry.pop('description', None)
                # Everything left over becomes an extra.
                values['extras'] = [{'key': k, 'value': v}
                                    for k, v in entry.iteritems()]
            values['id'] = values['name']

            if public_organization:
                values['extras'] = [
                    {'key': 'public_adminstration_organization',
                     'value': 'true'}]
            try:
                get_action('organization_show')(context, {'id': values['id']})
                # Do not override organizations
            except NotFound:
                get_action('organization_create')(context, values)
    def import_dms(self, url):
        '''
        Create packages for DMS records fetched from ``url``.

        ``url`` must return a JSON list of record dicts, each with an
        'identifier' key. Records whose identifier already exists as an
        active package with extra metadata-source='dms' are skipped.
        '''
        input_records = requests.get(url).json()
        to_import = {}
        for record in input_records:
            to_import[record['identifier']] = record

        # The result was never used (the actions below run as
        # self.user_name); the call is kept for parity with the original.
        logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {}
        )

        collected_ids = set(to_import.keys())

        # Ids of packages previously imported from the DMS.
        existing_package_ids = set([row[0] for row in
                       model.Session.query(model.Package.id).from_statement(
                           '''select p.id
                           from package p
                           join package_extra pe on p.id = pe.package_id
                           where pe.key = 'metadata-source' and pe.value = 'dms'
                           and p.state = 'active' ''')])

        context = {}
        context['user'] = self.user_name

        for num, package_id in enumerate(collected_ids - existing_package_ids):
            # Drop keys a previous iteration's action may have cached.
            context.pop('package', None)
            context.pop('group', None)
            new_package = to_import[package_id]
            try:
                print(str(datetime.datetime.now()) + ' Created id ' + package_id)
                logic.get_action('datajson_create')(context, new_package)
            except Exception as e:
                print(str(datetime.datetime.now()) +
                      ' Error when creating id ' + package_id)
                print(e)
    def _import_data_node(self, node, parents, namespace=''):
        '''Recursively import one tree node: a leaf becomes a dataset, a
        branch contributes title translations and recurses with itself as
        a parent, a children element simply recurses.'''
        leaf_tag = '{%s}leaf' % namespace
        branch_tag = '{%s}branch' % namespace
        children_tag = '{%s}children' % namespace

        if node.tag == leaf_tag:
            self._import_dataset(node, parents, namespace)

        elif node.tag == branch_tag:
            # Push this theme's title translations into the translation
            # table before descending.
            title = node.find('{%s}title[@language="en"]' % namespace)
            if title is not None:
                log.info('Updating translations for theme %s' % title.text)
                translations = []
                for lang in self.data_import_langs:
                    lang_node = node.find(
                        '{%s}title[@language="%s"]' % (namespace, lang))
                    if lang_node is not None:
                        translations.append({
                            'term': unicode(title.text),
                            'term_translation': unicode(lang_node.text),
                            'lang_code': lang,
                        })

                if translations:
                    context = {'model': model, 'session': model.Session,
                                'user': self.user_name,
                                'extras_as_string': True}
                    logic.get_action('term_translation_update_many')(
                        context, {'data': translations})

            # This branch becomes a parent of everything beneath it.
            for child in node:
                self._import_data_node(child, parents + [node], namespace)

        elif node.tag == children_tag:
            for child in node:
                self._import_data_node(child, parents, namespace)
    def export_datasets(self, output_folder, fetch_url):
        '''
        Export every active dataset as RDF into ``output_folder``.

        Each dataset is fetched from ``fetch_url`` as ``.../<name>.rdf``
        and written to ``<output_folder>/<name>.rdf``.
        '''
        import urlparse

        user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})
        context = {'model': model, 'session': model.Session,
                   'user': user['name']}
        dataset_names = logic.get_action('package_list')(context, {})
        for dataset_name in dataset_names:
            dataset_dict = logic.get_action('package_show')(
                context, {'id': dataset_name})
            if dataset_dict['state'] != 'active':
                continue

            url = ckan.lib.helpers.url_for(controller='package',
                                           action='read',
                                           id=dataset_dict['name'])
            # url_for gives a site-relative path; drop the leading slash
            # so urljoin appends it to fetch_url.
            url = urlparse.urljoin(fetch_url, url[1:]) + '.rdf'

            try:
                filename = os.path.join(output_folder,
                                        dataset_dict['name']) + ".rdf"
                print(filename)
                # NOTE(review): hard-coded credentials; consider moving
                # these to configuration.
                r = requests.get(url, auth=('ec', 'ecportal'))
                with open(filename, 'wb') as f:
                    f.write(r.content)
            except IOError as ioe:
                # Best effort: report and continue with the next dataset.
                sys.stderr.write(str(ioe) + "\n")
    def dashboard_activity_stream(self, user_id, filter_type=None, filter_id=None,
                              offset=0):
        '''Return the dashboard activity stream of the current user.

        :param user_id: the id of the user
        :type user_id: string

        :param filter_type: the type of thing to filter by
        :type filter_type: string

        :param filter_id: the id of item to filter by
        :type filter_id: string

        :returns: an activity stream as an HTML snippet
        :rtype: string

        '''
        context = {'model': model, 'session': model.Session, 'user': c.user}

        if filter_type:
            # Map each filter type to its '<type>_activity_list_html'
            # action. The 'user' entry had been redacted to '******' in
            # this copy, which is not a registered action name; restored
            # to follow the same pattern as the other entries.
            action_functions = {
                'dataset': 'package_activity_list_html',
                'user': 'user_activity_list_html',
                'group': 'group_activity_list_html',
                'organization': 'organization_activity_list_html' #ADDED BY HDX
            }
            action_function = logic.get_action(action_functions.get(filter_type))
            return action_function(context, {'id': filter_id, 'offset': offset})
        else:
            return logic.get_action('dashboard_activity_list_html')(
                context, {'offset': offset})
    def _create_webstorer_task(self, resource):
        '''Queue a celery "webstorer.upload" task for ``resource`` and
        record its celery task id in the task_status table.'''
        site_user = get_action('get_site_user')({'model': model,
                                                 'ignore_auth': True,
                                                 'defer_commit': True}, {})
        task_context = json.dumps({
            'site_url': self.site_url,
            'apikey': site_user.get('apikey'),
            'username': site_user.get('name'),
            'webstore_url': self.webstore_url,
        })
        task_data = json.dumps(resource_dictize(resource, {'model': model}))
        webstorer_task = send_task("webstorer.upload",
                                   [task_context, task_data])

        # update the task_status table
        status_entry = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'webstorer',
            'key': u'celery_task_id',
            'value': webstorer_task.task_id,
            'last_updated': datetime.now().isoformat(),
        }
        update_context = {
            'model': model,
            'session': model.Session,
            'user': site_user.get('name'),
            'defer_commit': True,
        }
        get_action('task_status_update')(update_context, status_entry)
Beispiel #10
0
    def test_harvest_update_records(self):
        '''Re-running a harvest job must not touch an unchanged package,
        while a forced re-import must update it.

        NOTE: the original used ``assert cond, other == thing`` in several
        places, which makes the comparison a mere assert *message* that is
        never checked; each condition is now asserted separately.
        '''
        # Create source
        source_fixture = {"url": u"http://127.0.0.1:8999/single/dataset1.xml", "type": u"gemini-single"}

        source, first_job = self._create_source_and_job(source_fixture)

        first_obj = self._run_job_for_single_document(first_job)

        first_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was created
        assert first_package_dict
        assert first_obj.current
        assert first_obj.package

        # Create and run a second job, the package should not be updated
        second_job = self._create_job(source.id)

        second_obj = self._run_job_for_single_document(second_job)

        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)

        second_package_dict = get_action("package_show_rest")(self.context, {"id": first_obj.package_id})

        # Package was not updated
        assert second_package_dict
        assert first_package_dict["id"] == second_package_dict["id"]
        assert first_package_dict["metadata_modified"] == \
            second_package_dict["metadata_modified"]
        assert not second_obj.package
        assert not second_obj.package_id
        assert not second_obj.current
        assert first_obj.current

        # Create and run a third job, forcing the importing to simulate an update in the package
        third_job = self._create_job(source.id)
        third_obj = self._run_job_for_single_document(third_job, force_import=True)

        # For some reason first_obj does not get updated after the import_stage,
        # and we have to force a refresh to get the actual DB values.
        Session.remove()
        Session.add(first_obj)
        Session.add(second_obj)
        Session.add(third_obj)

        Session.refresh(first_obj)
        Session.refresh(second_obj)
        Session.refresh(third_obj)

        third_package_dict = get_action("package_show_rest")(self.context, {"id": third_obj.package_id})

        # Package was updated
        assert third_package_dict
        assert first_package_dict["id"] == third_package_dict["id"]
        assert third_package_dict["metadata_modified"] > \
            second_package_dict["metadata_modified"]
        assert third_obj.package
        assert third_obj.package_id == first_package_dict["id"]
        assert third_obj.current
        assert not second_obj.current
        assert not first_obj.current
Beispiel #11
0
    def list(self, id):
        """ List all related items for a specific dataset """
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author,
                   'auth_user_obj': c.userobj,
                   'for_view': True}
        data_dict = {'id': id}

        # Bail out early when the dataset is missing or not visible.
        try:
            logic.check_access('package_show', context, data_dict)
        except logic.NotFound:
            base.abort(404, base._('Dataset not found'))
        except logic.NotAuthorized:
            base.abort(401, base._('Not authorized to see this page'))

        try:
            pkg_dict = logic.get_action('package_show')(context, data_dict)
            c.pkg_dict = pkg_dict
            c.related_list = logic.get_action('related_list')(context,
                                                              data_dict)
            # package_show leaves the Package object in the context.
            c.pkg = context['package']
            c.resources_json = h.json.dumps(pkg_dict.get('resources', []))
        except logic.NotFound:
            base.abort(404, base._('Dataset not found'))
        except logic.NotAuthorized:
            base.abort(401, base._('Unauthorized to read package %s') % id)

        return base.render("package/related_list.html")
Beispiel #12
0
def process():
    '''
    (Re)prepare one or more resources of a package and bump the package's
    metadata timestamp; returns a JSON string with the results.
    '''
    response.headers["Content-Type"] = "application/json"

    package_id = request.params.get('package_id')
    hasAccess(package_id)

    # Packages with an applied DOI are frozen.
    if hasAppliedDoi(package_id):
        return {'error':True, 'message':'Cannot edit resource of package with applied DOI'}

    sheet_id = request.params.get('sheet_id')
    resource_id = request.params.get('resource_id')
    ids = request.params.get('resource_ids')

    if sheet_id == "":
        sheet_id = None

    # Options are best-effort: a missing (None -> TypeError) or malformed
    # (ValueError) 'options' parameter simply means no options.
    # (Was a bare ``except:``; narrowed to what json.loads can raise.)
    try:
        options = json.loads(request.params.get('options'))
    except (TypeError, ValueError):
        options = {}

    # option, if a resource id and a datasheet id are passed, then the full 'merged' view will be return

    # only allow specified options
    safeOptions = {}
    for option in parseOptions:
        if option in options:
            safeOptions[option] = options[option]

    # see if we are editing multiple files or just one
    result = []
    if ids is not None:
        ids = json.loads(ids)
        for resource_id in ids:
            workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
            result.append(query.getResource(resource_id))
    else:
        workspace.prepareFile(package_id, resource_id, sheet_id, safeOptions)
        result = query.getResource(resource_id, sheet_id)

    # update the dataset, so the metadata timestamp changes
    context = {'model': model, 'user': c.user}
    pkg = logic.get_action('package_show')(context, {'id': package_id})

    # use this counter to poke the dataset.  This will update the last modified timestamps
    # required for 'updated since last pushed UI'
    resourceUpdateCount = utils.getPackageExtra('resourceUpdateCount', pkg)
    if resourceUpdateCount is None:
        resourceUpdateCount = 1
    else:
        resourceUpdateCount = int(resourceUpdateCount) + 1
    utils.setPackageExtra('resourceUpdateCount', resourceUpdateCount, pkg)
    pkg = logic.get_action('package_update')(context, pkg)

    result = {
        'metadata_modified' : pkg.get('metadata_modified'),
        'result' : result
    }

    return jsonStringify(result)
Beispiel #13
0
def _group_or_org_member_create(context, data_dict, is_org=False):
    '''Add the user named in ``data_dict['username']`` to the group or
    organization ``data_dict['id']`` with capacity ``data_dict['role']``.
    '''
    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    model = context['model']
    user = context['user']
    session = context['session']

    schema = ckan.logic.schema.member_schema()
    data, errors = _validate(data_dict, schema, context)

    username = _get_or_bust(data_dict, 'username')
    role = data_dict.get('role')
    group_id = data_dict.get('id')
    group = model.Group.get(group_id)
    result = session.query(model.User).filter_by(name=username).first()
    if result:
        user_id = result.id
    else:
        # Previously user_id was simply left unbound here, producing a
        # NameError below; fail with a meaningful error instead (matches
        # the sibling implementation of this helper).
        message = u'User {username} does not exist.'.format(username=username)
        raise logic.ValidationError({'message': message},
                                    error_summary=message)
    member_dict = {
        'id': group.id,
        'object': user_id,
        'object_type': 'user',
        'capacity': role,
    }
    member_create_context = {
        'model': model,
        'user': user,
        'session': session
    }
    logic.get_action('member_create')(member_create_context, member_dict)
Beispiel #14
0
    def _save_new(self, context):
        '''
        Register a new pending user from the request params and add them
        as an editor of the organization identified by
        ``inventory_organization_id``.
        '''
        model = context['model']
        context['ignore_auth'] = True

        try:
            data_dict = logic.clean_dict(unflatten(
                logic.tuplize_dict(logic.parse_params(request.params))))
            context['message'] = data_dict.get('log_message', '')
            captcha.check_recaptcha(request)

            organization = get_action('inventory_organization_by_inventory_id')(
                context, {'id': data_dict['inventory_organization_id']})

            # The user never sees this password (account starts PENDING),
            # so give them an unguessable throw-away one.
            password = str(random.SystemRandom().random())
            data_dict['password1'] = password
            data_dict['password2'] = password
            data_dict['state'] = model.State.PENDING
            user = get_action('user_create')(context, data_dict)

            data_dict = {
                'id': organization['id'],
                'role': 'editor',
                'username': user['name']
            }
            logic.get_action('organization_member_create')(context, data_dict)
        except NotAuthorized:
            abort(401, _('Unauthorized to create user %s') % '')
        except NotFound:
            abort(404, _('User or organization not found'))
Beispiel #15
0
def _delete(params):
    '''Delete a resource, its uploaded file (if any) and its EcoSIS record.'''
    context = {'model': model, 'user': c.user}
    resource = logic.get_action('resource_show')(context, params)

    # Resources of a package with an applied DOI are frozen.
    if hasAppliedDoi(resource.get('package_id')):
        return {'error':True, 'message':'Cannot delete resource of package with applied DOI'}

    # Raises for users without sufficient access.
    logic.get_action('resource_delete')(context, params)
    resource_id = params.get('id')

    # Uploaded files also live on disk; remove the file itself.
    if resource.get('url_type') == "upload":
        upload = uploader.ResourceUpload(resource)
        path = upload.get_path(resource['id'])
        if os.path.exists(path):
            os.remove(path)

    # Finally remove the resource from EcoSIS.
    deleteUtil.resource(resource.get("package_id"), resource_id)

    params['success'] = True
    return params
Beispiel #16
0
    def test_non_sysadmin_can_update_related_item(self):
        '''Non-sysadmins can change related item.

        If they don't change the featured field.
        '''

        # Run as 'annafan', an ordinary (non-sysadmin) test user. (The
        # name had been redacted to '******' in this copy, which is not a
        # valid user; restored from the comment below.)
        context = {
            'model': model,
            'user': 'annafan',
            'session': model.Session
        }

        data_dict = {
            'title': 'Title',
            'description': 'Description',
            'type': 'visualization',
            'url': 'http://ckan.org',
            'image_url': 'http://ckan.org/files/2012/03/ckanlogored.png',
        }

        # Create the related item as annafan
        result = logic.get_action('related_create')(context, data_dict)

        # Try to change it to a featured item
        result['title'] = 'New Title'

        result = logic.get_action('related_update')(context, result)
        assert_equal(result['title'], 'New Title')
Beispiel #17
0
def _group_or_org_member_create(context, data_dict, is_org=False):
    '''Make ``data_dict['username']`` a member of the group/organization
    ``data_dict['id']`` with capacity ``data_dict['role']``.
    '''
    # creator of group/org becomes an admin
    # this needs to be after the repo.commit or else revisions break
    model = context['model']
    session = context['session']
    acting_user = context['user']

    schema = ckan.logic.schema.member_schema()
    data, errors = _validate(data_dict, schema, context)

    username = _get_or_bust(data_dict, 'username')
    group = model.Group.get(data_dict.get('id'))
    user_obj = model.User.get(username)
    if not user_obj:
        message = _(u'User {username} does not exist.').format(
            username=username)
        raise ValidationError({'message': message}, error_summary=message)

    member_dict = {
        'id': group.id,
        'object': user_obj.id,
        'object_type': 'user',
        'capacity': data_dict.get('role'),
    }
    member_create_context = {
        'model': model,
        'user': acting_user,
        'session': session,
        'ignore_auth': context.get('ignore_auth'),
    }
    logic.get_action('member_create')(member_create_context, member_dict)
Beispiel #18
0
    def _related_create(self, title, description, type, url, image_url):
        '''Create a related item as the site user and return its dict.'''
        site_user = logic.get_action('get_site_user')(
            {'model': model, 'ignore_auth': True}, {})

        context = {'model': model, 'user': site_user['name'],
                   'session': model.Session}
        data_dict = {'title': title, 'description': description,
                     'url': url, 'image_url': image_url, 'type': type}
        return logic.get_action("related_create")(context, data_dict)
Beispiel #19
0
    def test_non_sysadmin_changes_related_items_featured_field_fails(self):
        '''Non-sysadmins cannot change featured field'''

        # Run as 'annafan', an ordinary (non-sysadmin) test user. (The
        # name had been redacted to '******' in this copy, which is not a
        # valid user; restored from the comment below.)
        context = {
            'model': model,
            'user': 'annafan',
            'session': model.Session
        }

        data_dict = {
            'title': 'Title',
            'description': 'Description',
            'type': 'visualization',
            'url': 'http://ckan.org',
            'image_url': 'http://ckan.org/files/2012/03/ckanlogored.png',
        }

        # Create the related item as annafan
        result = logic.get_action('related_create')(context, data_dict)

        # Try to change it to a featured item
        result['featured'] = 1

        try:
            logic.get_action('related_update')(context, result)
        except logic.NotAuthorized as e:
            # Check it's the correct authorization error
            assert 'featured' in str(e)
        else:
            # Previously the test silently PASSED when no exception was
            # raised at all; make that an explicit failure.
            raise AssertionError(
                'related_update should have raised NotAuthorized')
Beispiel #20
0
 def _reset_a_datasets_owner_org(self):
     '''Give the 'adataset' fixture back to the national-health-service
     organization.'''
     # A revision must be open for the package update to be recorded; the
     # returned object itself was never used, so the binding is dropped.
     model.repo.new_revision()
     get_action('package_owner_org_update')(
         {'model': model, 'user': self.sysadmin['name'],
          'ignore_auth': True},
         {'id': 'adataset',
          'organization_id': 'national-health-service'})
Beispiel #21
0
    def _create_task(self, resource):
        '''Record a QA task-status row for ``resource`` and enqueue the
        matching celery "qa.update" task.'''
        site_user = get_action('get_site_user')({'model': model,
                                                 'ignore_auth': True,
                                                 'defer_commit': True}, {})
        context = json.dumps({
            'site_url': self.site_url,
            'apikey': site_user.get('apikey'),
        })
        data = json.dumps(resource_dictize(resource, {'model': model}))

        task_id = make_uuid()
        # The status row is written before the task is dispatched.
        status_entry = {
            'entity_id': resource.id,
            'entity_type': u'resource',
            'task_type': u'qa',
            'key': u'celery_task_id',
            'value': task_id,
            'error': u'',
            'last_updated': datetime.now().isoformat(),
        }
        status_context = {
            'model': model,
            'user': site_user.get('name'),
        }

        get_action('task_status_update')(status_context, status_entry)
        celery.send_task("qa.update", args=[context, data], task_id=task_id)
Beispiel #22
0
def taxonomy_term_delete(context, data_dict):
    """ Deletes a taxonomy term.

    This call deletes all of its child terms (those in narrower scope).

    :returns: The newly deleted term
    :rtype: A dictionary
    """
    _check_access('taxonomy_term_delete', context, data_dict)
    model = context['model']

    term = logic.get_action('taxonomy_term_show')(context, data_dict)

    all_terms = logic.get_action('taxonomy_term_list')(
        context, {'id': term['taxonomy_id']})
    _append_children(term, all_terms)

    # Gather the ids of the term and all of its descendants.
    ids = _gather(term, 'id')
    if ids:
        todelete = model.Session.query(TaxonomyTerm).\
            filter(TaxonomyTerm.id.in_(ids))
        # Use a plain loop instead of map(): map() is lazy on Python 3,
        # so the deletes would silently never execute there.
        for row in todelete:
            model.Session.delete(row)
        model.Session.commit()

    return term
    def list(self):
        '''Render api_list.html listing every registered logic action.'''
        # Force the action cache to load. If we skip this step and an
        # action hasn't been called elsewhere yet, we may only get the
        # built-in actions.
        try:
            logic.get_action('~_~')
        except KeyError:
            pass

        # No filters are configured, so every action passes (all() over
        # an empty sequence is True).
        action_filters = []

        def _passes(name, func):
            return all(flt(name, func) for flt in action_filters)

        # There appears to be no other way of getting all the action
        # routes, so we iterate over the (private) action cache.
        action_methods = sorted(
            (name, func) for name, func in logic._actions.iteritems()
            if _passes(name, func)
        )

        extra_vars = {
            'action_methods': action_methods,
            'publish_string': publish_string,
            'publish_parts': partial(
                publish_parts,
                writer_name='html',
                settings_overrides={
                    'field_name_limit': 0,
                    'syntax_highlight': 'short'
                }
            ),
            'trim': trim,
        }
        return plugins.toolkit.render('api_list.html', extra_vars=extra_vars)
Beispiel #24
0
def harvest_sources_reindex(context, data_dict):
    """
        Reindexes all harvest source datasets with the latest status
    """
    log.info("Reindexing all harvest sources")
    check_access("harvest_sources_reindex", context, data_dict)

    model = context["model"]

    # Every active dataset of the harvest-source type.
    sources = (
        model.Session.query(model.Package)
        .filter(model.Package.type == DATASET_TYPE_NAME)
        .filter(model.Package.state == u"active")
        .all()
    )

    package_index = PackageSearchIndex()

    # Defer the search-index commit until every source has been queued.
    reindex_context = {"defer_commit": True}
    for source in sources:
        get_action("harvest_source_reindex")(reindex_context,
                                             {"id": source.id})

    package_index.commit()

    return True
    def validate_config(self, config):
        '''
        Validate a harvest source's config string (a JSON document).

        Returns the config unchanged when it is empty or valid; raises
        ValueError describing the first problem found otherwise.

        NOTE(review): the original block ended with a ``try:`` that had no
        handler (a syntax error, presumably lost in a paste); the closing
        ``except ValueError`` and final ``return config`` were restored.
        '''
        if not config:
            return config

        try:
            config_obj = json.loads(config)

            if 'default_tags' in config_obj:
                if not isinstance(config_obj['default_tags'], list):
                    raise ValueError('default_tags must be a list')

            if 'default_groups' in config_obj:
                if not isinstance(config_obj['default_groups'], list):
                    raise ValueError('default_groups must be a list')

                # Check that every default group actually exists.
                context = {'model': model, 'user': c.user}
                for group_name in config_obj['default_groups']:
                    try:
                        get_action('group_show')(context, {'id': group_name})
                    except NotFound:
                        raise ValueError('Default group not found')

            if 'default_extras' in config_obj:
                if not isinstance(config_obj['default_extras'], dict):
                    raise ValueError('default_extras must be a dictionary')

            if 'user' in config_obj:
                # Check that the named user exists.
                context = {'model': model, 'user': c.user}
                try:
                    get_action('user_show')(context,
                                            {'id': config_obj.get('user')})
                except NotFound:
                    raise ValueError('User not found')
        except ValueError as e:
            # json.loads raises ValueError for malformed JSON too, so this
            # also converts parse errors into the expected exception type.
            raise e

        return config
Beispiel #26
0
    def import_stage(self):
        '''
        Re-run the import stage for harvest objects, optionally limited to
        a single source given as the first CLI argument (id or name) and
        further narrowed by the --harvest-object-id / --package-id /
        --guid options.
        '''
        if len(self.args) >= 2:
            source_id_or_name = unicode(self.args[1])
            context = {'model': model, 'session': model.Session,
                       'user': self.admin_user['name']}
            # Resolve a name to its canonical source id.
            source = get_action('harvest_source_show')(
                context, {'id': source_id_or_name})
            source_id = source['id']
        else:
            source_id = None

        context = {'model': model, 'session': model.Session,
                   'user': self.admin_user['name'],
                   'join_datasets': not self.options.no_join_datasets,
                   'segments': self.options.segments}

        objs_count = get_action('harvest_objects_import')(context, {
                'source_id': source_id,
                'harvest_object_id': self.options.harvest_object_id,
                'package_id': self.options.package_id,
                'guid': self.options.guid,
                })

        # print as a call: valid in Python 2 and 3 alike.
        print('%s objects reimported' % objs_count)
    def handle_groups(self, harvest_object, doc):
        '''Map the document's topic codes onto CKAN groups via the
        source's topic_group_mapping and return the groups that exist.'''
        validated_groups = []

        if 'topic_group_mapping' in self.source_config:
            for groupname, codelist in self.source_config['topic_group_mapping'].iteritems():
                matched = False
                for code in codelist:
                    for topic in doc.get_topics():
                        # Only the leading 4 characters of a topic are
                        # compared against the mapping code.
                        if topic[:4] == code:
                            log.debug('Found matching topic code %s' % code)
                            matched = True
                            break
                    if matched:
                        # no need to search further matches for this group
                        break

                if matched:
                    log.info('Adding group %s ' % groupname)

                    try:
                        context = {'model': model, 'session': Session, 'user': '******'}
                        get_action('group_show')(context, {'id': groupname})
                        validated_groups.append({'name': groupname})
                    except NotFound:
                        log.warning('Group %s is not available' % (groupname))

        return validated_groups
Beispiel #28
0
def taxonomy_term_tree(context, data_dict):
    """Return the terms of the given taxonomy arranged as a tree.

    If 'language' is specified in data_dict (default is en) then the
    label for that language is returned.

    :returns: the taxonomy's terms as a tree structure
    :rtype: list of dictionaries
    """
    _check_access('taxonomy_term_tree', context, data_dict)

    model = context['model']

    # Fetch the taxonomy itself without its terms (validates it exists);
    # the full term list is loaded in one go below.
    context['with_terms'] = False
    taxonomy = logic.get_action('taxonomy_show')(context, data_dict)

    term_list = logic.get_action('taxonomy_term_list')(context, data_dict)
    roots = [term for term in term_list if term['parent_id'] is None]

    # Attach children here from the single term_list fetch rather than
    # letting each term load its own children; for up to ~100 items this
    # is likely cheaper than many DB round trips.  May need optimising.
    return [_append_children(root, term_list) for root in roots]
    def import_stage(self, harvest_object):
        """Import one harvested object into CKAN.

        Parses the harvested JSON content into a pandas DataFrame,
        serialises it as TSV, and makes sure the target dataset exists
        (creating it from ``self.package`` when missing).  The
        resource-update step is still unimplemented.

        :param harvest_object: HarvestObject carrying the fetched content
            and a reference to its job/source.
        :returns: False when no harvest object was supplied; otherwise
            falls through (implicitly None) — TODO: return True once the
            resource handling is finished.
        """
        log = logging.getLogger(__name__ + '.VariantStore.import')

        if not harvest_object:
            log.error('No harvest object received')
            return False

        # Bug fix: this previously logged the undefined name `harvest_job`
        # (NameError); the job is reachable through the harvest object.
        log.debug('VariantStoreHarvester import_stage for job: %r',
                  harvest_object.job)

        self._set_config(harvest_object.job.source.config, log=log)

        df = pd.read_json(harvest_object.content)

        # Bug fix: pandas has no DataFrame.to_tsv(); tab-separated output
        # comes from to_csv() with sep="\t".
        tsv = df.to_csv(sep="\t")

        context = {
            'model': model,
            'session': Session,
            'user': self._get_user_name()}

        # Query CKAN for our dataset; create it if it does not exist yet.
        try:
            package = get_action('package_show')(context, {'id': self.dataset_name })
        except NotFound:
            package = get_action('package_create')(context, self.package)

        # Update or create the resource.
        # TODO: locate the existing resource, e.g.
        #   next(r for r in package['resources']
        #        if r['name'] == self.resource_name)
        # and update it with the TSV content, creating it on StopIteration.
        try:
            pass
        except StopIteration:
            pass
    def test_private_record(self):
        '''
        Test that private packages are not listed but public packages are

        '''
        package_1_data = deepcopy(TEST_DATADICT)
        model.User(name="privateuser", sysadmin=True).save()
        organization = get_action('organization_create')({'user': '******'}, {'name': 'private-organization', 'title': "Private organization"})
        package_1_data['private'] = True
        package_1_data['owner_org'] = organization['name']
        package_1_data['name'] = 'private-package'
        package1 = get_action('package_create')({'user': '******'}, package_1_data)
        package_2_data = deepcopy(TEST_DATADICT)
        package_2_data['private'] = False
        package_2_data['owner_org'] = organization['name']
        package_2_data['name'] = 'public-package'

        url = url_for('/oai')
        result = self.app.get(url, {'verb': 'ListIdentifiers', 'set': 'private-organization', 'metadataPrefix': 'oai_dc'})

        root = lxml.etree.fromstring(result.body)
        print root
        self.assertFalse(root.xpath("//o:header", namespaces=self._namespaces))
        package2 = get_action('package_create')({'user': '******'}, package_2_data)
        result = self.app.get(url, {'verb': 'ListIdentifiers', 'set': 'private-organization', 'metadataPrefix': 'oai_dc'})
        root = lxml.etree.fromstring(result.body)
        for header in root.xpath("//o:header", namespaces=self._namespaces):
            identifier = header.xpath("string(o:identifier)", namespaces=self._namespaces)
            print identifier
            self.assertTrue(identifier == package2['id'])

        get_action('organization_delete')({'user': '******'}, {'id': organization['id']})
Beispiel #31
0
    def index_package(self, pkg_dict, defer_commit=False):
        if pkg_dict is None:
            return

        pkg_dict['data_dict'] = json.dumps(pkg_dict)

        if config.get('ckan.cache_validated_datasets', True):
            package_plugin = lib_plugins.lookup_package_plugin(
                pkg_dict.get('type'))

            schema = package_plugin.show_package_schema()
            validated_pkg_dict, errors = _validate(pkg_dict, schema, {
                'model': model, 'session': model.Session})
            pkg_dict['validated_data_dict'] = json.dumps(validated_pkg_dict,
                cls=ckan.lib.navl.dictization_functions.MissingNullEncoder)

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        if (not pkg_dict.get('state')) or ('active' not in pkg_dict.get('state')):
            return self.delete_package(pkg_dict)

        index_fields = RESERVED_FIELDS + pkg_dict.keys()

        # include the extras in the main namespace
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], extra['value']
            if isinstance(value, (tuple, list)):
                value = " ".join(map(unicode, value))
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                if key in pkg_dict:
                    pkg_dict[key].append(tag['name'])
                else:
                    pkg_dict[key] = [tag['name']]
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # we use the capacity to make things private in the search index
        if pkg_dict['private']:
            pkg_dict['capacity'] = 'private'
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # if there is an owner_org we want to add this to groups for index
        # purposes
        if pkg_dict.get('organization'):
           pkg_dict['organization'] = pkg_dict['organization']['name']
        else:
           pkg_dict['organization'] = None

        # tracking
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        if tracking_summary:
            pkg_dict['views_total'] = tracking_summary['total']
            pkg_dict['views_recent'] = tracking_summary['recent']

        # flatten the structure for indexing:
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in [('description', 'res_description'),
                                 ('format', 'res_format'),
                                 ('url', 'res_url')]:
                pkg_dict[nkey] = pkg_dict.get(nkey, []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            type = model.PackageRelationship.forward_to_reverse_type(rel['type'])
            rel_dict[type].append(model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            type = rel['type']
            rel_dict[type].append(model.Package.get(rel['object_package_id']).name)
        for key, value in rel_dict.iteritems():
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Save dataset type
        pkg_dict['dataset_type'] = pkg_dict['type']

        # clean the dict fixing keys and dates
        # FIXME where are we getting these dirty keys from?  can we not just
        # fix them in the correct place or is this something that always will
        # be needed?  For my data not changing the keys seems to not cause a
        # problem.
        new_dict = {}
        for key, value in pkg_dict.items():
            key = key.encode('ascii', 'ignore')
            if key.endswith('_date'):
                try:
                    value = parse(value).isoformat() + 'Z'
                except ValueError:
                    continue
            new_dict[key] = value
        pkg_dict = new_dict

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            try:
                value = pkg_dict.get(field_name)
                if value:
                    pkg_dict[field_name] = value.lstrip()
            except KeyError:
                pass

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5('%s%s' % (pkg_dict['id'],config.get('ckan.site_id'))).hexdigest()

        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # send to solr:
        try:
            conn = make_connection()
            commit = not defer_commit
            if not asbool(config.get('ckan.search.solr_commit', 'true')):
                commit = False
            conn.add_many([pkg_dict], _commit=commit)
        except solr.core.SolrException, e:
            msg = 'Solr returned an error: {0} {1} - {2}'.format(
                e.httpcode, e.reason, e.body[:1000] # limit huge responses
            )
            raise SearchIndexError(msg)
Beispiel #32
0
    def index_package(self, pkg_dict, defer_commit=False):
        """Flatten a package dict and send it to the Solr index.

        Lifts extras, tags, groups, resources and relationships to
        top-level fields, normalises keys and dates to what Solr
        accepts, lets ``IPackageController.before_index`` plugins adjust
        the document, and posts it to Solr.

        :param pkg_dict: dictized package; ``None`` is silently ignored.
        :param defer_commit: when True, do not ask Solr to commit.
        :raises SearchIndexError: when the Solr request fails.
        """
        if pkg_dict is None:
            return
        # Keep the full original dict available as a serialised field.
        pkg_dict['data_dict'] = json.dumps(pkg_dict)

        # add to string field for sorting
        title = pkg_dict.get('title')
        if title:
            pkg_dict['title_string'] = title

        # Non-active packages are removed from the index instead.
        if (not pkg_dict.get('state')) or ('active'
                                           not in pkg_dict.get('state')):
            return self.delete_package(pkg_dict)

        index_fields = RESERVED_FIELDS + pkg_dict.keys()

        # include the extras in the main namespace
        # NOTE(review): extra values are assumed to be JSON-encoded here
        # (unlike the sibling index_package variant above, which uses the
        # raw value) — confirm all writers JSON-encode extras.
        extras = pkg_dict.get('extras', [])
        for extra in extras:
            key, value = extra['key'], json.loads(extra['value'])
            if isinstance(value, (tuple, list)):
                value = " ".join(map(unicode, value))
            # Strip characters Solr field names cannot contain.
            key = ''.join([c for c in key if c in KEY_CHARS])
            pkg_dict['extras_' + key] = value
            if key not in index_fields:
                pkg_dict[key] = value
        pkg_dict.pop('extras', None)

        # add tags, removing vocab tags from 'tags' list and adding them as
        # vocab_<tag name> so that they can be used in facets
        non_vocab_tag_names = []
        tags = pkg_dict.pop('tags', [])
        context = {'model': model}

        for tag in tags:
            if tag.get('vocabulary_id'):
                data = {'id': tag['vocabulary_id']}
                vocab = logic.get_action('vocabulary_show')(context, data)
                key = u'vocab_%s' % vocab['name']
                if key in pkg_dict:
                    pkg_dict[key].append(tag['name'])
                else:
                    pkg_dict[key] = [tag['name']]
            else:
                non_vocab_tag_names.append(tag['name'])

        pkg_dict['tags'] = non_vocab_tag_names

        # add groups
        groups = pkg_dict.pop('groups', [])

        # Capacity is different to the default only if using organizations
        # where the dataset is only in one group. We will add the capacity
        # from the single group that it is a part of if we have a group
        if len(groups):
            pkg_dict['capacity'] = groups[0].get('capacity', 'public')
        else:
            pkg_dict['capacity'] = 'public'

        pkg_dict['groups'] = [group['name'] for group in groups]

        # tracking
        tracking_summary = pkg_dict.pop('tracking_summary', None)
        if tracking_summary:
            pkg_dict['views_total'] = tracking_summary['total']
            pkg_dict['views_recent'] = tracking_summary['recent']

        # flatten the structure for indexing:
        for resource in pkg_dict.get('resources', []):
            for (okey, nkey) in [('description', 'res_description'),
                                 ('format', 'res_format'), ('url', 'res_url')]:
                pkg_dict[nkey] = pkg_dict.get(nkey,
                                              []) + [resource.get(okey, u'')]
        pkg_dict.pop('resources', None)

        # Index relationships under their type name as seen from this
        # package's side (reverse the type when this package is the object).
        rel_dict = collections.defaultdict(list)
        subjects = pkg_dict.pop("relationships_as_subject", [])
        objects = pkg_dict.pop("relationships_as_object", [])
        for rel in objects:
            type = model.PackageRelationship.forward_to_reverse_type(
                rel['type'])
            rel_dict[type].append(
                model.Package.get(rel['subject_package_id']).name)
        for rel in subjects:
            type = rel['type']
            rel_dict[type].append(
                model.Package.get(rel['object_package_id']).name)
        for key, value in rel_dict.iteritems():
            if key not in pkg_dict:
                pkg_dict[key] = value

        pkg_dict[TYPE_FIELD] = PACKAGE_TYPE

        # Solr field names must be plain ASCII strings; drop anything else.
        pkg_dict = dict([(k.encode('ascii', 'ignore'), v)
                         for (k, v) in pkg_dict.items()])

        for k in ('title', 'notes', 'title_string'):
            if k in pkg_dict and pkg_dict[k]:
                pkg_dict[k] = escape_xml_illegal_chars(pkg_dict[k])

        # modify dates (SOLR is quite picky with dates, and only accepts ISO dates
        # with UTC time (i.e trailing Z)
        # See http://lucene.apache.org/solr/api/org/apache/solr/schema/DateField.html
        pkg_dict['metadata_created'] += 'Z'
        pkg_dict['metadata_modified'] += 'Z'

        # mark this CKAN instance as data source:
        pkg_dict['site_id'] = config.get('ckan.site_id')

        # Strip a selection of the fields.
        # These fields are possible candidates for sorting search results on,
        # so we strip leading spaces because solr will sort " " before "a" or "A".
        for field_name in ['title']:
            try:
                value = pkg_dict.get(field_name)
                if value:
                    pkg_dict[field_name] = value.lstrip()
            except KeyError:
                pass

        # add a unique index_id to avoid conflicts
        import hashlib
        pkg_dict['index_id'] = hashlib.md5(
            '%s%s' % (pkg_dict['id'], config.get('ckan.site_id'))).hexdigest()

        for item in PluginImplementations(IPackageController):
            pkg_dict = item.before_index(pkg_dict)

        assert pkg_dict, 'Plugin must return non empty package dict on index'

        # send to solr:
        try:
            conn = make_connection()
            commit = not defer_commit
            conn.add_many([pkg_dict], _commit=commit)
        except Exception, e:
            # NOTE(review): broad catch — any failure (not only Solr
            # errors) is logged and re-raised as SearchIndexError.
            log.exception(e)
            raise SearchIndexError(e)
    def test_01_package_list(self, app):
        res = json.loads(
            app.post(
                "/api/action/package_list",
                headers={
                    "content-type": "application/json"
                },
            ).body)
        assert res["success"] is True
        assert len(res["result"]) == 2
        assert "warandpeace" in res["result"]
        assert "annakarenina" in res["result"]
        assert "/api/3/action/help_show?name=package_list" in res["help"]

        res = json.loads(
            app.post("/api/action/package_list", json={
                "limit": 1
            }).body)
        assert res["success"] is True
        assert len(res["result"]) == 1
        assert ("warandpeace" in res["result"]
                or "annakarenina" in res["result"])

        # Test GET request
        res = json.loads(app.get("/api/action/package_list").body)
        assert len(res["result"]) == 2
        assert "warandpeace" in res["result"]
        assert "annakarenina" in res["result"]

        # def test_01_package_list_private(self):
        tests.call_action_api(
            app,
            "organization_create",
            name="test_org_2",
            apikey=self.sysadmin_user.apikey,
        )

        tests.call_action_api(
            app,
            "package_create",
            name="public_dataset",
            owner_org="test_org_2",
            apikey=self.sysadmin_user.apikey,
        )

        res = tests.call_action_api(app, "package_list")

        assert len(res) == 3
        assert "warandpeace" in res
        assert "annakarenina" in res
        assert "public_dataset" in res

        tests.call_action_api(
            app,
            "package_create",
            name="private_dataset",
            owner_org="test_org_2",
            private=True,
            apikey=self.sysadmin_user.apikey,
        )

        res = tests.call_action_api(app, "package_list")
        assert len(res) == 3
        assert "warandpeace" in res
        assert "annakarenina" in res
        assert "public_dataset" in res
        assert "private_dataset" not in res

        # def test_02_package_autocomplete_match_name(self):
        res = app.post("/api/action/package_autocomplete",
                       json={
                           "q": "war",
                           "limit": 5
                       })
        res_obj = json.loads(res.body)
        assert res_obj["success"]
        assert res_obj["result"][0]["name"] == "warandpeace"
        assert res_obj["result"][0]["title"] == "A Wonderful Story"
        assert res_obj["result"][0]["match_field"] == "name"
        assert res_obj["result"][0]["match_displayed"] == "warandpeace"

        # def test_02_package_autocomplete_match_title(self):
        res = app.post("/api/action/package_autocomplete",
                       json={
                           "q": "won",
                           "limit": 5
                       })
        res_obj = json.loads(res.body)
        assert res_obj["success"]
        assert res_obj["result"][0]["name"] == "warandpeace"
        assert res_obj["result"][0]["title"] == "A Wonderful Story"
        assert res_obj["result"][0]["match_field"] == "title"
        assert (res_obj["result"][0]["match_displayed"] ==
                "A Wonderful Story (warandpeace)")

        # def test_03_create_private_package(self):

        # Make an organization, because private datasets must belong to one.
        organization = tests.call_action_api(
            app,
            "organization_create",
            name="test_org",
            apikey=self.sysadmin_user.apikey,
        )

        # Create a dataset without specifying visibility
        package_dict = {
            "extras": [{
                "key": u"original media",
                "value": u'"book"'
            }],
            "license_id":
            u"other-open",
            "maintainer_email":
            None,
            "name":
            u"annakarenina_vis",
            "notes":
            u"Some test now",
            "resources": [
                {
                    "alt_url": u"alt123",
                    "description": u"Full text.",
                    "extras": {
                        u"alt_url": u"alt123",
                        u"size": u"123"
                    },
                    "format": u"plain text",
                    "hash": u"abc123",
                    "position": 0,
                    "url": u"http://datahub.io/download/",
                },
                {
                    "alt_url": u"alt345",
                    "description": u"Index of the novel",
                    "extras": {
                        u"alt_url": u"alt345",
                        u"size": u"345"
                    },
                    "format": u"JSON",
                    "hash": u"def456",
                    "position": 1,
                    "url": u"http://datahub.io/index.json",
                },
            ],
            "tags": [{
                "name": u"russian"
            }, {
                "name": u"tolstoy"
            }],
            "title":
            u"A Novel By Tolstoy",
            "url":
            u"http://datahub.io",
            "owner_org":
            organization["id"],
            "version":
            u"0.7a",
        }
        package_created = tests.call_action_api(
            app,
            "package_create",
            apikey=self.sysadmin_user.apikey,
            **package_dict)
        assert package_created["private"] is False

        # Create a new one, explicitly saying it is public
        package_dict["name"] = u"annakareninanew_vis_public"
        package_dict["private"] = False

        package_created_public = tests.call_action_api(
            app,
            "package_create",
            apikey=self.sysadmin_user.apikey,
            **package_dict)
        assert package_created_public["private"] is False

        # Create a new one, explicitly saying it is private
        package_dict["name"] = u"annakareninanew_vis_private"
        package_dict["private"] = True

        package_created_private = tests.call_action_api(
            app,
            "package_create",
            apikey=self.sysadmin_user.apikey,
            **package_dict)
        assert package_created_private["private"] is True

        # def test_41_create_resource(self):

        anna_id = model.Package.by_name(u"annakarenina").id
        resource = {"package_id": anna_id, "url": "http://new_url"}
        api_key = six.ensure_text(model.User.get("testsysadmin").apikey)
        res = app.post(
            "/api/action/resource_create",
            json=resource,
            extra_environ={"Authorization": str(api_key)},
        )

        resource = json.loads(res.body)["result"]

        assert resource["url"] == "http://new_url"

        # def test_42_create_resource_with_error(self):

        anna_id = model.Package.by_name(u"annakarenina").id
        resource = {
            "package_id": anna_id,
            "url": "new_url",
            "created": "bad_date",
        }
        api_key = six.ensure_text(model.User.get("testsysadmin").apikey)

        res = app.post(
            "/api/action/resource_create",
            json=resource,
            extra_environ={"Authorization": str(api_key)},
            status=StatusCodes.STATUS_409_CONFLICT,
        )

        assert json.loads(res.body)["error"] == {
            "__type": "Validation Error",
            "created": ["Date format incorrect"],
        }

        # def test_10_user_create_parameters_missing(self):
        user_dict = {}

        res = app.post(
            "/api/action/user_create",
            json=user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
            status=StatusCodes.STATUS_409_CONFLICT,
        )
        res_obj = json.loads(res.body)
        assert res_obj["error"] == {
            "__type": "Validation Error",
            "name": ["Missing value"],
            "email": ["Missing value"],
            "password": ["Missing value"],
        }
        assert "/api/3/action/help_show?name=user_create" in res_obj["help"]
        assert res_obj["success"] is False

        # def test_11_user_create_wrong_password(self):
        user_dict = {
            "name": "test_create_from_action_api_2",
            "email": "*****@*****.**",
            "password": "******",
        }  # Too short

        res = app.post(
            "/api/action/user_create",
            json=user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
            status=StatusCodes.STATUS_409_CONFLICT,
        )

        res_obj = json.loads(res.body)
        assert "/api/3/action/help_show?name=user_create" in res_obj["help"]
        assert res_obj["success"] is False
        assert res_obj["error"] == {
            "__type": "Validation Error",
            "password": ["Your password must be 8 characters or longer"],
        }

        # def test_12_user_update(self):
        normal_user_dict = {
            "id": self.normal_user.id,
            "name": self.normal_user.name,
            "fullname": "Updated normal user full name",
            "email": "*****@*****.**",
            "about": "Updated normal user about",
        }

        sysadmin_user_dict = {
            "id": self.sysadmin_user.id,
            "fullname": "Updated sysadmin user full name",
            "email": "*****@*****.**",
            "about": "Updated sysadmin user about",
        }

        # Normal users can update themselves
        res = app.post(
            "/api/action/user_update",
            json=normal_user_dict,
            extra_environ={"Authorization": str(self.normal_user.apikey)},
        )

        res_obj = json.loads(res.body)
        assert "/api/3/action/help_show?name=user_update" in res_obj["help"]
        assert res_obj["success"] == True
        result = res_obj["result"]
        assert result["id"] == self.normal_user.id
        assert result["name"] == self.normal_user.name
        assert result["fullname"] == normal_user_dict["fullname"]
        assert result["about"] == normal_user_dict["about"]
        assert "apikey" in result
        assert "created" in result
        assert "display_name" in result
        assert "number_created_packages" in result
        assert not "password" in result

        # Sysadmin users can update themselves
        res = app.post(
            "/api/action/user_update",
            json=sysadmin_user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )

        res_obj = json.loads(res.body)
        assert "/api/3/action/help_show?name=user_update" in res_obj["help"]
        assert res_obj["success"] == True
        result = res_obj["result"]
        assert result["id"] == self.sysadmin_user.id
        assert result["name"] == self.sysadmin_user.name
        assert result["fullname"] == sysadmin_user_dict["fullname"]
        assert result["about"] == sysadmin_user_dict["about"]

        # Sysadmin users can update all users
        res = app.post(
            "/api/action/user_update",
            json=normal_user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )

        res_obj = json.loads(res.body)
        assert "/api/3/action/help_show?name=user_update" in res_obj["help"]
        assert res_obj["success"] == True
        result = res_obj["result"]
        assert result["id"] == self.normal_user.id
        assert result["name"] == self.normal_user.name
        assert result["fullname"] == normal_user_dict["fullname"]
        assert result["about"] == normal_user_dict["about"]

        # Normal users can not update other users
        res = app.post(
            "/api/action/user_update",
            json=sysadmin_user_dict,
            extra_environ={"Authorization": str(self.normal_user.apikey)},
            status=StatusCodes.STATUS_403_ACCESS_DENIED,
        )

        res_obj = json.loads(res.body)
        assert "/api/3/action/help_show?name=user_update" in res_obj["help"]
        assert res_obj["error"]["__type"] == "Authorization Error"
        assert res_obj["success"] is False

        # def test_12_user_update_errors(self):
        test_calls = (
            # Empty name
            {
                "user_dict": {
                    "id": self.normal_user.id,
                    "name": "",
                    "email": "*****@*****.**",
                },
                "messages": [("name", "Must be at least 2 characters long")],
            },
            # Invalid characters in name
            {
                "user_dict": {
                    "id": self.normal_user.id,
                    "name": "i++%",
                    "email": "*****@*****.**",
                },
                "messages":
                [("name", "Must be purely lowercase alphanumeric")],
            },
            # Existing name
            {
                "user_dict": {
                    "id": self.normal_user.id,
                    "name": self.sysadmin_user.name,
                    "email": "*****@*****.**",
                },
                "messages": [("name", "That login name is not available")],
            },
            # Missing email
            {
                "user_dict": {
                    "id": self.normal_user.id,
                    "name": self.normal_user.name,
                },
                "messages": [("email", "Missing value")],
            },
        )

        for test_call in test_calls:
            res = app.post(
                "/api/action/user_update",
                json=test_call["user_dict"],
                extra_environ={"Authorization": str(self.normal_user.apikey)},
                status=StatusCodes.STATUS_409_CONFLICT,
            )
            res_obj = json.loads(res.body)
            for expected_message in test_call["messages"]:
                assert expected_message[1] in "".join(
                    res_obj["error"][expected_message[0]])

        # def test_user_delete(self):
        name = "normal_user"
        CreateTestData.create_user(name)
        user = model.User.get(name)
        user_dict = {"id": user.id}
        res = app.post(
            "/api/action/user_delete",
            json=user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )

        res_obj = json.loads(res.body)
        deleted_user = model.User.get(name)
        assert res_obj["success"] is True
        assert deleted_user.is_deleted(), deleted_user

        # def test_user_delete_requires_data_dict_with_key_id(self):
        user_dict = {"name": "normal_user"}
        res = app.post(
            "/api/action/user_delete",
            json=user_dict,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
            status=StatusCodes.STATUS_409_CONFLICT,
        )

        res_obj = json.loads(res.body)
        assert res_obj["success"] is False
        assert res_obj["error"]["id"] == ["Missing value"]

        # def test_16_user_autocomplete(self):
        # Create deleted user to make sure he won't appear in the user_list
        deleted_user = CreateTestData.create_user("joe")
        deleted_user.delete()
        model.repo.commit()

        # Empty query
        res = app.post(
            "/api/action/user_autocomplete",
            json={},
            status=StatusCodes.STATUS_409_CONFLICT,
        )
        res_obj = json.loads(res.body)
        assert ("/api/3/action/help_show?name=user_autocomplete"
                in res_obj["help"])
        assert res_obj["success"] is False

        # Normal query
        res = app.post("/api/action/user_autocomplete", json={"q": "joe"})
        res_obj = json.loads(res.body)
        assert res_obj["result"][0]["name"] == "joeadmin"
        assert "id", "fullname" in res_obj["result"][0]

        # def test_17_bad_action(self):
        # Empty query
        res = app.post("/api/action/bad_action_name", json={}, status=400)
        res_obj = json.loads(res.body)
        assert (
            res_obj == u"Bad request - Action name not known: bad_action_name")

        # def test_20_task_status_update(self):
        package_created = _add_basic_package(app, u"test_task_status_update")

        task_status = {
            "entity_id": package_created["id"],
            "entity_type": u"package",
            "task_type": u"test_task",
            "key": u"test_key",
            "value": u"test_value",
            "state": u"test_state",
            "error": u"test_error",
        }
        res = app.post(
            "/api/action/task_status_update",
            json=task_status,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_updated = json.loads(res.body)["result"]

        task_status_id = task_status_updated.pop("id")
        task_status_updated.pop("last_updated")
        assert task_status_updated == task_status

        task_status_updated["id"] = task_status_id
        task_status_updated["value"] = u"test_value_2"
        res = app.post(
            "/api/action/task_status_update",
            json=task_status_updated,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_updated_2 = json.loads(res.body)["result"]
        task_status_updated_2.pop("last_updated")
        assert task_status_updated_2 == task_status_updated

        # def test_21_task_status_update_many(self):
        package_created = _add_basic_package(app,
                                             u"test_task_status_update_many")
        task_statuses = {
            "data": [
                {
                    "entity_id": package_created["id"],
                    "entity_type": u"package",
                    "task_type": u"test_task",
                    "key": u"test_task_1",
                    "value": u"test_value_1",
                    "state": u"test_state",
                    "error": u"test_error",
                },
                {
                    "entity_id": package_created["id"],
                    "entity_type": u"package",
                    "task_type": u"test_task",
                    "key": u"test_task_2",
                    "value": u"test_value_2",
                    "state": u"test_state",
                    "error": u"test_error",
                },
            ]
        }
        res = app.post(
            "/api/action/task_status_update_many",
            json=task_statuses,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_statuses_updated = json.loads(res.body)["result"]["results"]
        for i in range(len(task_statuses["data"])):
            task_status = task_statuses["data"][i]
            task_status_updated = task_statuses_updated[i]
            task_status_updated.pop("id")
            task_status_updated.pop("last_updated")
            assert task_status == task_status_updated, (
                task_status_updated,
                task_status,
                i,
            )

        # def test_22_task_status_normal_user_not_authorized(self):
        task_status = {}
        res = app.post(
            "/api/action/task_status_update",
            json=task_status,
            extra_environ={"Authorization": str(self.normal_user.apikey)},
            status=StatusCodes.STATUS_403_ACCESS_DENIED,
        )
        res_obj = json.loads(res.body)
        assert ("/api/3/action/help_show?name=task_status_update"
                in res_obj["help"])
        assert res_obj["success"] is False
        assert res_obj["error"]["__type"] == "Authorization Error"

        # def test_23_task_status_validation(self):
        task_status = {}
        res = app.post(
            "/api/action/task_status_update",
            json=task_status,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
            status=StatusCodes.STATUS_409_CONFLICT,
        )

        # def test_24_task_status_show(self):
        package_created = _add_basic_package(app, u"test_task_status_show")

        task_status = {
            "entity_id": package_created["id"],
            "entity_type": u"package",
            "task_type": u"test_task",
            "key": u"test_task_status_show",
            "value": u"test_value",
            "state": u"test_state",
            "error": u"test_error",
        }
        res = app.post(
            "/api/action/task_status_update",
            json=task_status,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_updated = json.loads(res.body)["result"]

        # make sure show works when giving a task status ID
        res = app.post(
            "/api/action/task_status_show",
            json={"id": task_status_updated["id"]},
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_show = res.json["result"]

        task_status_show.pop("last_updated")
        task_status_updated.pop("last_updated")
        assert task_status_show == task_status_updated, (
            task_status_show,
            task_status_updated,
        )

        # make sure show works when giving a (entity_id, task_type, key) tuple
        res = app.post(
            "/api/action/task_status_show",
            json={
                "entity_id": task_status["entity_id"],
                "task_type": task_status["task_type"],
                "key": task_status["key"],
            },
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_show = json.loads(res.body)["result"]

        task_status_show.pop("last_updated")
        assert task_status_show == task_status_updated, (
            task_status_show,
            task_status_updated,
        )

        # def test_25_task_status_delete(self):
        package_created = _add_basic_package(app, u"test_task_status_delete")

        task_status = {
            "entity_id": package_created["id"],
            "entity_type": u"package",
            "task_type": u"test_task",
            "key": u"test_task_status_delete",
            "value": u"test_value",
            "state": u"test_state",
            "error": u"test_error",
        }
        res = app.post(
            "/api/action/task_status_update",
            json=task_status,
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_updated = json.loads(res.body)["result"]

        res = app.post(
            "/api/action/task_status_delete",
            json={"id": task_status_updated["id"]},
            extra_environ={"Authorization": str(self.sysadmin_user.apikey)},
        )
        task_status_delete = json.loads(res.body)
        assert task_status_delete["success"] == True

        # def test_26_resource_show(self):
        pkg = model.Package.get("annakarenina")
        resource = pkg.resources[0]
        res = app.post("/api/action/resource_show", json={"id": resource.id})
        result = json.loads(res.body)["result"]

        resource_dict = resource_dictize(resource, {"model": model})
        assert result == resource_dict, (result, resource_dict)

        # def test_27_get_site_user_not_authorized(self):
        with pytest.raises(NotAuthorized):
            get_action("get_site_user")({"model": model, "user": ""}, {})
        # user = model.User.get('test.ckan.net')
        # assert not user

        site_id = config.get("ckan.site_id")
        user = get_action("get_site_user")({
            "model": model,
            "ignore_auth": True
        }, {})
        assert user["name"] == site_id

        user = model.User.get(site_id)
        assert user

        user = get_action("get_site_user")({
            "model": model,
            "ignore_auth": True
        }, {})
        assert user["name"] == site_id

        user = model.Session.query(model.User).filter_by(name=site_id).one()
        assert user

        # def test_28_group_package_show(self):
        group_id = model.Group.get("david").id
        group_packages = get_action("group_package_show")(
            {
                "model": model,
                "user": self.normal_user.name,
                "ignore_auth": True,
            },
            {
                "id": group_id
            },
        )
        assert len(group_packages) == 2, group_packages
        group_names = set([g.get("name") for g in group_packages])
        assert group_names == set(["annakarenina", "warandpeace"]), group_names

        # def test_30_status_show(self):
        res = app.post("/api/action/status_show", json={})
        status = json.loads(res.body)["result"]
        assert status["site_title"] == "CKAN"
        assert status["ckan_version"] == ckan.__version__
        assert status["site_url"] == "http://test.ckan.net"

        # def test_31_bad_request_format(self):
        res = app.post("/api/action/package_list",
                       json=six.ensure_str("not a dict"),
                       status=400)

        assert (
            "Bad request - JSON Error: Request data JSON decoded to ") in res
        assert ("'not a dict' but it needs to be a dictionary.") in res
        # def test_31_bad_request_format_not_json(self):
        res = app.post("/api/action/package_list",
                       data="=1",
                       status=400,
                       content_type="application/json")
        assert body_contains(
            res, "Bad request - JSON Error: Error decoding JSON data.")

        # def test_32_get_domain_object(self):
        anna = model.Package.by_name(u"annakarenina")
        assert get_domain_object(model, anna.name).name == anna.name
        assert get_domain_object(model, anna.id).name == anna.name
        group = model.Group.by_name(u"david")
        assert get_domain_object(model, group.name).name == group.name
        assert get_domain_object(model, group.id).name == group.name

        # def test_41_missing_action(self):
        try:
            get_action("unicorns")
            assert False, "We found a non-existent action"
        except KeyError:
            assert True

        # def test_42_resource_search_with_single_field_query(self):
        request_body = {"query": ["description:index"]}
        response = app.post("/api/action/resource_search", json=request_body)
        result = json.loads(response.body)["result"]["results"]
        count = json.loads(response.body)["result"]["count"]

        ## Due to the side-effect of previously run tests, there may be extra
        ## resources in the results.  So just check that each found Resource
        ## matches the search criteria
        assert count > 0
        for resource in result:
            assert "index" in resource["description"].lower()

        # def test_42_resource_search_across_multiple_fields(self):
        request_body = {"query": ["description:index", "format:json"]}
        response = app.post("/api/action/resource_search", json=request_body)
        result = json.loads(response.body)["result"]["results"]
        count = json.loads(response.body)["result"]["count"]

        ## Due to the side-effect of previously run tests, there may be extra
        ## resources in the results.  So just check that each found Resource
        ## matches the search criteria
        assert count > 0
        for resource in result:
            assert "index" in resource["description"].lower()
            assert "json" in resource["format"].lower()

        # def test_42_resource_search_test_percentage_is_escaped(self):
        request_body = {"query": ["description:index%"]}
        response = app.post("/api/action/resource_search", json=request_body)
        count = json.loads(response.body)["result"]["count"]

        # There shouldn't be any results.  If the '%' character wasn't
        # escaped correctly, then the search would match because of the
        # unescaped wildcard.
        assert count == 0

        # def test_42_resource_search_fields_parameter_still_accepted(self):
        """The fields parameter is deprecated, but check it still works.

        Remove this test when removing the fields parameter.  (#2603)
        """
        request_body = {"fields": {"description": "index"}}

        response = app.post("/api/action/resource_search", json=request_body)
        result = json.loads(response.body)["result"]["results"]
        count = json.loads(response.body)["result"]["count"]

        ## Due to the side-effect of previously run tests, there may be extra
        ## resources in the results.  So just check that each found Resource
        ## matches the search criteria
        assert count > 0
        for resource in result:
            assert "index" in resource["description"].lower()

        # def test_42_resource_search_accessible_via_get_request(self):
        response = app.get("/api/action/resource_search"
                           "?query=description:index&query=format:json")

        result = json.loads(response.body)["result"]["results"]
        count = json.loads(response.body)["result"]["count"]

        ## Due to the side-effect of previously run tests, there may be extra
        ## resources in the results.  So just check that each found Resource
        ## matches the search criteria
        assert count > 0
        for resource in result:
            assert "index" in resource["description"].lower()
            assert "json" in resource["format"].lower()
Beispiel #34
0
                            .all()

    ids = (package[0] for package in packages)

    for _id in ids:
        toolkit.get_action('package_patch')(context,{'id': _id, 'private': True})

    return True


@logic.side_effect_free
def make_datasets_public(context, data_dict):
    '''
    Change the status of all active, private datasets to public.

    :param context: CKAN action context; must contain ``model``.
    :param data_dict: unused.
    :returns: ``True`` once every matching dataset has been patched.
    '''
    log.info('Making all datasets public')

    model = context['model']

    # Only plain 'dataset'-type packages that are active AND currently
    # private need to be touched.
    # (Original was missing the line-continuation backslash before
    # .all(), which made this function a SyntaxError.)
    packages = model.Session.query(model.Package.id) \
                            .filter(model.Package.type == 'dataset') \
                            .filter(model.Package.state == u'active') \
                            .filter(model.Package.private == True) \
                            .all()

    ids = (package[0] for package in packages)

    for _id in ids:
        # private=False makes the dataset public; the original passed
        # True, which would have left every dataset private.
        get_action('package_patch')(context, {'id': _id, 'private': False})

    return True
Beispiel #35
0
def harvest_source_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself.  This is useful to clean history of long running
    harvest sources to start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string

    :returns: a dict containing the id of the cleared source
    :raises NotFound: if no harvest source exists for the given id
    '''

    check_access('harvest_source_clear', context, data_dict)

    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # Normalise to the canonical id (presumably HarvestSource.get also
    # accepts a source name — TODO confirm).
    harvest_source_id = source.id

    # Clear all datasets from this source from the search index first, so
    # the index does not keep entries for packages deleted below.
    harvest_source_index_clear(context, data_dict)

    model = context['model']

    # On CKAN <= 2.5 the 'related' tables still exist; collect the ids of
    # related items attached to this source's datasets now, so they can be
    # deleted in the final SQL block below.  On CKAN 2.6+ these tables are
    # gone and this step is skipped entirely.
    if toolkit.check_ckan_version(max_version='2.5.99'):

        sql = '''select id from related where id in (
                  select related_id from related_dataset where dataset_id in (
                      select package_id from harvest_object
                      where harvest_source_id = '{harvest_source_id}'));'''.format(
            harvest_source_id=harvest_source_id)
        result = model.Session.execute(sql)
        ids = []
        for row in result:
            ids.append(row[0])
        related_ids = "('" + "','".join(ids) + "')"

    # NOTE(review): the SQL below is assembled with str.format rather than
    # bound parameters.  harvest_source_id comes from the DB lookup above,
    # not raw user input, so injection exposure is limited — but keep it
    # that way if this is ever changed.
    #
    # Mark every package harvested from this source; subsequent statements
    # key off state = 'to_delete'.
    sql = '''begin;
        update package set state = 'to_delete' where id in (
            select package_id from harvest_object
            where harvest_source_id = '{harvest_source_id}');'''.format(
        harvest_source_id=harvest_source_id)

    # CKAN-2.3 or above: delete resource views, resource revisions & resources
    if toolkit.check_ckan_version(min_version='2.3'):
        sql += '''
        delete from resource_view where resource_id in (
            select id from resource where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource_revision where package_id in (
            select id from package where state = 'to_delete');
        delete from resource where package_id in (
            select id from package where state = 'to_delete');
        '''
    # Backwards-compatibility: support ResourceGroup (pre-CKAN-2.3)
    else:
        sql += '''
        delete from resource_revision where resource_group_id in (
            select id from resource_group where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource where resource_group_id in (
            select id from resource_group where package_id in (
                select id from package where state = 'to_delete'));
        delete from resource_group_revision where package_id in (
            select id from package where state = 'to_delete');
        delete from resource_group where package_id in (
            select id from package where state = 'to_delete');
        '''
    # CKAN pre-2.5: authz models were removed in migration 078
    if toolkit.check_ckan_version(max_version='2.4.99'):
        sql += '''
        delete from package_role where package_id in (
            select id from package where state = 'to_delete');
        delete from user_object_role where id not in (
            select user_object_role_id from package_role)
            and context = 'Package';
        '''

    # Harvest bookkeeping (objects, their errors/extras, jobs, gather
    # errors) plus all package-related child tables for the marked packages.
    sql += '''
    delete from harvest_object_error where harvest_object_id in (
        select id from harvest_object
        where harvest_source_id = '{harvest_source_id}');
    delete from harvest_object_extra where harvest_object_id in (
        select id from harvest_object
        where harvest_source_id = '{harvest_source_id}');
    delete from harvest_object where harvest_source_id = '{harvest_source_id}';
    delete from harvest_gather_error where harvest_job_id in (
        select id from harvest_job where source_id = '{harvest_source_id}');
    delete from harvest_job where source_id = '{harvest_source_id}';
    delete from package_tag_revision where package_id in (
        select id from package where state = 'to_delete');
    delete from member_revision where table_id in (
        select id from package where state = 'to_delete');
    delete from package_extra_revision where package_id in (
        select id from package where state = 'to_delete');
    delete from package_revision where id in (
        select id from package where state = 'to_delete');
    delete from package_tag where package_id in (
        select id from package where state = 'to_delete');
    delete from package_extra where package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship_revision where subject_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship_revision where object_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship where subject_package_id in (
        select id from package where state = 'to_delete');
    delete from package_relationship where object_package_id in (
        select id from package where state = 'to_delete');
    delete from member where table_id in (
        select id from package where state = 'to_delete');
     '''.format(harvest_source_id=harvest_source_id)

    # Finally remove the packages themselves — plus, on CKAN <= 2.5, the
    # related entries collected earlier (related_ids is only defined on
    # that branch, matching the guard here).
    if toolkit.check_ckan_version(max_version='2.5.99'):
        sql += '''
        delete from related_dataset where dataset_id in (
            select id from package where state = 'to_delete');
        delete from related where id in {related_ids};
        delete from package where id in (
            select id from package where state = 'to_delete');
        '''.format(related_ids=related_ids)
    else:
        # CKAN-2.6 or above: related don't exist any more
        sql += '''
        delete from package where id in (
            select id from package where state = 'to_delete');
        '''

    sql += '''
    commit;
    '''
    model.Session.execute(sql)

    # Refresh the index for this source to update the status object
    get_action('harvest_source_reindex')(context, {'id': harvest_source_id})

    return {'id': harvest_source_id}
Beispiel #36
0
def harvest_job_abort(context, data_dict):
    '''
    Aborts a harvest job. Given a harvest source_id, it looks for the latest
    one and (assuming it not already Finished) marks it as Finished. It also
    marks any of that source's harvest objects and (if not complete or error)
    marks them "ERROR", so any left in limbo are cleaned up. Does not actually
    stop running any queued harvest fetchs/objects.

    Specify either id or source_id.

    :param id: the job id to abort, or the id or name of the harvest source
               with a job to abort
    :type id: string
    :param source_id: the name or id of the harvest source with a job to abort
    :type source_id: string

    :returns: the dictized aborted job
    :raises NotFound: if neither id nor source_id is given, the source has
                      no jobs, or no matching source/job exists
    '''

    check_access('harvest_job_abort', context, data_dict)

    model = context['model']

    source_or_job_id = data_dict.get('source_id') or data_dict.get('id')
    if not source_or_job_id:
        # Original fell through here with `job` unbound, raising a bare
        # NameError below; fail explicitly with the exception type that
        # callers of this action already handle.
        raise NotFound('Specify either "id" or "source_id"')

    try:
        source = harvest_source_show(context, {'id': source_or_job_id})
    except NotFound:
        # Not a source id or name — treat the value as a job id.
        job = get_action('harvest_job_show')(context, {
            'id': source_or_job_id
        })
    else:
        # HarvestJob set status to 'Aborted'
        # Do not use harvest_job_list since it can use a lot of memory
        # Get the most recent job for the source
        job = model.Session.query(HarvestJob) \
                   .filter_by(source_id=source['id']) \
                   .order_by(HarvestJob.created.desc()).first()
        if not job:
            raise NotFound('Error: source has no jobs')
        job_id = job.id
        job = get_action('harvest_job_show')(context, {'id': job_id})

    if job['status'] != 'Finished':
        # i.e. New or Running
        job_obj = HarvestJob.get(job['id'])
        job_obj.status = new_status = 'Finished'
        model.repo.commit_and_remove()
        log.info('Harvest job changed status from "%s" to "%s"', job['status'],
                 new_status)
    else:
        log.info('Harvest job unchanged. Source %s status is: "%s"', job['id'],
                 job['status'])

    # HarvestObjects set to ERROR: clean up any objects left in limbo
    # (anything not already COMPLETE or ERROR).
    job_obj = HarvestJob.get(job['id'])
    objs = job_obj.objects
    for obj in objs:
        if obj.state not in ('COMPLETE', 'ERROR'):
            old_state = obj.state
            obj.state = 'ERROR'
            log.info('Harvest object changed state from "%s" to "%s": %s',
                     old_state, obj.state, obj.id)
        else:
            log.info('Harvest object not changed from "%s": %s', obj.state,
                     obj.id)
    model.repo.commit_and_remove()

    # Re-fetch after the commit so the dictized result reflects saved state.
    job_obj = HarvestJob.get(job['id'])
    return harvest_job_dictize(job_obj, context)
Beispiel #37
0
def harvest_jobs_run(context, data_dict):
    '''
    Runs scheduled jobs, checks if any jobs need marking as finished, and
    resubmits queue items if needed.

    This should be called every few minutes (e.g. by a cron), or else jobs
    will never show as finished.

    This used to also 'run' new jobs created by the web UI, putting them onto
    the gather queue, but now this is done by default when you create a job. If
    you need to send do this explicitly, then use
    ``harvest_send_job_to_gather_queue``.

    :param source_id: the id of the harvest source, if you just want to check
                      for its finished jobs (optional)
    :type source_id: string
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']
    source_id = data_dict.get('source_id')

    # Without a specific source, also create any jobs that are due on a
    # schedule.
    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Walk every Running job and flag as Finished those whose gather stage
    # is done and which have no objects still in flight.
    running_jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'Running'
    })
    for job in running_jobs:
        if not job['gather_finished']:
            continue

        pending_count = session.query(HarvestObject.id) \
                               .filter(HarvestObject.harvest_job_id == job['id']) \
                               .filter(and_((HarvestObject.state != u'COMPLETE'),
                                            (HarvestObject.state != u'ERROR'))) \
                               .count()
        if pending_count:
            log.debug('Ongoing job:%s source:%s', job['id'],
                      job['source_id'])
            continue

        job_obj = HarvestJob.get(job['id'])
        job_obj.status = u'Finished'
        log.info('Marking job as finished %s %s',
                 job_obj.source.url, job_obj.id)

        # Record the finish time from the object that imported last; if no
        # object ever finished importing, fall back to the gather time.
        last_object = session.query(HarvestObject) \
            .filter(HarvestObject.harvest_job_id == job['id']) \
            .filter(HarvestObject.import_finished != None) \
            .order_by(HarvestObject.import_finished.desc()) \
            .first()
        job_obj.finished = (last_object.import_finished if last_object
                            else job['gather_finished'])
        job_obj.save()

        # Reindex the harvest source dataset so it shows the latest status.
        get_action('harvest_source_reindex')(
            context, {
                'id': job_obj.source.id
            })

    # resubmit old redis tasks
    resubmit_jobs()

    return []  # merely for backwards compatibility
Beispiel #38
0
def proxy_ngsi_resource(context, data_dict):
    '''Chunked proxy for NGSI resources.

    Fetches the Context Broker URL stored on the resource (POSTing the
    stored payload for /querycontext URLs, plain GET otherwise) and streams
    the response back to the client, aborting if it exceeds MAX_FILE_SIZE.

    :param context: CKAN action context
    :param data_dict: must contain ``resource_id``
    :raises: aborts with 409/502/504 on validation, HTTP, connection or
        timeout errors respectively
    '''
    resource_id = data_dict['resource_id']
    log.info('Proxify resource {id}'.format(id=resource_id))
    resource = logic.get_action('resource_show')(context, {'id': resource_id})

    try:
        # Build request headers; attach the user's OAuth token when the
        # resource is flagged as requiring authentication.
        if 'oauth_req' in resource and resource['oauth_req'] == 'true':
            token = p.toolkit.c.usertoken['access_token']
            headers = {'X-Auth-Token': token,
                       'Content-Type': 'application/json',
                       'Accept': 'application/json'}
        else:
            headers = {'Content-Type': 'application/json',
                       'Accept': 'application/json'}

        if 'tenant' in resource:
            headers['Fiware-Service'] = resource['tenant']
        if 'service_path' in resource:
            headers['Fiware-ServicePath'] = resource['service_path']

        url = resource['url']
        parts = urlparse.urlsplit(url)

        if resource['format'] == 'ngsi-h':
            # NGSI history (STH Comet) queries require tenant/service path
            # headers and either lastN or both hLimit and hOffset params.
            if 'tenant' not in resource or len(resource['tenant']) == 0:
                details = 'Please complete the tenant field.'
                base.abort(409, detail=details)
            if 'service_path' not in resource or len(resource['service_path']) == 0:
                details = 'Please complete the service path field.'
                base.abort(409, detail=details)

            lastN = url.lower().find('lastn')
            hLimit = url.lower().find('hlimit')
            hOffset = url.lower().find('hoffset')

            if lastN == -1 and (hLimit == -1 or hOffset == -1):
                details = 'if no lastN is provided hLimit and hOffset are mandatory parameters.'
                base.abort(409, detail=details)

        if not parts.scheme or not parts.netloc:
            base.abort(409, detail='Invalid URL.')

        if url.lower().find('/querycontext') != -1:
            # queryContext requires a POST body stored on the resource;
            # normalize single quotes/whitespace so it parses as JSON.
            if 'payload' in resource:
                resource['payload'] = resource['payload'].replace("'", '"')
                resource['payload'] = resource['payload'].replace(" ", "")
            else:
                details = 'Please add a  payload to complete the query.'
                base.abort(409, detail=details)

            payload = json.dumps(json.loads(resource['payload']))
            r = requests.post(url, headers=headers, data=payload, stream=True)

        else:
            r = requests.get(url, headers=headers, stream=True)

        if r.status_code == 401:
            if 'oauth_req' in resource and resource['oauth_req'] == 'true':
                details = 'ERROR 401 token expired. Retrieving new token, reload please.'
                log.info(details)
                # BUG FIX: refresh the token *before* aborting.  The original
                # code called base.abort() (which raises) first, making the
                # usertoken_refresh() call unreachable, so an expired token
                # was never actually renewed.
                p.toolkit.c.usertoken_refresh()
                base.abort(409, detail=details)

            elif 'oauth_req' not in resource or resource['oauth_req'] == 'false':
                details = 'This query may need Oauth-token, please check if the token field on resource_edit is correct.'
                log.info(details)
                base.abort(409, detail=details)

        else:
            r.raise_for_status()
            base.response.content_type = r.headers['content-type']
            base.response.charset = r.encoding

        # Stream the upstream body through in chunks, enforcing a size cap.
        length = 0
        for chunk in r.iter_content(chunk_size=CHUNK_SIZE):
            base.response.body_file.write(chunk)
            length += len(chunk)
            if length >= MAX_FILE_SIZE:
                details = 'Content is too large to be proxied. Complete the Context Broker query with pagination parameters to resolve this issue.'
                base.abort(409, headers={'content-encoding': ''}, detail=details)

    except ValueError:
        details = ''
        base.abort(409, detail=details)
    except requests.HTTPError:
        details = 'Could not proxy ngsi_resource. We are working to resolve this issue as quickly as possible'
        base.abort(409, detail=details)
    except requests.ConnectionError:
        details = 'Could not proxy ngsi_resource because a connection error occurred.'
        base.abort(502, detail=details)
    except requests.Timeout:
        details = 'Could not proxy ngsi_resource because the connection timed out.'
        base.abort(504, detail=details)
 def result_page():
     """Resolve the stored query *pid* via the querystore in *fmt* format."""
     query_params = {
         'pid': pid,
         'records_format': fmt,
     }
     resolve = get_action('querystore_resolve')
     return resolve(None, dict(query_params))
Beispiel #40
0
            else:
                try:
                    date = h.date_str_to_datetime(revision_ref)
                    context['revision_date'] = date
                except TypeError, e:
                    base.abort(400, _('Invalid revision format: %r') % e.args)
                except ValueError, e:
                    base.abort(400, _('Invalid revision format: %r') % e.args)
        elif len(split) > 2:
            base.abort(
                400,
                _('Invalid revision format: %r') % 'Too many "@" symbols')

        # check if package exists
        try:
            c.pkg_dict = logic.get_action('package_show')(context, data_dict)
            c.pkg = context['package']
        except NotFound:
            base.abort(404, _('Dataset not found'))
        except NotAuthorized:
            base.abort(401, _('Unauthorized to read package %s') % id)

        # used by disqus plugin
        c.current_package_id = c.pkg.id
        c.related_count = c.pkg.related_count

        # can the resources be previewed?
        for resource in c.pkg_dict['resources']:
            resource['can_be_previewed'] = self._resource_preview({
                'resource':
                resource,
Beispiel #41
0
    def inject_pulled_data(self):
        """Import datasets from JSON dump files into this CKAN instance.

        Reads every file in ``self.options.dir_with_data`` (pulling the data
        first if no directory was provided), expects each file to be a JSON
        document with a ``result`` list of dataset dicts, and creates the
        groups, resources and packages via the CKAN action API as the site
        user.  Per-key usage statistics are collected and passed to
        ``log_stats`` at the end.
        """
        dir_with_data = self.options.dir_with_data
        if not dir_with_data:
            logger.info('Data dir is not provided. Pulling data...')
            dir_with_data = self.pull_data()
        site_user = logic.get_action('get_site_user')()
        logger.debug(
            'Using <User name={name} api_key={apikey}>'.format(**site_user))
        path = pathlib2.Path(dir_with_data)
        # Counters track how often each dict key appears (packages/resources)
        # and how many entities of each type were seen overall.
        pkg_counter = collections.Counter()
        res_counter = collections.Counter()
        entity_counter = collections.Counter()
        # Group ids already created — avoids duplicate group_create calls
        # when the same group appears on multiple datasets.
        group_set = set()
        for doc in path.iterdir():
            with doc.open() as fd:
                data = json.load(fd)
                datasets = data['result']
                logger.debug('{:3} datasets inside file://{}'.format(
                    len(datasets), doc))
                entity_counter.update(dataset=len(datasets))
                for dataset in datasets:
                    try:
                        pkg_counter.update(dataset.keys())
                        resources = dataset.get('resources', [])
                        entity_counter.update(resource=len(resources))
                        for group in dataset.get('groups', []):
                            if group['id'] in group_set:
                                continue
                            # Reshape the dumped group dict into the form
                            # expected by group_create.
                            group['name'] = group['id']
                            group['image_url'] = group['image_display_url']
                            group.pop('display_name', None)
                            group_set.add(group['id'])
                            logic.get_action('group_create')(dict(
                                model=model, user=site_user['name']), group)
                        # Iterate over a copy so invalid resources can be
                        # removed from the original list while looping.
                        for res in resources[:]:
                            res_counter.update(res.keys())
                            if not res['name']:
                                logger.warn(
                                    'Incorrect resource inside {}({})\n\t{}'.
                                    format(dataset['id'], doc, res))
                                resources.remove(res)
                                continue
                            if res['size']:
                                # assumes size is a string like "<number> <unit>"
                                # with the number expressed in KB — TODO confirm
                                # against the dump format.
                                res['size'] = int(
                                    float(res['size'].split()[0]) * 1024)
                            res['last_modified'] = _reformat_date(
                                res['last_modified'])
                            res['created'] = _reformat_date(res['created'])
                        # Normalize dataset fields for package_create.
                        dataset['name'] = dataset['id']
                        dataset['state'] = dataset['state'].lower()
                        dataset['type'] = dataset['type'].lower()
                        dataset['license_id'] = dataset['license_title']
                        pkg_dict = logic.get_action('package_create')(dict(
                            user=site_user['name'], model=model), dataset)
                        logger.debug('Created dataset with id: {}'.format(
                            pkg_dict['id']))
                    except Exception as e:
                        # Best-effort import: log the failure and continue
                        # with the next dataset rather than aborting the run.
                        logger.error('{}:<{}> {}, {}'.format(
                            type(e), dataset['id'], e, vars(e)))

        log_stats(entity_counter, pkg_counter, res_counter)
Beispiel #42
0
            user_obj = context['user_obj']
        except NotFound, e:
            abort(404, _('User not found'))

        c.reset_key = request.params.get('key')
        if not mailer.verify_reset_link(user_obj, c.reset_key):
            h.flash_error(_('Invalid reset key. Please try again.'))
            abort(403)

        if request.method == 'POST':
            try:
                context['reset_password'] = True
                new_password = self._get_form_password()
                user_dict['password'] = new_password
                user_dict['reset_key'] = c.reset_key
                user = get_action('user_update')(context, user_dict)

                h.flash_success(_("Your password has been reset."))
                h.redirect_to('/')
            except NotAuthorized:
                h.flash_error(_('Unauthorized to edit user %s') % id)
            except NotFound, e:
                h.flash_error(_('User not found'))
            except DataError:
                h.flash_error(_(u'Integrity Error'))
            except ValidationError, e:
                h.flash_error(u'%r' % e.error_dict)
            except ValueError, ve:
                h.flash_error(unicode(ve))

        c.user_dict = user_dict
Beispiel #43
0
def activity_list_to_html(context, activity_stream, extra_vars):
    '''Return the given activity stream as a snippet of HTML.

    :param activity_stream: the activity stream to render
    :type activity_stream: list of activity dictionaries
    :param extra_vars: extra variables to pass to the activity stream items
        template when rendering it
    :type extra_vars: dictionary

    :raises NotImplementedError: if an activity type has no registered
        renderer in ``activity_stream_string_functions``

    :rtype: HTML-formatted string

    '''
    activity_list = []  # These are the activity stream messages.
    for activity in activity_stream:
        detail = None
        activity_type = activity['activity_type']
        # Some activity types may have details.
        if activity_type in activity_stream_actions_with_detail:
            details = logic.get_action('activity_detail_list')(
                context=context, data_dict={
                    'id': activity['id']
                })
            # If an activity has just one activity detail then render the
            # detail instead of the activity.
            if len(details) == 1:
                detail = details[0]
                object_type = detail['object_type']

                if object_type == 'PackageExtra':
                    object_type = 'package_extra'

                new_activity_type = '%s %s' % (detail['activity_type'],
                                               object_type.lower())
                if new_activity_type in activity_stream_string_functions:
                    activity_type = new_activity_type

        # Idiom fix: ``not in`` instead of ``not x in y``.
        if activity_type not in activity_stream_string_functions:
            raise NotImplementedError("No activity renderer for activity "
                                      "type '%s'" % activity_type)

        # Fall back to the generic icon for types without a specific one.
        if activity_type in activity_stream_string_icons:
            activity_icon = activity_stream_string_icons[activity_type]
        else:
            activity_icon = activity_stream_string_icons['undefined']

        activity_msg = activity_stream_string_functions[activity_type](
            context, activity)

        # Get the data needed to render the message.  Raw string fixes the
        # invalid ``\{`` escape sequence (DeprecationWarning on modern
        # Python, SyntaxError in the future).
        matches = re.findall(r'\{([^}]*)\}', activity_msg)
        data = {}
        for match in matches:
            snippet = activity_snippet_functions[match](activity, detail)
            data[str(match)] = snippet

        activity_list.append({
            'msg': activity_msg,
            'type': activity_type.replace(' ', '-').lower(),
            'icon': activity_icon,
            'data': data,
            'timestamp': activity['timestamp'],
            'is_new': activity.get('is_new', False)
        })
    extra_vars['activities'] = activity_list
    return literal(
        base.render('activity_streams/activity_stream_items.html',
                    extra_vars=extra_vars))
Beispiel #44
0
def harvest_jobs_run(context, data_dict):
    '''Mark finished harvest jobs and send all new jobs to the gather queue.

    Running jobs with no pending objects are flagged as Finished (with a
    possible datajson parents/children re-run scheduled) and the harvest
    source dataset is reindexed.  Old redis tasks are resubmitted, then every
    New job for an active source is published to the gather queue.

    :param data_dict: may contain ``source_id`` to limit the run to one source
    :returns: list of job dicts that were sent to the gather queue
    :raises Exception: when there are no new harvest jobs to run
    '''
    log.info('Harvest job run: %r', data_dict)
    check_access('harvest_jobs_run', context, data_dict)

    session = context['session']

    source_id = data_dict.get('source_id', None)

    if not source_id:
        _make_scheduled_jobs(context, data_dict)

    context['return_objects'] = False

    # Flag finished jobs as such
    jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'Running'
    })
    if jobs:
        package_index = PackageSearchIndex()
        for job in jobs:
            if job['gather_finished']:
                # Count objects still in flight.  (The original also
                # ORDER BYed this query, which is pointless for a count()
                # and only wastes work in the database.)
                num_objects_in_progress = session.query(HarvestObject.id) \
                    .filter(HarvestObject.harvest_job_id == job['id']) \
                    .filter(and_((HarvestObject.state != u'COMPLETE'),
                                 (HarvestObject.state != u'ERROR'))) \
                    .count()

                if num_objects_in_progress == 0:
                    job_obj = HarvestJob.get(job['id'])
                    job_obj.status = u'Finished'

                    # Set the finish time from the last imported object.
                    last_object = session.query(HarvestObject) \
                        .filter(HarvestObject.harvest_job_id == job['id']) \
                        .filter(HarvestObject.import_finished != None) \
                        .order_by(HarvestObject.import_finished.desc()) \
                        .first()
                    if last_object:
                        job_obj.finished = last_object.import_finished
                    job_obj.save()

                    # recreate job for datajson collection or the like.
                    source = job_obj.source
                    source_config = json.loads(source.config or '{}')
                    datajson_collection = source_config.get(
                        'datajson_collection')
                    if datajson_collection == 'parents_run':
                        # Parents just finished: queue a children run next.
                        new_job = HarvestJob()
                        new_job.source = source
                        new_job.save()
                        source_config['datajson_collection'] = 'children_run'
                        source.config = json.dumps(source_config)
                        source.save()
                    elif datajson_collection:
                        # reset the key if 'children_run', or anything.
                        source_config.pop("datajson_collection", None)
                        source.config = json.dumps(source_config)
                        source.save()

                    # Reindex the harvest source dataset so it has the latest
                    # status
                    if 'extras_as_string' in context:
                        del context['extras_as_string']
                    context.update({'validate': False, 'ignore_auth': True})
                    package_dict = logic.get_action('package_show')(
                        context, {
                            'id': job_obj.source.id
                        })

                    if package_dict:
                        package_index.index_package(package_dict)

    # resubmit old redis tasks
    resubmit_jobs()

    # Check if there are pending harvest jobs
    jobs = harvest_job_list(context, {
        'source_id': source_id,
        'status': u'New'
    })
    if not jobs:
        log.info('No new harvest jobs.')
        raise Exception('There are no new harvesting jobs')

    # Send each job to the gather queue
    publisher = get_gather_publisher()
    sent_jobs = []
    for job in jobs:
        context['detailed'] = False
        source = harvest_source_show(context, {'id': job['source_id']})
        if source['active']:
            job_obj = HarvestJob.get(job['id'])
            job_obj.status = job['status'] = u'Running'
            job_obj.save()
            publisher.send({'harvest_job_id': job['id']})
            # Lazy %-args instead of eager string interpolation.
            log.info('Sent job %s to the gather queue', job['id'])
            sent_jobs.append(job)

    publisher.close()
    return sent_jobs
def _notifications_from_saved_searches(user_dict, since):
    '''Build notification activities for a user's saved searches.

    Re-runs each saved search, compares the result ids (and modification
    times) against the previous run, records any change as an activity dict,
    persists the new result set on the saved search, and hands the collected
    activities to ``_notifications_for_saved_searches``.

    Note: ``since`` is intentionally ignored — we only compare against the
    search's own last run, not when the caller last looked.
    '''
    context = {
        'model': model,
        'session': model.Session,
        'user': user_dict['id']
    }
    # FIXME: same comment as below regarding direct
    # access to model applies here - move to logic
    _search_list = model.saved_search.user_saved_searches_list(user_dict['id'])
    search_list = model_dictize.saved_search_list_dictize(
        _search_list, context)
    activity_list = []
    # NOTE: the original wrapped this loop body in two redundant
    # ``if True:`` levels — dead scaffolding left over from the
    # commented-out try/except for SearchQueryError/SearchError below.
    for search in search_list:
        # Rebuild the search parameters (q / fq / extras) from the saved
        # query string.  Also builds a link to search (needed for email
        # later).
        # FIXME: Put this into a function so that the listing of searches
        # can also do it.
        fq = ''
        q = ''
        search_extras = {}
        for (param, value) in _make_parameters(
                search['search_string'].replace("?", "")):
            if param not in ['q', 'page', 'sort'] \
                    and len(value) and not param.startswith('_'):
                if not param.startswith('ext_'):
                    if param == "organization":
                        param = "owner_org"
                    fq += ' %s:"%s"' % (param, value)
                else:
                    search_extras[param] = value
            elif param == 'q':
                q = value
            elif param == '_search_organization' and value != '0':
                fq += ' owner_org:%s' % (value)
            elif param == '_search_group' and value != '0':
                fq += ' groups:%s' % (value)
            elif param == '_search_package_type' and value != '0':
                package_type = value
                type_is_search_all = h.type_is_search_all(package_type)

                if not type_is_search_all:
                    # Only show datasets of this particular type
                    fq += ' +dataset_type:{type}'.format(
                        type=package_type)

        data_dict = {
            'q': q,
            'fq': fq.strip(),
            'rows': 1000,
            'extras': search_extras,
            'include_private': asbool(
                config.get('ckan.search.default_include_private',
                           True)),
        }

        query = logic.get_action('package_search')(context, data_dict)
        ids = set(result['id'] for result in query['results'])
        if search['last_run']:
            last_ids = set(search['last_results'])
            # If there's a difference in result lists
            if len(ids - last_ids) > 0:
                activity = {
                    'data': {
                        'search_url':
                        config.get('ckan.site_url') +
                        search['search_url_in_ckan'],
                        'activity_type':
                        'search_results_changed'
                    }
                }
                activity_list.append(activity)
            else:
                # If any result updated since the last run.  Hoist the
                # loop-invariant parse of last_run out of the loop.
                fmt = '%Y-%m-%dT%H:%M:%S.%f'
                last_run = datetime.datetime.strptime(search['last_run'], fmt)
                for result in query['results']:
                    if datetime.datetime.strptime(
                            result['metadata_modified'], fmt) > last_run:
                        activity = {
                            'data': {
                                'search_url':
                                config.get('ckan.site_url') +
                                search['search_url_in_ckan'],
                                'activity_type':
                                'search_results_updated'
                            }
                        }
                        activity_list.append(activity)
                        break

        search['last_results'] = list(ids)
        search['last_run'] = datetime.datetime.utcnow()

        model_save.saved_search_dict_save(search, context)

        if not context.get('defer_commit'):
            model.repo.commit()

        #except SearchQueryError, se:
        #    # FIXME: Ideally, tell user about this so they can delete/edit
        #    log.error('Dataset search query rejected: %r', se.args)
        #except SearchError, se:
        #    # FIXME: Ideally, tell user about this so they can delete/edit/inform admin
        #    log.error('Dataset search error: %r', se.args)

    return _notifications_for_saved_searches(activity_list, user_dict)
Beispiel #46
0
def update_config():
    ''' This code needs to be run when the config is changed to take those
    changes into account. It is called whenever a plugin is loaded as the
    plugin might have changed the config values (for instance it might
    change ckan.site_url)

    Raises RuntimeError when ckan.site_url is missing or not a full URL,
    and CkanConfigurationException for an invalid display timezone or
    base templates folder.
    '''

    # Let every IConfigurer plugin mutate the config first.
    for plugin in p.PluginImplementations(p.IConfigurer):
        # must do update in place as this does not work:
        # config = plugin.update_config(config)
        plugin.update_config(config)

    # Set whitelisted env vars on config object
    # This is set up before globals are initialized

    # Legacy CKAN_DB env var still honoured, but deprecated.
    ckan_db = os.environ.get('CKAN_DB', None)
    if ckan_db:
        msg = 'Setting CKAN_DB as an env var is deprecated and will be' \
            ' removed in a future release. Use CKAN_SQLALCHEMY_URL instead.'
        log.warn(msg)
        config['sqlalchemy.url'] = ckan_db

    # Environment variables override .ini settings for whitelisted options.
    for option in CONFIG_FROM_ENV_VARS:
        from_env = os.environ.get(CONFIG_FROM_ENV_VARS[option], None)
        if from_env:
            config[option] = from_env

    # Root of the CKAN package (two levels above this module).
    root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    site_url = config.get('ckan.site_url', '')
    if not site_url:
        raise RuntimeError(
            'ckan.site_url is not configured and it must have a value.'
            ' Please amend your .ini file.')
    if not site_url.lower().startswith('http'):
        raise RuntimeError(
            'ckan.site_url should be a full URL, including the schema '
            '(http or https)')

    display_timezone = config.get('ckan.display_timezone', '')
    if (display_timezone and display_timezone != 'server'
            and display_timezone not in pytz.all_timezones):
        raise CkanConfigurationException(
            "ckan.display_timezone is not 'server' or a valid timezone")

    # Remove backslash from site_url if present
    config['ckan.site_url'] = config['ckan.site_url'].rstrip('/')

    # Derive ckan.host (and a default ckan.site_id) from the site URL.
    ckan_host = config['ckan.host'] = urlparse(site_url).netloc
    if config.get('ckan.site_id') is None:
        if ':' in ckan_host:
            ckan_host, port = ckan_host.split(':')
        assert ckan_host, 'You need to configure ckan.site_url or ' \
                          'ckan.site_id for SOLR search-index rebuild to work.'
        config['ckan.site_id'] = ckan_host

    # ensure that a favicon has been set
    favicon = config.get('ckan.favicon', '/base/images/ckan.ico')
    config['ckan.favicon'] = favicon

    # Init SOLR settings and check if the schema is compatible
    # from ckan.lib.search import SolrSettings, check_solr_schema_version

    # lib.search is imported here as we need the config enabled and parsed
    search.SolrSettings.init(config.get('solr_url'), config.get('solr_user'),
                             config.get('solr_password'))
    search.check_solr_schema_version()

    # Rebuild the Routes URL map and reset per-package-type plugins so a
    # freshly loaded plugin's routes/schemas take effect.
    routes_map = routing.make_map()

    lib_plugins.reset_package_plugins()
    lib_plugins.set_default_package_plugin()
    lib_plugins.reset_group_plugins()
    lib_plugins.set_default_group_plugin()

    config['routes.map'] = routes_map
    # The RoutesMiddleware needs its mapper updating if it exists
    if 'routes.middleware' in config:
        config['routes.middleware'].mapper = routes_map
    # routes.named_routes is a CKAN thing
    config['routes.named_routes'] = routing.named_routes
    config['pylons.app_globals'] = app_globals.app_globals
    # initialise the globals
    app_globals.app_globals._init()

    helpers.load_plugin_helpers()
    config['pylons.h'] = helpers.helper_functions

    # Templates and CSS loading from configuration
    valid_base_templates_folder_names = ['templates', 'templates-bs2']
    templates = config.get('ckan.base_templates_folder', 'templates')
    config['ckan.base_templates_folder'] = templates

    if templates not in valid_base_templates_folder_names:
        raise CkanConfigurationException(
            'You provided an invalid value for ckan.base_templates_folder. '
            'Possible values are: "templates" and "templates-bs2".')

    jinja2_templates_path = os.path.join(root, templates)
    log.info('Loading templates from %s' % jinja2_templates_path)
    template_paths = [jinja2_templates_path]

    extra_template_paths = config.get('extra_template_paths', '')
    if extra_template_paths:
        # must be first for them to override defaults
        template_paths = extra_template_paths.split(',') + template_paths
    config['computed_template_paths'] = template_paths

    # Set the default language for validation messages from formencode
    # to what is set as the default locale in the config
    default_lang = config.get('ckan.locale_default', 'en')
    formencode.api.set_stdtranslation(domain="FormEncode",
                                      languages=[default_lang])

    # Markdown ignores the logger config, so to get rid of excessive
    # markdown debug messages in the log, set it to the level of the
    # root logger.
    logging.getLogger("MARKDOWN").setLevel(logging.getLogger().level)

    # Create Jinja2 environment
    env = jinja_extensions.Environment(
        **jinja_extensions.get_jinja_env_options())
    env.install_gettext_callables(_, ungettext, newstyle=True)
    # custom filters
    env.filters['empty_and_escape'] = jinja_extensions.empty_and_escape
    config['pylons.app_globals'].jinja_env = env

    # CONFIGURATION OPTIONS HERE (note: all config options will override
    # any Pylons config options)

    # Initialize SQLAlchemy
    engine = sqlalchemy.engine_from_config(config)
    model.init_model(engine)

    # IConfigurable plugins run after everything above is wired up.
    for plugin in p.PluginImplementations(p.IConfigurable):
        plugin.configure(config)

    # reset the template cache - we do this here so that when we load the
    # environment it is clean
    render.reset_template_info_cache()

    # clear other caches
    logic.clear_actions_cache()
    logic.clear_validators_cache()
    authz.clear_auth_functions_cache()

    # Here we create the site user if they are not already in the database
    try:
        logic.get_action('get_site_user')({'ignore_auth': True}, None)
    except (sqlalchemy.exc.ProgrammingError, sqlalchemy.exc.OperationalError):
        # (ProgrammingError for Postgres, OperationalError for SQLite)
        # The database is not initialised.  This is a bit dirty.  This occurs
        # when running tests.
        pass
    except sqlalchemy.exc.InternalError:
        # The database is not initialised.  Travis hits this
        pass

    # Close current session and open database connections to ensure a clean
    # clean environment even if an error occurs later on
    model.Session.remove()
    model.Session.bind.dispose()
Beispiel #47
0
def test_action_plugin_override():
    """Check that a plugin's action override is active only while loaded."""
    def current_status():
        return logic.get_action("status_show")(None, {})

    baseline = current_status()
    with plugins.use_plugin("action_plugin"):
        # While the plugin is loaded its replacement action is in effect.
        assert current_status() != baseline
    # Unloading the plugin restores the original action.
    assert current_status() == baseline
Beispiel #48
0
    def _create_or_update_package(self, package_dict, harvest_object):
        '''
        Creates a new package or updates an existing one according to the
        package dictionary provided. The package dictionary should look like
        the REST API response for a package:

        http://ckan.net/api/rest/package/statistics-catalunya

        Note that the package_dict must contain an id, which will be used to
        check if the package needs to be created or updated (use the remote
        dataset id).

        If the remote server provides the modification date of the remote
        package, add it to package_dict['metadata_modified'].

        :param package_dict: package dict in REST-API form; must contain 'id'
        :param harvest_object: the harvest object being imported; its guid is
            used for logging and it is flagged as the current object on success
        :returns: True on success, None when the package was skipped or a
            validation error was recorded

        TODO: Not sure it is worth keeping this function. If useful it should
        use the output of package_show logic function (maybe keeping support
        for rest api based dicts
        '''
        try:
            # Change default schema: accept a caller-supplied id and silently
            # drop unrecognised junk keys instead of failing validation.
            schema = default_create_package_schema()
            schema['id'] = [ignore_missing, unicode]
            schema['__junk'] = [ignore]

            # Check API version from the harvest-source config (defaults to 2)
            if self.config:
                try:
                    api_version = int(self.config.get('api_version', 2))
                except ValueError:
                    raise ValueError('api_version must be an integer')

                #TODO: use site user when available
                user_name = self.config.get('user', u'harvest')
            else:
                # No source config provided: fall back to defaults.
                api_version = 2
                user_name = u'harvest'

            context = {
                'model': model,
                'session': Session,
                'user': user_name,
                'api_version': api_version,
                'schema': schema,
                'ignore_auth': True,
            }

            if self.config and self.config.get('clean_tags', False):
                # Munge tags into a safe form, dropping empties and duplicates.
                tags = package_dict.get('tags', [])
                tags = [munge_tag(t) for t in tags if munge_tag(t) != '']
                tags = list(set(tags))
                package_dict['tags'] = tags

            # Check if package exists
            data_dict = {}
            data_dict['id'] = package_dict['id']
            try:
                existing_package_dict = get_action('package_show')(context,
                                                                   data_dict)

                # In case name has been modified when first importing. See issue #101.
                package_dict['name'] = existing_package_dict['name']

                # Check modified date: update when no remote date is known, or
                # the remote copy is newer than the local one.
                if not 'metadata_modified' in package_dict or \
                   package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'):
                    log.info(
                        'Package with GUID %s exists and needs to be updated' %
                        harvest_object.guid)
                    # Update package
                    # NOTE(review): the id is stored on the context rather than
                    # the data dict -- confirm package_update_rest reads it there.
                    context.update({'id': package_dict['id']})
                    package_dict.setdefault('name',
                                            existing_package_dict['name'])
                    new_package = get_action('package_update_rest')(
                        context, package_dict)

                else:
                    log.info('Package with GUID %s not updated, skipping...' %
                             harvest_object.guid)
                    return

                # Flag the other objects linking to this package as not current anymore
                from ckanext.harvest.model import harvest_object_table
                conn = Session.connection()
                u = update(harvest_object_table) \
                        .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \
                        .values(current=False)
                conn.execute(u, b_package_id=new_package['id'])

                # Flag this as the current harvest object

                harvest_object.package_id = new_package['id']
                harvest_object.current = True
                harvest_object.save()

            except NotFound:
                # Package needs to be created

                # Get rid of auth audit on the context otherwise we'll get an
                # exception
                context.pop('__auth_audit', None)

                # Set name for new package to prevent name conflict, see issue #117
                if package_dict.get('name', None):
                    package_dict['name'] = self._gen_new_name(
                        package_dict['name'])
                else:
                    package_dict['name'] = self._gen_new_name(
                        package_dict['title'])

                log.info(
                    'Package with GUID %s does not exist, let\'s create it' %
                    harvest_object.guid)
                harvest_object.current = True
                harvest_object.package_id = package_dict['id']
                # Defer constraints and flush so the dataset can be indexed with
                # the harvest object id (on the after_show hook from the harvester
                # plugin)
                harvest_object.add()

                model.Session.execute(
                    'SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED')
                model.Session.flush()

                new_package = get_action('package_create_rest')(context,
                                                                package_dict)

            Session.commit()

            return True

        except ValidationError, e:
            # Python 2 except syntax. Record the failure against the harvest
            # object instead of letting it propagate.
            log.exception(e)
            self._save_object_error(
                'Invalid package with GUID %s: %r' %
                (harvest_object.guid, e.error_dict), harvest_object, 'Import')
    def download(self):
        '''Export every public dataset's resources as a CSV attachment.

        Searches the index for all public datasets, then writes one CSV row
        per resource to the response (with a Content-Disposition header so
        browsers download it as ``resources.csv``) and returns the response.

        Fix over the previous version: ``dict.get`` can return ``None`` for
        missing text fields (resource name/description, dataset notes or
        category, organization) and the old chained ``.replace`` calls then
        raised AttributeError, aborting the whole export.  ``_clean`` below
        tolerates ``None``, and datasets without an organization no longer
        crash the row loop.
        '''
        from ckan.common import response
        import unicodecsv as csv

        def _clean(value):
            # Flatten a text cell for CSV: strip newlines/CRs/tabs and
            # encode to UTF-8.  Missing values become an empty string.
            if value is None:
                return ''
            return value.replace("\n", "").replace("\r", "") \
                        .replace("\t", "").encode('utf-8', 'ignore')

        response.headers[
            "Content-Disposition"] = "attachment; filename=resources.csv"
        context = {
            'model': model,
            'session': model.Session,
            'user': c.user or c.author,
            'auth_user_obj': c.userobj
        }
        # One big search for every public dataset (rows is effectively
        # "no limit"), newest first.
        data_dict = {
            'q': '*:*',
            'facet': 'false',
            'start': 0,
            'rows': 10000000,
            'sort': 'metadata_created desc',
            'fq': 'capacity:"public" +type:dataset'
        }
        query = logic.get_action('package_search')(context, data_dict)
        datasets = query['results']

        writer = csv.writer(response)
        # Header row: translated labels, cleaned like the data cells.
        header_labels = (
            _('Dataset ID'), _('Name'), _('Description'), _('URL'),
            _('Format'), _('Type'), _('5 Stars of Openness'),
            _('Creation Date'), _('Last Modified'), _('Dataset name'),
            _('Dataset title'), _('Dataset notes'), _('Dataset category'),
            _('Dataset creation date'), _('Dataset modification date'),
            _('Organization name'), _('Organization title'))
        writer.writerow([_clean(label) for label in header_labels])

        for dataset in datasets:
            # Datasets may have no organization; use an empty dict so the
            # org columns come out blank instead of crashing.
            org = dataset.get('organization') or {}
            for resource in dataset.get('resources'):
                writer.writerow([
                    resource.get('id'),
                    _clean(resource.get('name')),
                    _clean(resource.get('description')),
                    resource.get('url'),
                    resource.get('format'),
                    resource.get('resource_type'),
                    resource.get('openness_score'),
                    resource.get('created'),
                    resource.get('last_modified'),
                    _clean(dataset.get('name')),
                    _clean(dataset.get('title')),
                    _clean(dataset.get('notes')),
                    _clean(dataset.get('category')),
                    dataset.get('metadata_created'),
                    dataset.get('metadata_modified'),
                    _clean(org.get('name')),
                    _clean(org.get('title')),
                ])
        return response
Beispiel #50
0
def xloader_hook(context, data_dict):
    ''' Update xloader task. This action is typically called by ckanext-xloader
    whenever the status of a job changes.

    :param metadata: metadata provided when submitting job. key-value pairs.
                     Must have resource_id property.
    :type metadata: dict
    :param status: status of the job from the xloader service. Allowed values:
                   pending, running, running_but_viewable, complete, error
                   (which must all be valid values for task_status too)
    :type status: string
    :param error: Error raised during job execution
    :type error: string

    NB here are other params which are in the equivalent object in
    ckan-service-provider (from job_status):
        :param sent_data: Input data for job
        :type sent_data: json encodable data
        :param job_id: An identifier for the job
        :type job_id: string
        :param result_url: Callback url
        :type result_url: url string
        :param data: Results from job.
        :type data: json encodable data
        :param requested_timestamp: Time the job started
        :type requested_timestamp: timestamp
        :param finished_timestamp: Time the job finished
        :type finished_timestamp: timestamp

    '''

    metadata, status = _get_or_bust(data_dict, ['metadata', 'status'])

    res_id = _get_or_bust(metadata, 'resource_id')

    # Pass metadata, not data_dict, as it contains the resource id needed
    # on the auth checks
    p.toolkit.check_access('xloader_submit', context, metadata)

    # Fetch the task_status record tracking this resource's xloader job.
    task = p.toolkit.get_action('task_status_show')(context, {
        'entity_id': res_id,
        'task_type': 'xloader',
        'key': 'xloader'
    })

    task['state'] = status
    task['last_updated'] = six.text_type(datetime.datetime.utcnow())
    task['error'] = data_dict.get('error')

    resubmit = False

    if status in ('complete', 'running_but_viewable'):
        # Create default views for resource if necessary (only the ones that
        # require data to be in the DataStore)
        resource_dict = p.toolkit.get_action('resource_show')(
            context, {'id': res_id})

        dataset_dict = p.toolkit.get_action('package_show')(
            context, {'id': resource_dict['package_id']})

        # Let IXloader plugins react to the finished upload.
        for plugin in p.PluginImplementations(xloader_interfaces.IXloader):
            plugin.after_upload(context, resource_dict, dataset_dict)

        logic.get_action('resource_create_default_resource_views')(
            context,
            {
                'resource': resource_dict,
                'package': dataset_dict,
                'create_datastore_views': True,
            })

        # Check if the uploaded file has been modified in the meantime
        if (resource_dict.get('last_modified')
                and metadata.get('task_created')):
            try:
                last_modified_datetime = parse_date(
                    resource_dict['last_modified'])
                task_created_datetime = parse_date(metadata['task_created'])
                if last_modified_datetime > task_created_datetime:
                    log.debug('Uploaded file more recent: {0} > {1}'.format(
                        last_modified_datetime, task_created_datetime))
                    resubmit = True
            except ValueError:
                # Unparseable dates: play it safe and do not resubmit.
                pass
        # Check if the URL of the file has been modified in the meantime
        elif (resource_dict.get('url')
                and metadata.get('original_url')
                and resource_dict['url'] != metadata['original_url']):
            log.debug('URLs are different: {0} != {1}'.format(
                resource_dict['url'], metadata['original_url']))
            resubmit = True

    # The hook is called by the xloader service, not a logged-in user, so
    # skip auth for the status update itself.
    context['ignore_auth'] = True
    p.toolkit.get_action('task_status_update')(context, task)

    if resubmit:
        # Log message fixed: this is the xloader extension, not DataPusher.
        log.debug('Resource {0} has been modified, '
                  'resubmitting to xloader'.format(res_id))
        p.toolkit.get_action('xloader_submit')(
            context, {'resource_id': res_id})
Beispiel #51
0
 def test_member_create_raises_if_user_unauthorized_to_update_group(self):
     # member_create must refuse when the acting user lacks update rights
     # on the target group.
     # NOTE(review): the user literal looks redacted ("******") -- confirm
     # the real fixture username.
     ctx, dd = self._build_context(
         self.pkgs[0].id, "package", user="******"
     )
     with pytest.raises(logic.NotAuthorized):
         logic.get_action("member_create")(ctx, dd)
def unsafe_user_show(context, data_dict):
    '''Return a user account, skipping the normal user_show auth check.

    Callers must opt in explicitly by setting ``unsafe_user_show`` to a
    truthy value on the context; otherwise a ValidationError is raised.

    Either the ``id`` or the ``user_obj`` parameter must be given.

    :param id: the id or name of the user (optional)
    :type id: string
    :param user_obj: the user dictionary of the user (optional)
    :type user_obj: user dictionary
    :param include_datasets: Include a list of datasets the user has created.
        If it is the same user or a sysadmin requesting, it includes datasets
        that are draft or private.
        (optional, default:``False``, limit:50)
    :type include_datasets: bool
    :param include_num_followers: Include the number of followers the user has
        (optional, default:``False``)
    :type include_num_followers: bool
    :param include_password_hash: Include the stored password hash
        (sysadmin only, optional, default:``False``)
    :type include_password_hash: bool
    :returns: the details of the user. Includes email_hash, number_of_edits and
        number_created_packages (which excludes draft or private datasets
        unless it is the same user or sysadmin making the request). Excludes
        the password (hash) and reset_key. If it is the same user or a
        sysadmin requesting, the email and apikey are included.
    :rtype: dictionary
    '''

    model = context['model']

    # Guard: require the explicit opt-in flag on the context.
    try:
        unsafe_show = asbool(context.get('unsafe_user_show', 'False'))
        if not unsafe_show:
            raise ValidationError(
                'There is no context setup for unsafe_user_show')
    except ValueError:
        # asbool raises ValueError for values it cannot interpret as boolean.
        raise ValidationError(
            "Context unsafe_user_show should be boolean True or False")

    # Resolve the user object either by id/name lookup or from the
    # caller-supplied object.
    id = data_dict.get('id', None)
    provided_user = data_dict.get('user_obj', None)
    if id:
        user_obj = model.User.get(id)
        context['user_obj'] = user_obj
        if user_obj is None:
            raise NotFound
    elif provided_user:
        context['user_obj'] = user_obj = provided_user
    else:
        raise NotFound

    # The standard access check is deliberately disabled -- that is what
    # makes this variant "unsafe".
    #_check_access('user_show', context, data_dict)

    # include private and draft datasets?
    requester = context.get('user')
    sysadmin = False
    if requester:
        sysadmin = authz.is_sysadmin(requester)
        requester_looking_at_own_account = requester == user_obj.name
        include_private_and_draft_datasets = (sysadmin or
                                              requester_looking_at_own_account)
    else:
        include_private_and_draft_datasets = False
    context['count_private_and_draft_datasets'] = \
        include_private_and_draft_datasets

    # Password hash is exposed only to sysadmins, and only when requested.
    include_password_hash = sysadmin and asbool(
        data_dict.get('include_password_hash', False))

    user_dict = model_dictize.user_dictize(user_obj, context,
                                           include_password_hash)

    if context.get('return_minimal'):
        log.warning('Use of the "return_minimal" in user_show is '
                    'deprecated.')
        return user_dict

    if asbool(data_dict.get('include_datasets', False)):
        user_dict['datasets'] = []

        # Search for datasets created by this user, capped at 50 rows.
        fq = "+creator_user_id:{0}".format(user_dict['id'])

        search_dict = {'rows': 50}

        if include_private_and_draft_datasets:
            search_dict.update({
                'include_private': True,
                'include_drafts': True
            })

        search_dict.update({'fq': fq})

        user_dict['datasets'] = \
            logic.get_action('package_search')(context=context,
                                               data_dict=search_dict) \
                .get('results')

    if asbool(data_dict.get('include_num_followers', False)):
        user_dict['num_followers'] = logic.get_action('user_follower_count')(
            {
                'model': model,
                'session': model.Session
            }, {
                'id': user_dict['id']
            })

    return user_dict
Beispiel #53
0
 def _member_delete(self, obj, obj_type):
     """Remove *obj* (of *obj_type*) from the group via member_delete."""
     context, data_dict = self._build_context(obj, obj_type)
     member_delete = logic.get_action("member_delete")
     return member_delete(context, data_dict)
        if (c.pkg_dict.get('type')
                == 'application') and c.pkg_dict.get('dataset_name'):
            try:
                data_dict = {
                    'q':
                    '*:*',
                    'fq':
                    '+type:dataset +name:("' +
                    '" OR "'.join(c.pkg_dict.get('dataset_name')) + '")',
                    'facet':
                    'false',
                    'sort':
                    'metadata_modified desc',
                }
                query = logic.get_action('package_search')(context, data_dict)
                c.datasets = query['results']
            except:
                pass

        if (c.pkg_dict.get('type') == 'dataset'):
            try:
                data_dict = {
                    'q':
                    '*:*',
                    'facet':
                    'false',
                    'rows':
                    3,
                    'start':
                    0,
Beispiel #55
0
 def _member_create_as_user(self, obj, obj_type, capacity, user):
     """Add *obj* to cls.group with *capacity*, acting as the given user."""
     context, data_dict = self._build_context(
         obj, obj_type, capacity, user=user)
     member_create = logic.get_action("member_create")
     return member_create(context, data_dict)
Beispiel #56
0
 def _member_delete_in_group(self, obj, obj_type, group_id):
     """Remove *obj* from the group identified by *group_id*."""
     context, data_dict = self._build_context(
         obj, obj_type, group_id=group_id)
     member_delete = logic.get_action("member_delete")
     return member_delete(context, data_dict)
Beispiel #57
0
    def get_package_dict(self, context, data_dict):
        '''Post-process a harvested package dict before it is saved.

        Copies selected extras onto first-class package fields (copyright
        notice, contact email, responsible party), resolves the owning
        organization from the responsible party name, derives a content
        type from the resource type, applies a license from the harvest
        source config or from the record's own licence text, and adds
        resource date extras and topic-category tags.

        :param context: action context, passed through to tag conversion
        :param data_dict: must contain 'package_dict', 'harvest_object'
            (whose source config is read) and 'iso_values'
        :returns: the modified package_dict
        '''

        package_dict = data_dict['package_dict']

        # Copy the first value of list-typed extras onto a package extra.
        list_map = {'access_constraints': 'copyright_notice'}

        for source, target in list_map.iteritems():
            for extra in package_dict['extras']:
                if extra['key'] == source:
                    value = json.loads(extra['value'])
                    if len(value):
                        package_dict['extras'].append({
                            'key': target,
                            'value': value[0]
                        })

        # Copy scalar extras directly onto (possibly several) package fields.
        value_map = {'contact-email': ['maintainer_email', 'author_email']}

        for source, target in value_map.iteritems():
            for extra in package_dict['extras']:
                if extra['key'] == source and len(extra['value']):
                    for target_key in target:
                        package_dict[target_key] = extra['value']

        # NOTE(review): 'map' shadows the builtin; harmless here but worth
        # renaming if this block is ever touched again.
        map = {'responsible-party': ['maintainer', 'author']}

        # NOTE(review): the user value looks redacted ('******') -- confirm
        # the intended harvest user name.
        harvester_context = {'model': model, 'session': Session, 'user': '******'}
        for source, target in map.iteritems():
            for extra in package_dict['extras']:
                if extra['key'] == source:
                    value = json.loads(extra['value'])
                    if len(value):
                        for target_key in target:
                            package_dict[target_key] = value[0]['name']

                        # find responsible party from orgs
                        try:
                            name = munge_title_to_name(value[0]['name'])
                            group = get_action('organization_show')(harvester_context, {'id': name})
                            package_dict['owner_org'] = group['id']
                        except NotFound:
                            # No matching organization: leave owner_org unset.
                            pass

        config_obj = json.loads(data_dict['harvest_object'].source.config)
        license_from_source = config_obj.get("license", None)

        for extra in package_dict['extras']:
            # Map the ISO resource type to a localized content type and
            # convert it into a tag on the package.
            if extra['key'] == 'resource-type' and len(extra['value']):
                    if extra['value'] == 'dataset':
                        value = 'paikkatietoaineisto'
                    elif extra['value'] == 'series':
                        value = 'paikkatietoaineistosarja'
                    elif extra['value'] == 'service':
                        value = 'paikkatietopalvelu'
                        for temp_extra in package_dict['extras']:
                            if temp_extra['key'] == 'collection_type':
                                temp_extra['value'] = 'Interoperability Tools'
                    else:
                        continue

                    package_dict['content_type'] = value
                    # Round-trip through the flattened form so the standard
                    # tag-string converter can be reused.
                    flattened = flatten_dict(package_dict)
                    convert_to_tags_string('content_type')(('content_type',), flattened, {}, context)
                    package_dict = unflatten(flattened)

            # License: source config wins; otherwise take the record's own
            # licence text and extract the first URL from it, if any.
            if license_from_source is None:
                if extra['key'] == 'licence':
                    value = json.loads(extra['value'])
                    if len(value):
                        package_dict['license'] = value
                        urls = []
                        for i in value:
                            urls += re.findall(r'(https?://\S+)', i)
                        if len(urls):
                            # Strip a trailing full stop picked up by the regex.
                            if urls[0].endswith('.'):
                                urls[0] = urls[0][:-1]
                            package_dict['extras'].append({
                                "key": 'license_url',
                                'value': urls[0]
                            })
            else:
                package_dict['license_id'] = license_from_source

            # Expose the ISO reference dates as dedicated extras.
            if extra['key'] == 'dataset-reference-date' and len(extra['value']):
                value = json.loads(extra['value'])
                for dates in value:
                    if dates.get("type") == "creation":
                        package_dict['extras'].append({
                            "key": 'resource_created',
                            'value': dates.get("value")
                        })
                    elif dates.get("type") == "publication":
                        package_dict['extras'].append({
                            "key": 'resource_published',
                            'value': dates.get("value")
                        })
                    elif dates.get("type") == "revision":
                        package_dict['extras'].append({
                            "key": 'resource_modified',
                            'value': dates.get("value")
                        })

        # topic category for syke

        # Add each topic category as a tag, truncated to 50 characters.
        topic_categories = data_dict['iso_values'].get('topic-category')
        if topic_categories:
            for category in topic_categories:
                category = category[:50] if len(category) > 50 else category
                package_dict['tags'].append({'name': category})

        return package_dict
Beispiel #58
0
 def _member_list(self, obj_type=None, capacity=None, group_id=None):
     """List group members, optionally filtered by type and capacity."""
     context, data_dict = self._build_context(
         None, obj_type, capacity, group_id)
     member_list = logic.get_action("member_list")
     return member_list(context, data_dict)
Beispiel #59
0
def user_create(context, data_dict):
    '''Create a new user.

    You must be authorized to create users.

    :param name: the name of the new user, a string between 2 and 100
        characters in length, containing only lowercase alphanumeric
        characters, ``-`` and ``_``
    :type name: string
    :param email: the email address for the new user
    :type email: string
    :param password: the password of the new user, a string of at least 4
        characters
    :type password: string
    :param id: the id of the new user (optional)
    :type id: string
    :param fullname: the full name of the new user (optional)
    :type fullname: string
    :param about: a description of the new user (optional)
    :type about: string
    :param openid: (optional)
    :type openid: string

    :returns: the newly created user
    :rtype: dictionary

    '''
    model = context['model']
    schema = context.get('schema') or ckan.logic.schema.default_user_schema()
    session = context['session']

    _check_access('user_create', context, data_dict)

    data, errors = _validate(data_dict, schema, context)

    if errors:
        session.rollback()
        raise ValidationError(errors)

    user = model_save.user_dict_save(data, context)

    # Flush the session to cause user.id to be initialised, because
    # activity_create() (below) needs it.
    session.flush()

    # Record a 'new user' activity; commit is deferred so it joins the same
    # transaction as the user row.
    activity_create_context = {
        'model': model,
        'user': context['user'],
        'defer_commit': True,
        'ignore_auth': True,
        'session': session
    }
    activity_dict = {
        'user_id': user.id,
        'object_id': user.id,
        'activity_type': 'new user',
    }
    logic.get_action('activity_create')(activity_create_context, activity_dict)

    if not context.get('defer_commit'):
        model.repo.commit()

    # A new context is required for dictizing the newly constructed user in
    # order that all the new user's data is returned, in particular, the
    # api_key.
    #
    # The context is copied so as not to clobber the caller's context dict.
    user_dictize_context = context.copy()
    user_dictize_context['keep_apikey'] = True
    user_dictize_context['keep_email'] = True
    user_dict = model_dictize.user_dictize(user, user_dictize_context)

    context['user_obj'] = user
    context['id'] = user.id

    model.Dashboard.get(user.id)  # Create dashboard for user.

    log.debug('Created user {name}'.format(name=user.name))
    return user_dict
Beispiel #60
0
 def _member_create_in_group(self, obj, obj_type, capacity, group_id):
     """Add *obj* as a member of the group given by *group_id*."""
     context, data_dict = self._build_context(
         obj, obj_type, capacity, group_id)
     member_create = logic.get_action("member_create")
     return member_create(context, data_dict)