Example #1
 def test_form_submit_success_xml_file_from_upload(self, app, monkeypatch,
                                                   tmpdir, ckan_config):
     _patch_storage_path(monkeypatch, tmpdir, ckan_config)
     files = {'upload': 'ddi_test.xml'}
     resp = _post_request(app,
                          '/dataset/import', {},
                          files,
                          self.extra_environ,
                          status=302)
     expected_id = 'ddi-test-1'
     try:
         toolkit.requires_ckan_version("2.9")
         assert ('/dataset/{}/resource/new'.format(expected_id)
                 in resp.headers['location'])
     except toolkit.CkanVersionException:
         assert ('/dataset/new_resource/{}'.format(expected_id)
                 in resp.headers['location'])
     dataset = toolkit.get_action('package_show')({
         'ignore_auth': True
     }, {
         'id': expected_id
     })
     assert dataset
     assert len(dataset['resources']) == 1
     assert 'ddi_test.xml' in dataset['resources'][0]['url']
Example #2
    def gather_stage(self, harvest_job):
        log.debug('In SixodpHarvester gather_stage (%s)',
                  harvest_job.source.url)
        toolkit.requires_ckan_version(min_version='2.0')
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_ckan_base_url = harvest_job.source.url.rstrip('/')

        # Filter in/out datasets from particular organizations
        fq_terms = []
        org_filter_include = self.config.get('organizations_filter_include', [])
        org_filter_exclude = self.config.get('organizations_filter_exclude', [])
        if org_filter_include:
            fq_terms.append(' OR '.join(
                'organization:%s' % org_name for org_name in org_filter_include))
        elif org_filter_exclude:
            fq_terms.extend(
                '-organization:%s' % org_name for org_name in org_filter_exclude)

        force_all = self.config.get('force_all', False)
        delete_missing = self.config.get('delete_missing', False)

        # Ideally we can request from the remote CKAN only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self.last_error_free_job(harvest_job)
        log.debug('Last error-free job: %r', last_error_free_job)
        if last_error_free_job and not (force_all or delete_missing):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = \
                (last_time - datetime.timedelta(hours=1)).isoformat()
            log.info('Searching for datasets modified since: %s UTC',
                     get_changes_since)

            fq_since_last_time = 'metadata_modified:[{since}Z TO *]' \
                .format(since=get_changes_since)

            try:
                pkg_dicts = self._search_for_datasets(
                    remote_ckan_base_url,
                    fq_terms + [fq_since_last_time])
            except SearchError as e:
                log.info('Searching for datasets changed since last time '
                         'gave an error: %s', e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info('No datasets have been updated on the remote '
                         'CKAN instance since the last harvest job %s',
                         last_time)
                return []
Example #3
    def gather_stage(self, harvest_job):
        log.error('In MetarepoHarvester gather_stage (%s)',
                  harvest_job.source.url)
        toolkit.requires_ckan_version(min_version='2.0')
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_ckan_base_url = harvest_job.source.url.rstrip('/')

        # Filter in/out datasets from particular organizations
        fq_terms = []
        org_filter_include = self.config.get('organizations_filter_include',
                                             [])
        org_filter_exclude = self.config.get('organizations_filter_exclude',
                                             [])
        if org_filter_include:
            fq_terms.append(' OR '.join('organization:%s' % org_name
                                        for org_name in org_filter_include))
        elif org_filter_exclude:
            fq_terms.extend('-organization:%s' % org_name
                            for org_name in org_filter_exclude)

        # Ideally we can request from the remote Metarepo only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self._last_error_free_job(harvest_job)
        log.debug('Last error-free job: %r', last_error_free_job)
        if (last_error_free_job and not self.config.get('force_all', False)):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = \
                (last_time - datetime.timedelta(hours=1)).isoformat()
            log.info('Searching for datasets modified since: %s UTC',
                     get_changes_since)

            fq_since_last_time = 'metadata_modified:[{since}Z TO *]' \
                .format(since=get_changes_since)

            try:
                pkg_dicts = self._search_for_datasets(
                    remote_ckan_base_url, fq_terms + [fq_since_last_time])
            except SearchError as e:
                log.info(
                    'Searching for datasets changed since last time '
                    'gave an error: %s', e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info(
                    'No datasets have been updated on the remote '
                    'Metarepo instance since the last harvest job %s',
                    last_time)
                return []
Example #4
    def gather_stage(self, harvest_job):
        log.debug("In CKANHarvester gather_stage (%s)", harvest_job.source.url)
        toolkit.requires_ckan_version(min_version="2.0")
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_ckan_base_url = harvest_job.source.url.rstrip("/")

        # Filter in/out datasets from particular organizations
        fq_terms = []
        org_filter_include = self.config.get("organizations_filter_include", [])
        org_filter_exclude = self.config.get("organizations_filter_exclude", [])
        if org_filter_include:
            fq_terms.append(" OR ".join("organization:%s" % org_name for org_name in org_filter_include))
        elif org_filter_exclude:
            fq_terms.extend("-organization:%s" % org_name for org_name in org_filter_exclude)

        # Ideally we can request from the remote CKAN only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self._last_error_free_job(harvest_job)
        log.debug("Last error-free job: %r", last_error_free_job)
        if last_error_free_job and not self.config.get("force_all", False):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = (last_time - datetime.timedelta(hours=1)).isoformat()
            log.info("Searching for datasets modified since: %s UTC", get_changes_since)

            fq_since_last_time = "metadata_modified:[{since}Z TO *]".format(since=get_changes_since)

            try:
                pkg_dicts = self._search_for_datasets(remote_ckan_base_url, fq_terms + [fq_since_last_time])
            except SearchError as e:
                log.info("Searching for datasets changed since last time " "gave an error: %s", e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info(
                    "No datasets have been updated on the remote " "CKAN instance since the last harvest job %s",
                    last_time,
                )
                return None
Example #5
 def test_no_raise(self):
     tk.ckan.__version__ = '2'
     tk.requires_ckan_version(min_version='2')
Example #6
import logging

from ckan.lib.base import BaseController, c, render, request
from . import dbutil

import ckan.logic as logic
import hashlib
from . import plugin
from pylons import config

from paste.util.multidict import MultiDict

from ckan.controllers.api import ApiController

from ckan.exceptions import CkanVersionException
import ckan.plugins.toolkit as tk
try:
    tk.requires_ckan_version("2.9")
except CkanVersionException:
    pass
else:
    from builtins import str

log = logging.getLogger("ckanext.googleanalytics")


class GAController(BaseController):
    def view(self):
        # get package objects corresponding to popular GA content
        c.top_resources = dbutil.get_top_resources(limit=10)
        return render("summary.html")

Example #7
    def gather_stage(self, harvest_job):
        log.debug('In CKANHarvester gather_stage (%s)', harvest_job.source.url)
        toolkit.requires_ckan_version(min_version='2.0')
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_ckan_base_url = harvest_job.source.url.rstrip('/')

        # Filter in/out datasets from particular organizations
        fq_terms = []
        custom_filter = self.config.get('custom_filter')
        if custom_filter:
            fq_terms += [custom_filter]
        else:
            org_filter_include = self.config.get(
                'organizations_filter_include', [])
            org_filter_exclude = self.config.get(
                'organizations_filter_exclude', [])
            if org_filter_include:
                fq_terms.append(' OR '.join(
                    'organization:%s' % org_name
                    for org_name in org_filter_include))
            elif org_filter_exclude:
                fq_terms.extend('-organization:%s' % org_name
                                for org_name in org_filter_exclude)

            groups_filter_include = self.config.get('groups_filter_include',
                                                    [])
            groups_filter_exclude = self.config.get('groups_filter_exclude',
                                                    [])
            if groups_filter_include:
                fq_terms.append(' OR '.join(
                    'groups:%s' % group_name
                    for group_name in groups_filter_include))
            elif groups_filter_exclude:
                fq_terms.extend('-groups:%s' % group_name
                                for group_name in groups_filter_exclude)

        # Ideally we can request from the remote CKAN only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self.last_error_free_job(harvest_job)
        log.debug('Last error-free job: %r', last_error_free_job)
        if (last_error_free_job and not self.config.get('force_all', False)):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = \
                (last_time - datetime.timedelta(hours=1)).isoformat()
            log.info('Searching for datasets modified since: %s UTC',
                     get_changes_since)

            fq_since_last_time = 'metadata_modified:[{since}Z TO *]' \
                .format(since=get_changes_since)

            try:
                pkg_dicts = self._search_for_datasets(
                    remote_ckan_base_url, fq_terms + [fq_since_last_time])
            except SearchError as e:
                log.info(
                    'Searching for datasets changed since last time '
                    'gave an error: %s', e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info(
                    'No datasets have been updated on the remote '
                    'CKAN instance since the last harvest job %s', last_time)
                return []

        # Fall-back option - request all the datasets from the remote CKAN
        if get_all_packages:
            # Request all remote packages
            try:
                pkg_dicts = self._search_for_datasets(remote_ckan_base_url,
                                                      fq_terms)
            except SearchError as e:
                log.info('Searching for all datasets gave an error: %s', e)
                self._save_gather_error(
                    'Unable to search remote CKAN for datasets:%s url:%s'
                    'terms:%s' % (e, remote_ckan_base_url, fq_terms),
                    harvest_job)
                return None
        if not pkg_dicts:
            self._save_gather_error(
                'No datasets found at CKAN: %s' % remote_ckan_base_url,
                harvest_job)
            return []

        # Create harvest objects for each dataset
        try:
            package_ids = set()
            object_ids = []
            for pkg_dict in pkg_dicts:
                if pkg_dict['id'] in package_ids:
                    log.info(
                        'Discarding duplicate dataset %s - probably due '
                        'to datasets being changed at the same time as '
                        'when the harvester was paging through',
                        pkg_dict['id'])
                    continue
                package_ids.add(pkg_dict['id'])

                log.debug('Creating HarvestObject for %s %s', pkg_dict['name'],
                          pkg_dict['id'])
                obj = HarvestObject(guid=pkg_dict['id'],
                                    job=harvest_job,
                                    content=json.dumps(pkg_dict))
                obj.save()
                object_ids.append(obj.id)

            return object_ids
        except Exception as e:
            self._save_gather_error('%r' % e.message, harvest_job)
Example #8
import ckan.plugins as plugins
import ckan.plugins.toolkit as toolkit
from ckan.exceptions import CkanVersionException

import ckanext.opendata_theme.opengov_custom_homepage.helpers as helper
from ckanext.opendata_theme.opengov_custom_homepage.constants import CUSTOM_NAMING, CUSTOM_STYLE

try:
    toolkit.requires_ckan_version("2.9")
except CkanVersionException:
    from ckanext.opendata_theme.opengov_custom_homepage.plugin.pylons_plugin import MixinPlugin
else:
    from ckanext.opendata_theme.opengov_custom_homepage.plugin.flask_plugin import MixinPlugin
from ckanext.opendata_theme.base.template_helpers import version_builder


class Opendata_ThemePlugin(MixinPlugin):
    plugins.implements(plugins.IConfigurable, inherit=True)
    plugins.implements(plugins.IConfigurer)
    plugins.implements(plugins.ITemplateHelpers)

    # IConfigurer
    def update_config(self, ckan_config):
        toolkit.add_template_directory(ckan_config, '../templates')

        if toolkit.check_ckan_version(min_version='2.4', max_version='2.9'):
            toolkit.add_ckan_admin_tab(ckan_config, 'custom_home_page',
Example #9
def test_raise(monkeypatch):
    monkeypatch.setattr(tk.ckan, u"__version__", u"2")
    with pytest.raises(tk.CkanVersionException):
        tk.requires_ckan_version(min_version=u"3")
Example #10
def test_no_raise(monkeypatch):
    monkeypatch.setattr(tk.ckan, u"__version__", u"2")
    tk.requires_ckan_version(min_version=u"2")
Example #11
File: test_toolkit.py  Project: Ezio47/ckan
 def test_no_raise(self):
     tk.ckan.__version__ = '2'
     tk.requires_ckan_version(min_version='2')
Example #12
    def post(self, package_type):
        self._check_auth()

        pkg_id = None
        file_path = None

        data = self._clean_request_form()

        try:
            user = toolkit.c.user
            importer = ddiimporter.DdiImporter(username=user)

            if isinstance(data.get('upload'), FileStorage):
                log.debug('upload: %s' % data['upload'])
                file_path = self._save_temp_file(data['upload'].stream)
                log.debug('file_path: %s' % file_path)
                pkg_id = importer.run(
                    file_path=file_path,
                    upload=data['upload'],
                    data=data,
                )
            elif data.get('url'):
                log.debug('url: %s' % data['url'])
                pkg_id = importer.run(
                    url=data['url'],
                    data=data,
                )
            else:
                raise PackageImportError(
                    'An XML file (uploaded file or URL) is required')

            registry = ckanapi.LocalCKAN(username=user)

            resource_dict = {
                'package_id': pkg_id,
                'name': 'DDI RDF',
                'format': 'rdf',
                'url': '',
                'type': 'attachment',
                'file_type': 'other',
                'visibility': data.get('visibility', 'restricted')
            }
            if isinstance(data.get('rdf_upload'), FileStorage):
                resource_dict['upload'] = data['rdf_upload']
                registry.call_action('resource_create', resource_dict)
            elif data.get('rdf_url'):
                resource_dict['url'] = data['rdf_url']
                registry.call_action('resource_create', resource_dict)

            toolkit.h.flash_success(
                toolkit._('Dataset import from XML successfully completed. ' +
                          'You can now add data files to it.'))
        except toolkit.ValidationError as e:
            errors = e.error_dict
            error_summary = e.error_summary
            return self.get(package_type, data, errors, error_summary)
        except Exception as e:
            errors = {
                'import':
                toolkit._('Dataset import from XML failed: %s' % str(e))
            }
            return self.get(package_type, data, errors)
        finally:
            if file_path is not None:
                os.remove(file_path)

        if pkg_id is not None:
            try:
                toolkit.requires_ckan_version("2.9")
                url = toolkit.h.url_for(
                    u'{}_resource.new'.format(package_type),
                    id=pkg_id,
                )
            except toolkit.CkanVersionException:
                url = toolkit.h.url_for(
                    controller='package',
                    action='new_resource',
                    id=pkg_id,
                )
            return toolkit.redirect_to(url)
        else:
            return toolkit.redirect_to(toolkit.h.url_for('ddi_import.import'))
Example #13
    def gather_stage(self, harvest_job):
        '''
        The gather stage will receive a HarvestJob object and will be
        responsible for:
            - gathering all the necessary objects to fetch on a later
              stage (e.g. for a CSW server, perform a GetRecords request)
            - creating the necessary HarvestObjects in the database, specifying
              the guid and a reference to its job. The HarvestObjects need a
              reference date with the last modified date for the resource, this
              may need to be set in a different stage depending on the type of
              source.
            - creating and storing any suitable HarvestGatherErrors that may
              occur.
            - returning a list with all the ids of the created HarvestObjects.
            - to abort the harvest, create a HarvestGatherError and raise an
              exception. Any created HarvestObjects will be deleted.

        :param harvest_job: HarvestJob object
        :returns: A list of HarvestObject ids
        '''
        log.debug('In DataNorgeHarvester gather_stage (%s)',
                  harvest_job.source.url)
        toolkit.requires_ckan_version(min_version='2.0')
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_datanorge_base_url = harvest_job.source.url.rstrip('/')

        pkg_dicts = []

        # Ideally we can request from the remote Datanorge only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self._last_error_free_job(harvest_job)

        if (last_error_free_job and not self.config.get('force_all', False)):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = \
                (str(last_time - datetime.timedelta(hours=1)).split(' '))[0]
            log.info('Searching for datasets modified since: %s UTC',
                     get_changes_since)

            try:
                # Add the result from the search to pkg_dicts.
                pkg_dicts.extend(
                    self._search_for_datasets(remote_datanorge_base_url,
                                              get_changes_since))

            except SearchError as e:
                log.info(
                    'Searching for datasets changed since last time '
                    'gave an error: %s', e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info(
                    'No datasets have been updated on the remote '
                    'DataNorge instance since the last harvest job %s',
                    last_time)
                return None
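
The docstring at the top of the example above spells out the gather-stage contract: create one HarvestObject per remote dataset, record HarvestGatherErrors on failure, and return the list of created object ids. Below is a minimal sketch of just that contract, assuming ckanext-harvest's HarvesterBase and HarvestObject; _fetch_remote_datasets is a hypothetical stand-in for the remote search calls shown in the full examples, not part of any real harvester.

import json

from ckanext.harvest.harvesters.base import HarvesterBase
from ckanext.harvest.model import HarvestObject


class MinimalContractHarvester(HarvesterBase):
    def gather_stage(self, harvest_job):
        try:
            # Hypothetical helper standing in for the remote search calls
            # performed by the real harvesters above.
            pkg_dicts = self._fetch_remote_datasets(harvest_job.source.url)
        except Exception as e:
            # Record a HarvestGatherError and return None to signal failure.
            self._save_gather_error('Unable to list remote datasets: %r' % e,
                                    harvest_job)
            return None

        object_ids = []
        for pkg_dict in pkg_dicts:
            # One HarvestObject per remote dataset, keyed by its guid.
            obj = HarvestObject(guid=pkg_dict['id'], job=harvest_job,
                                content=json.dumps(pkg_dict))
            obj.save()
            object_ids.append(obj.id)
        return object_ids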
Example #14
    def gather_stage(self, harvest_job):
        '''
        The gather stage will receive a HarvestJob object and will be
        responsible for:
            - gathering all the necessary objects to fetch on a later
              stage (e.g. for a CSW server, perform a GetRecords request)
            - creating the necessary HarvestObjects in the database, specifying
              the guid and a reference to its job. The HarvestObjects need a
              reference date with the last modified date for the resource, this
              may need to be set in a different stage depending on the type of
              source.
            - creating and storing any suitable HarvestGatherErrors that may
              occur.
            - returning a list with all the ids of the created HarvestObjects.
            - to abort the harvest, create a HarvestGatherError and raise an
              exception. Any created HarvestObjects will be deleted.

        :param harvest_job: HarvestJob object
        :returns: A list of HarvestObject ids
        '''
        log.debug('In GeonorgeHarvester gather_stage (%s)',
                  harvest_job.source.url)
        toolkit.requires_ckan_version(min_version='2.0')
        get_all_packages = True

        self._set_config(harvest_job.source.config)

        # Get source URL
        remote_geonorge_base_url = harvest_job.source.url.rstrip('/')

        pkg_dicts = []

        # Retrieves the element at index 'index' from a list '_list'
        def get_item_from_list(_list, index):
            counter = 0
            for item in _list:
                if counter == index:
                    return item
                counter += 1

        '''
        This makes a list with lists of all possible search-combinations
        needed to search for everything specified in the config: '''
        filter_include = {}
        fq_terms_list_length = 1
        for filter_item in self.config:
            if filter_item in ['text', 'title', 'uuid']:
                config_item = self.config[filter_item]
                if isinstance(config_item, basestring):
                    config_item = [config_item]
                filter_include[filter_item] = config_item
                fq_terms_list_length *= len(filter_include[filter_item])
            elif filter_item in ['datatypes', 'organizations', 'themes']:
                # There was a key error when having filter_item = 'type'
                # This is fixed by setting it to 'datatype' and update it
                # to 'type' here (same goes for 'organization' and 'organizations'):
                config_item = self.config[filter_item]
                if isinstance(config_item, basestring):
                    config_item = [config_item]
                if filter_item == 'datatypes':
                    filter_include['type'] = config_item
                    fq_terms_list_length *= len(filter_include['type'])
                elif filter_item == 'organizations':
                    filter_include['organization'] = config_item
                    fq_terms_list_length *= len(filter_include['organization'])
                elif filter_item == 'themes':
                    filter_include['theme'] = config_item
                    fq_terms_list_length *= len(filter_include['theme'])
        # Set type to be 'dataset' by default:
        if 'type' not in filter_include:
            filter_include['type'] = ['dataset']
        fq_terms_list = [{} for i in range(fq_terms_list_length)]

        switchnum_max = 1
        filter_counter = 0
        for filter_item in filter_include:
            switchnum_counter = 0
            search_counter = 0
            for search in range(fq_terms_list_length):
                if switchnum_counter == switchnum_max:
                    search_counter += 1
                    switchnum_counter = 0
                switchnum_counter += 1
                fq_terms_list[search][filter_item] = \
                    filter_include[filter_item][search_counter \
                        % len(filter_include[filter_item])]
            temp_filter_item = get_item_from_list(filter_include,
                                                  filter_counter)
            if temp_filter_item is not None:
                switchnum_max *= len(filter_include[temp_filter_item])
            filter_counter += 1
        ''' End of search-combination making.
        All combination of search parameters is now stored in the list:
        fq_terms_list which is a list of dictionaries. Each dictionary in the
        list contains one search to be made.
        '''

        # Ideally we can request from the remote Geonorge only those datasets
        # modified since the last completely successful harvest.
        last_error_free_job = self._last_error_free_job(harvest_job)

        if (last_error_free_job and not self.config.get('force_all', False)):
            get_all_packages = False

            # Request only the datasets modified since
            last_time = last_error_free_job.gather_started
            # Note: SOLR works in UTC, and gather_started is also UTC, so
            # this should work as long as local and remote clocks are
            # relatively accurate. Going back a little earlier, just in case.
            get_changes_since = \
                (last_time - datetime.timedelta(hours=1)).isoformat()
            log.info('Searching for datasets modified since: %s UTC' %
                     get_changes_since)

            try:
                # For every dictionary of search parameters in fq_terms_list:
                # add the result from the search to pkg_dicts.
                for fq_terms in fq_terms_list:
                    pkg_dicts.extend(
                        self._search_for_datasets(remote_geonorge_base_url,
                                                  fq_terms))

                pkg_dicts = \
                    self._get_modified_datasets(pkg_dicts,
                                                remote_geonorge_base_url,
                                                get_changes_since)

            except SearchError as e:
                log.info(
                    'Searching for datasets changed since last time '
                    'gave an error: %s', e)
                get_all_packages = True

            if not get_all_packages and not pkg_dicts:
                log.info(
                    'No datasets have been updated on the remote '
                    'Geonorge instance since the last harvest job %s',
                    last_time)
                return None
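
The combination-building block near the top of Example #14 is, in effect, a cartesian product of the configured filter values. A minimal stand-alone sketch of the same idea follows; build_fq_terms_list and the sample filter values are hypothetical and not part of GeonorgeHarvester.

import itertools


def build_fq_terms_list(filter_include):
    # One dictionary per combination of filter values, in a stable key order,
    # covering the same set of searches as the counter loop in Example #14.
    keys = list(filter_include)
    combos = itertools.product(*(filter_include[key] for key in keys))
    return [dict(zip(keys, combo)) for combo in combos]


# Two types x two organizations -> four search dictionaries.
print(build_fq_terms_list({
    'type': ['dataset', 'software'],
    'organization': ['org-a', 'org-b'],
}))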