Ejemplo n.º 1
0
    def check_config(self):
        '''Validate the required S3 settings and that the bucket is usable.

        Prints a message for every missing option and exits the process with
        status 1 when any option is missing or the bucket cannot be found or
        created.
        '''
        # `missing_option` replaces the old `exit` flag, which shadowed the
        # `exit` builtin.
        missing_option = False
        for key in ('ckanext.s3filestore.aws_access_key_id',
                    'ckanext.s3filestore.aws_secret_access_key',
                    'ckanext.s3filestore.aws_bucket_name'):
            if not config.get(key):
                # print() with a single argument behaves identically under
                # Python 2 and 3
                print('You must set the "{0}" option in your ini file'.format(
                    key))
                missing_option = True
        if missing_option:
            sys.exit(1)

        print('All configuration options defined')
        bucket_name = config.get('ckanext.s3filestore.aws_bucket_name')
        public_key = config.get('ckanext.s3filestore.aws_access_key_id')
        secret_key = config.get('ckanext.s3filestore.aws_secret_access_key')

        s3_conn = boto.connect_s3(public_key, secret_key)

        # Check if bucket exists, creating it when missing
        bucket = s3_conn.lookup(bucket_name)
        if bucket is None:
            print('Bucket {0} does not exist, trying to create it...'.format(
                bucket_name))
            try:
                bucket = s3_conn.create_bucket(bucket_name)
            except boto.exception.StandardError as e:
                print('An error was found while creating the bucket:')
                print(str(e))
                sys.exit(1)
        print('Configuration OK!')
Ejemplo n.º 2
0
def catalog_uri():
    '''
    Returns a URI for the whole catalog

    This will be used to uniquely reference the CKAN instance on the RDF
    serializations and as a basis for eg datasets URIs (if not present on
    the metadata).

    The value will be the first found of:

        1. The `ckanext.dcat.base_uri` config option (recommended)
        2. The `ckan.site_url` config option
        3. `http://` + the `app_instance_uuid` config option (minus brackets)
        4. `http://` + a freshly generated random UUID (not stable across
           calls)

    A warning is emitted if the third or fourth option is used.

    Returns a string with the catalog URI.
    '''

    uri = config.get('ckanext.dcat.base_uri')
    if not uri:
        uri = config.get('ckan.site_url')
    if not uri:
        app_uuid = config.get('app_instance_uuid')
        if app_uuid:
            # The stored uuid is wrapped in '{...}' braces; strip them
            uri = 'http://' + app_uuid.replace('{', '').replace('}', '')
            log.critical('Using app id as catalog URI, you should set the ' +
                         '`ckanext.dcat.base_uri` or `ckan.site_url` option')
        else:
            # Last resort: a random, non-reproducible URI
            uri = 'http://' + str(uuid.uuid4())
            log.critical('Using a random id as catalog URI, you should set ' +
                         'the `ckanext.dcat.base_uri` or `ckan.site_url` ' +
                         'option')

    return uri
    def test_graph_from_catalog(self):
        '''The serialized catalog node carries the site's basic metadata.

        Python 2 style test (``unicode`` builtin, nose ``eq_``).
        '''
        s = RDFSerializer()
        g = s.g

        catalog = s.graph_from_catalog()

        # The catalog node's URI must be the canonical catalog URI
        eq_(unicode(catalog), utils.catalog_uri())

        # Basic fields
        assert self._triple(g, catalog, RDF.type, DCAT.Catalog)
        assert self._triple(g, catalog, DCT.title, config.get('ckan.site_title'))
        assert self._triple(g, catalog, FOAF.homepage, URIRef(config.get('ckan.site_url')))
        assert self._triple(g, catalog, DCT.language, 'en')
Ejemplo n.º 4
0
    def test_pagination_keeps_only_supported_params(self, mock_request):
        '''Only `modified_since`/`profiles` survive into pagination links.'''
        mock_request.params = {
            'a': 1,
            'b': 2,
            'modified_since': '2018-03-22',
            'profiles': 'schemaorg'
        }
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': list(range(10)),
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        expected_base = ('http://example.com/feed/catalog.xml?'
                         'modified_since=2018-03-22&profiles=schemaorg')
        assert pagination['count'] == 12
        assert pagination['items_per_page'] == config.get(
            'ckanext.dcat.datasets_per_page')
        # `a` and `b` are unsupported params and must be dropped
        assert pagination['current'] == expected_base + '&page=1'
        assert pagination['first'] == expected_base + '&page=1'
        assert pagination['last'] == expected_base + '&page=2'
        assert pagination['next'] == expected_base + '&page=2'
        assert 'previous' not in pagination
Ejemplo n.º 5
0
    def test_resource_upload(self):
        '''Test a basic resource file upload'''
        factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        # Close the upload handle deterministically instead of leaking it
        with open(file_path) as upload_file:
            resource = demo.action.resource_create(package_id='my-dataset',
                                                   upload=upload_file,
                                                   url='file.txt')

        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket='my-bucket', Key=key)

        # test the file contains what's expected
        obj = s3.get_object(Bucket='my-bucket', Key=key)
        data = obj['Body'].read()
        with open(file_path) as expected:
            assert_equal(data, expected.read())
Ejemplo n.º 6
0
    def test_resource_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        dataset = factories.Dataset(name="my-dataset")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        # Close the upload handle deterministically instead of leaking it
        with open(file_path) as upload_file:
            resource = demo.action.resource_create(package_id='my-dataset',
                                                   upload=upload_file,
                                                   url='file.txt')

        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # clear upload
        url = toolkit.url_for(controller='package', action='resource_edit',
                              id=dataset['id'], resource_id=resource['id'])
        env = {'REMOTE_USER': sysadmin['name'].encode('ascii')}
        app.post(url, {'clear_upload': True,
                       'url': 'http://asdf', 'save': 'save'},
                 extra_environ=env)

        # key shouldn't exist
        assert_false(bucket.lookup(key))
Ejemplo n.º 7
0
    def test_group_image_upload(self):
        '''Test a group image file upload'''
        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = 'somename.png'

        # Keep the upload handle open for the duration of the action and
        # close it deterministically afterwards (was previously leaked)
        with open(file_path) as image_file:
            img_uploader = Uploader(file_name, file=image_file)

            with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
                mock_date.datetime.utcnow.return_value = \
                    datetime.datetime(2001, 1, 29)
                context = {'user': sysadmin['name']}
                helpers.call_action('group_create', context=context,
                                    name="my-group",
                                    image_upload=img_uploader,
                                    image_url=file_name,
                                    save='save')

        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # requesting image redirects to s3
        app = self._get_test_app()
        # attempt redirect to linked url
        image_file_url = '/uploads/group/{0}'.format(file_name)
        r = app.get(image_file_url, status=[302, 301])
        assert_equal(r.location, 'https://my-bucket.s3.amazonaws.com/my-path/storage/uploads/group/{0}'
                                 .format(file_name))
Ejemplo n.º 8
0
    def test_pagination_keeps_only_supported_params(self, mock_request):
        '''Only `modified_since`/`profiles` survive into pagination links.

        Python 2 style test (``xrange``, nose ``eq_``).
        '''
        mock_request.params = {'a': 1, 'b': 2, 'modified_since': '2018-03-22', 'profiles': 'schemaorg'}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        # `a` and `b` are unsupported params and must be dropped from links
        eq_(pagination['current'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
        eq_(pagination['first'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
        eq_(pagination['last'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
        eq_(pagination['next'], 'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
        assert 'previous' not in pagination
Ejemplo n.º 9
0
    def test_pagination_same_results_than_page_size(self, mock_request):
        '''No `next`/`previous` links when all results fit on one page.

        Python 2 style test (``xrange``, nose ``eq_``).
        '''
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = ''

        # No page defined (defaults to 1)
        query = {
            'count': 10,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 10)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        # Everything fits on page 1, so current == first == last
        eq_(pagination['current'], 'http://example.com?page=1')
        eq_(pagination['first'], 'http://example.com?page=1')
        eq_(pagination['last'], 'http://example.com?page=1')
        assert 'next' not in pagination
        assert 'previous' not in pagination
Ejemplo n.º 10
0
def prepare_summary_mail(context, source_id, status):
    '''Build the (subject, body) pair for a harvest summary email.'''
    template_vars = get_mail_extra_vars(context, source_id, status)
    body = render('emails/summary_email.txt', template_vars)
    site_title = config.get('ckan.site_title')
    subject = '{} - Harvesting Job Successful - Summary Notification'.format(
        site_title)
    return subject, body
Ejemplo n.º 11
0
def prepare_error_mail(context, source_id, status):
    '''Build the (subject, body) pair for a harvest error email.'''
    template_vars = get_mail_extra_vars(context, source_id, status)
    body = render('emails/error_email.txt', template_vars)
    site_title = config.get('ckan.site_title')
    subject = '{} - Harvesting Job - Error Notification'.format(site_title)
    return subject, body
Ejemplo n.º 12
0
    def _gen_new_name(cls, title, existing_name=None, append_type=None):
        '''
        Returns a 'name' for the dataset (URL friendly), based on the title.

        If the ideal name is already taken, a suffix is appended until it is
        unique.

        If generating a new name because the title of the dataset has changed,
        specify the existing name, in case the name doesn't need to change
        after all.

        :param existing_name: the current name of the dataset - only specify
                              this if the dataset exists
        :type existing_name: string
        :param append_type: the type of characters to add to make it unique -
                            either 'number-sequence' or 'random-hex'.
        :type append_type: string
        '''
        # Explicit argument wins; otherwise the configured default, falling
        # back to 'number-sequence' when nothing is configured.
        chosen_append_type = append_type or config.get(
            'ckanext.harvest.default_dataset_name_append',
            'number-sequence')

        # Munge the title and collapse any run of dashes into one
        ideal_name = re.sub('-+', '-', munge_title_to_name(title))
        return cls._ensure_name_is_unique(ideal_name,
                                          existing_name=existing_name,
                                          append_type=chosen_append_type)
Ejemplo n.º 13
0
def get_dataset_rdf_url(default=False):
    '''Build the dataset URL for the dataset named in the current request.

    The dataset name is taken from the last path segment of the request URL;
    the base comes from `ckanext.dcat.base_uri`, falling back to
    `ckan.site_url`.
    '''
    url = toolkit.request.url
    url = url.split('?')[0]  # drop any query string
    dataset_name = url.split('/')[-1]
    # NOTE(review): `dataset_type` is currently unused — both branches below
    # return the same URL because the type-specific profile suffixes are
    # commented out. The package_show call still validates that the dataset
    # exists (it raises if not), so it is not a pure no-op.
    dataset_type = toolkit.get_action('package_show')(None, {
        'id': dataset_name
    })['type']

    uri = config.get('ckanext.dcat.base_uri')
    if not uri:
        uri = config.get('ckan.site_url')

    if default:
        return uri + '/' + 'dataset' + '/' + dataset_name  # + '.ttl?profiles=gsq_dataset'
    else:
        return uri + '/' + 'dataset' + '/' + dataset_name  # + '.ttl?profiles=gsq_' + dataset_type + ',gsq_dataset'
Ejemplo n.º 14
0
def get_validation_badge(resource, in_listing=False):
    '''Return the HTML snippet for a resource's validation badge.

    Returns an empty string when badges are disabled for listings (and we
    are in a listing) or when the resource has no validation status yet.
    '''
    if in_listing and not asbool(
            config.get('ckanext.validation.show_badges_in_listings', True)):
        return ''

    if not resource.get('validation_status'):
        return ''

    messages = {
        'success': _('Valid data'),
        'failure': _('Invalid data'),
        'error': _('Error during validation'),
        'unknown': _('Data validation unknown'),
    }

    # Anything outside the three known outcomes renders as "unknown"
    raw_status = resource['validation_status']
    status = raw_status if raw_status in ('success', 'failure', 'error') \
        else 'unknown'

    validation_url = url_for('validation_read',
                             id=resource['package_id'],
                             resource_id=resource['id'])

    badge_url = url_for_static(
        '/images/badges/data-{}-flat.svg'.format(status))

    return u'''
<a href="{validation_url}" class="validation-badge">
    <img src="{badge_url}" alt="{alt}" title="{title}"/>
</a>'''.format(validation_url=validation_url,
               badge_url=badge_url,
               alt=messages[status],
               title=resource.get('validation_timestamp', ''))
Ejemplo n.º 15
0
def show_validation_schemas():
    """ Returns a list of validation schemas"""
    directory = config.get('ckanext.validation.schema_directory')
    # No configured directory means no schemas
    if not directory:
        return []
    return _files_from_directory(directory).keys()
    def test_pagination_keeps_params(self, mock_request):
        '''Arbitrary request params are preserved in pagination links.

        Python 2 style test (``xrange``, nose ``eq_``).
        '''
        mock_request.params = {'a': 1, 'b': 2}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        # `a` and `b` are carried through into every link
        eq_(pagination['current'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
        eq_(pagination['first'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
        eq_(pagination['last'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
        eq_(pagination['next'],
            'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
        assert 'previous' not in pagination
Ejemplo n.º 17
0
    def test_pagination_same_results_than_page_size(self, mock_request):
        '''No `next`/`previous` links when all results fit on one page.

        Python 2 style test (``xrange``, nose ``eq_``).
        '''
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = ''

        # No page defined (defaults to 1)
        query = {
            'count': 10,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 10)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        # Everything fits on page 1, so current == first == last
        eq_(pagination['current'], 'http://example.com?page=1')
        eq_(pagination['first'], 'http://example.com?page=1')
        eq_(pagination['last'], 'http://example.com?page=1')
        assert 'next' not in pagination
        assert 'previous' not in pagination
Ejemplo n.º 18
0
    def after_map(self, map):
        '''Register the DCAT JSON endpoint on the routing map and return it.'''
        # Endpoint path is configurable; defaults to /dcat.json
        json_route = config.get('ckanext.dcat.json_endpoint', '/dcat.json')
        map.connect(json_route,
                    controller='ckanext.dcat.controllers:DCATController',
                    action='dcat_json')
        return map
Ejemplo n.º 19
0
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        # Keep the upload handle open for the duration of the action and
        # close it deterministically afterwards (was previously leaked)
        with open(file_path) as image_file:
            img_uploader = Uploader(file_name, file=image_file)

            with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
                mock_date.datetime.utcnow.return_value = \
                    datetime.datetime(2001, 1, 29)
                context = {'user': sysadmin['name']}
                helpers.call_action('group_create', context=context,
                                    name="my-group",
                                    image_upload=img_uploader,
                                    image_url=file_name)

        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        conn = boto.connect_s3()
        bucket = conn.get_bucket('my-bucket')
        # test the key exists
        assert_true(bucket.lookup(key))

        # clear upload
        helpers.call_action('group_update', context=context,
                            id='my-group', name='my-group',
                            image_url="http://asdf", clear_upload=True)

        # key shouldn't exist
        assert_false(bucket.lookup(key))
Ejemplo n.º 20
0
    def test_resource_upload(self):
        '''Test a basic resource file upload'''
        factories.Sysadmin(apikey="my-test-key")

        app = helpers._get_test_app()
        demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        # Close the upload handle deterministically instead of leaking it
        with open(file_path) as upload_file:
            resource = demo.action.resource_create(package_id='my-dataset',
                                                   upload=upload_file,
                                                   url='file.txt')

        key = '{1}/resources/{0}/data.csv' \
            .format(resource['id'],
                    config.get('ckanext.s3filestore.aws_storage_path'))

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket=BUCKET_NAME, Key=key)

        # test the file contains what's expected
        obj = s3.get_object(Bucket=BUCKET_NAME, Key=key)
        data = obj['Body'].read()
        with open(file_path) as expected:
            assert_equal(data, expected.read())
Ejemplo n.º 21
0
def datarequest_suggested_description():
    """Returns a datarequest suggested description from admin config

    :rtype: string

    """
    # Empty string when the option is not configured
    suggested = config.get(
        'ckanext.data_qld.datarequest_suggested_description', '')
    return suggested
    def test_graph_from_catalog(self):
        '''The serialized catalog node carries the site's basic metadata.'''
        serializer = RDFSerializer()
        graph = serializer.g

        catalog = serializer.graph_from_catalog()

        # The catalog node's URI must be the canonical catalog URI
        assert str(catalog) == utils.catalog_uri()

        # Basic fields
        assert self._triple(graph, catalog, RDF.type, DCAT.Catalog)
        assert self._triple(graph, catalog, DCT.title,
                            config.get('ckan.site_title'))
        assert self._triple(graph, catalog, FOAF.homepage,
                            URIRef(config.get('ckan.site_url')))
        assert self._triple(graph, catalog, DCT.language, 'en')
Ejemplo n.º 23
0
    def _get_user_name(self):
        '''
        Returns the name of the user that will perform the harvesting actions
        (deleting, updating and creating datasets)

        By default this will be the internal site admin user. This is the
        recommended setting, but if necessary it can be overridden with the
        `ckanext.spatial.harvest.user_name` config option, eg to support the
        old hardcoded 'harvest' user:

           ckanext.spatial.harvest.user_name = harvest

        '''
        # Cached after the first lookup
        if self._user_name:
            return self._user_name

        context = {
            'model': model,
            'ignore_auth': True,
            'defer_commit': True,  # See ckan/ckan#1714
        }
        self._site_user = p.toolkit.get_action('get_site_user')(context, {})

        # A configured override wins; otherwise use the site user's name
        self._user_name = (config.get('ckanext.spatial.harvest.user_name')
                           or self._site_user['name'])

        return self._user_name
Ejemplo n.º 24
0
    def test_pagination_without_site_url(self, mock_request):
        '''Links fall back to the request host when no site URL is set.

        Python 2 style test (``xrange``, nose ``eq_``).
        '''
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': [x for x in xrange(10)],
        }
        data_dict = {
            'page': None
        }

        pagination = _pagination_info(query, data_dict)

        eq_(pagination['count'], 12)
        eq_(pagination['items_per_page'],
            config.get('ckanext.dcat.datasets_per_page'))
        # All links are rooted at the mock request's host_url
        eq_(pagination['current'], 'http://ckan.example.com/feed/catalog.xml?page=1')
        eq_(pagination['first'], 'http://ckan.example.com/feed/catalog.xml?page=1')
        eq_(pagination['last'], 'http://ckan.example.com/feed/catalog.xml?page=2')
        eq_(pagination['next'], 'http://ckan.example.com/feed/catalog.xml?page=2')
        assert 'previous' not in pagination
Ejemplo n.º 25
0
    def test_pagination_with_dcat_base_uri(self, mock_request):
        '''Pagination links are rooted at the configured DCAT base URI.'''
        mock_request.params = {}
        mock_request.host_url = 'http://ckan.example.com'
        mock_request.path = '/feed/catalog.xml'

        # No page defined (defaults to 1)
        query = {
            'count': 12,
            'results': list(range(10)),
        }
        data_dict = {'page': None}

        pagination = _pagination_info(query, data_dict)

        base = 'http://example.com/data/feed/catalog.xml?page='
        assert pagination['count'] == 12
        assert pagination['items_per_page'] == config.get(
            'ckanext.dcat.datasets_per_page')
        assert pagination['current'] == base + '1'
        assert pagination['first'] == base + '1'
        assert pagination['last'] == base + '2'
        assert pagination['next'] == base + '2'
        assert 'previous' not in pagination
Ejemplo n.º 26
0
 def uploaded_file_redirect(self, upload_to, filename):
     '''Redirect static file requests to their location on S3.

     Builds ``{host_name}/{bucket_name}/{filepath}`` from the configured
     S3 host and bucket, then issues the redirect.
     '''
     host_name = config.get('ckanext.s3filestore.host_name')
     # Strip any trailing slashes (not just one) so the URL join below
     # always produces exactly one separator
     host_name = host_name.rstrip('/')
     storage_path = S3Uploader.get_storage_path(upload_to)
     filepath = os.path.join(storage_path, filename)
     redirect_url = '{host_name}/{bucket_name}/{filepath}'\
         .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
                 filepath=filepath,
                 host_name=host_name)
     redirect(redirect_url)
Ejemplo n.º 27
0
 def uploaded_file_redirect(self, upload_to, filename):
     '''Redirect static file requests to their location on S3.

     Builds ``{host_name}/{bucket_name}/{filepath}`` from the configured
     S3 host and bucket, then issues the redirect.
     '''
     host_name = config.get('ckanext.s3filestore.host_name')
     # Strip any trailing slashes (not just one) so the URL join below
     # always produces exactly one separator
     host_name = host_name.rstrip('/')
     storage_path = S3Uploader.get_storage_path(upload_to)
     filepath = os.path.join(storage_path, filename)
     redirect_url = '{host_name}/{bucket_name}/{filepath}'\
         .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
                 filepath=filepath,
                 host_name=host_name)
     redirect(redirect_url)
Ejemplo n.º 28
0
def profanity_checking_enabled():
    """Check to see if YTP comments extension is enabled and `check_for_profanity` is enabled

    :rtype: bool

    """
    comments_enabled = ytp_comments_enabled()
    return comments_enabled and toolkit.asbool(
        config.get('ckan.comments.check_for_profanity', False))
Ejemplo n.º 29
0
    def __init__(self, filename, url):
        # Cache folder from config, normalised to always end with '/'
        folder = config.get('hdx.download_with_cache.folder', '/tmp/')
        if not folder.endswith('/'):
            folder += '/'

        self.folder = folder
        self.filename = filename
        self.full_file_path = folder + filename
        self.url = url
Ejemplo n.º 30
0
def harvest_source_index_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source, but
    keeps the source itself.  This is useful to clean history of long running
    harvest sources to start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string

    :raises NotFound: if the harvest source does not exist
    :raises SearchIndexError: if the Solr delete fails
    '''

    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')

    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)

    # Use the canonical id from the model (the caller may have passed a name)
    harvest_source_id = source.id

    conn = make_connection()
    # Solr query matching every dataset of this source indexed for this site
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))

    solr_commit = toolkit.asbool(config.get('ckan.search.solr_commit', 'true'))
    # CKAN <= 2.5.x uses solrpy; later versions use pysolr, whose delete()
    # takes the query and commit flag directly.
    if toolkit.check_ckan_version(max_version='2.5.99'):
        # conn is solrpy
        try:
            conn.delete_query(query)
            if solr_commit:
                conn.commit()
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
        finally:
            # solrpy connections must be closed explicitly
            conn.close()
    else:
        # conn is pysolr
        try:
            conn.delete(q=query, commit=solr_commit)
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)

    return {'id': harvest_source_id}
Ejemplo n.º 31
0
 def _should_use_download_with_cache(self, dataset_name):
     '''Return True when *dataset_name* is configured for cached downloads.'''
     # Lazily populate the class-level cache from config on first use
     if not S3Controller.datasets_for_download_with_cache:
         configured = config.get('hdx.download_with_cache.datasets')
         if configured:
             S3Controller.datasets_for_download_with_cache = configured.split(',')
     cached_datasets = S3Controller.datasets_for_download_with_cache
     return bool(cached_datasets and dataset_name in cached_datasets)
Ejemplo n.º 32
0
 def graph_from_catalog(self, catalog_dict, catalog_ref):
     '''Swap the catalog's plain locale literal for its MDR language term.'''
     graph = self.g

     # dct:language: replace the plain locale literal with the MDR
     # vocabulary term when a mapping exists for the default locale
     default_locale = config.get('ckan.locale_default', 'en')
     if default_locale in self.language_mapping:
         mdrlang_term = self.language_mapping[default_locale]
         graph.remove((catalog_ref, DCT.language, Literal(default_locale)))
         graph.add((catalog_ref, DCT.language,
                    Literal(getattr(MDRLANG, mdrlang_term))))
Ejemplo n.º 33
0
def _send_mail(user_ids, action_type, datarequest, job_title):
    '''Queue a notification email about *datarequest* for each user id.

    Failures for one user are logged and never abort the remaining sends.
    '''
    for user_id in user_ids:
        try:
            recipient = model.User.get(user_id)
            template_vars = {
                'datarequest': datarequest,
                'user': recipient,
                'site_title': config.get('ckan.site_title'),
                'site_url': config.get('ckan.site_url')
            }
            subject = base.render_jinja2(
                'emails/subjects/{0}.txt'.format(action_type), template_vars)
            body = base.render_jinja2(
                'emails/bodies/{0}.txt'.format(action_type), template_vars)
            # Deliver asynchronously via the background job queue
            tk.enqueue_job(mailer.mail_user, [recipient, subject, body],
                           title=job_title)
        except Exception:
            logging.exception(
                "Error sending notification to {0}".format(user_id))
Ejemplo n.º 34
0
def clean_harvest_log():
    """Delete harvest log records older than the configured time frame."""
    from datetime import datetime, timedelta
    from ckantoolkit import config
    # Alias the model helper so the local import does not shadow this
    # function's own name.
    from ckanext.harvest.model import clean_harvest_log as _clean_harvest_log

    # Log time frame - in days
    log_timeframe = tk.asint(config.get("ckan.harvest.log_timeframe", 30))
    condition = datetime.utcnow() - timedelta(days=log_timeframe)

    # Delete logs older than the given date
    _clean_harvest_log(condition=condition)
Ejemplo n.º 35
0
    def validator(key, data, errors, context):
        """
        Return a value for a core field using a multilingual dict.
        """
        # Normalise the stored value into a multilingual dict
        data[key] = fluent_text_output(data[key])

        # Derive the non-suffixed core field key from this field's key
        last_part = key[-1]
        core_key = key[:-1] + (last_part[:-len(LANG_SUFFIX)],)

        if core_key in data:
            data[core_key] = scheming_language_text(
                data[key], config.get('ckan.locale_default', 'en'))
Ejemplo n.º 36
0
def resource_formats(field):
    """Returns a list of resource formats from admin config

    Blank lines in the config value are skipped, so an unset or empty
    option yields an empty list instead of a bogus empty-string format.

    :rtype: Array resource formats

    """
    configured = config.get('ckanext.data_qld.resource_formats', '')
    # The admin UI stores one format per CRLF-separated line
    cleaned = [fmt.strip().upper() for fmt in configured.split('\r\n')]
    return [{
        'value': fmt,
        'label': fmt
    } for fmt in cleaned if fmt]
Ejemplo n.º 37
0
def is_prod():
    '''Heuristically decide whether this instance is production.

    Any site URL containing one of the non-production markers is treated
    as non-prod. NOTE(review): 'ckan' in the URL also forces non-prod —
    confirm this is intended for your hostnames.
    '''
    site_url = config.get('ckan.site_url', '')
    non_prod_markers = ('training', 'dev', 'staging', 'ckan')
    return not any(marker in site_url for marker in non_prod_markers)
Ejemplo n.º 38
0
def fluent_text_output(value):
    """
    Return stored json representation as a multilingual dict, if
    value is already a dict just pass it through.
    """
    if isinstance(value, dict):
        return value
    try:
        parsed = json.loads(value)
    except ValueError:
        # plain string in the db, assume default locale
        parsed = {config.get('ckan.locale_default', 'en'): value}
    return parsed
Ejemplo n.º 39
0
    def test_group_image_upload_then_clear(self):
        '''Test that clearing an upload removes the S3 key'''

        sysadmin = factories.Sysadmin(apikey="my-test-key")

        file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        file_name = "somename.png"

        img_uploader = FlaskFileStorage(filename=file_name,
                                        stream=open(file_path),
                                        content_type='image/png')

        # Freeze the upload timestamp so the generated key is deterministic.
        with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
            mock_date.datetime.utcnow.return_value = \
                datetime.datetime(2001, 1, 29)
            context = {'user': sysadmin['name']}
            helpers.call_action('group_create',
                                context=context,
                                name="my-group",
                                image_upload=img_uploader,
                                image_url=file_name)

        key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
            .format(config.get('ckanext.s3filestore.aws_storage_path'), file_name)

        s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)

        # check whether the object exists in S3
        # will throw exception if not existing
        s3.head_object(Bucket='my-bucket', Key=key)

        # clear upload
        helpers.call_action('group_update',
                            context=context,
                            id='my-group',
                            name='my-group',
                            image_url="http://asdf",
                            clear_upload=True)

        # The key must be gone now.
        # BUG FIX: the original wrapped both the head_object call AND the
        # failing ``assert_false(True, ...)`` in a bare ``except:``, which
        # swallowed the AssertionError — the test could never fail here.
        # Track existence with a flag and assert outside the try block.
        key_exists = True
        try:
            s3.head_object(Bucket='my-bucket', Key=key)
        except Exception:
            # head_object raises (404 ClientError) when the key is absent
            key_exists = False
        assert_false(key_exists, "file should not exist")
Ejemplo n.º 40
0
    def _publisher_graph(self, dataset_ref, dataset_dict):
        """Add publisher triples for the dataset to ``self.g``.

        Emits ``schema:publisher`` / ``schema:sourceOrganization`` links
        plus a ``schema:contactPoint`` blank node.  Publisher details come
        from the ``publisher_*`` dataset fields, falling back to the
        dataset's organization where available.  No-op when neither a
        publisher URI/name nor an organization is present.
        """
        if any([
                self._get_dataset_value(dataset_dict, 'publisher_uri'),
                self._get_dataset_value(dataset_dict, 'publisher_name'),
                dataset_dict.get('organization'),
        ]):

            publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
            if publisher_uri:
                publisher_details = URIRef(publisher_uri)
            else:
                # No organization nor publisher_uri
                publisher_details = BNode()

            self.g.add((publisher_details, RDF.type, SCHEMA.Organization))
            self.g.add((dataset_ref, SCHEMA.publisher, publisher_details))
            self.g.add((dataset_ref, SCHEMA.sourceOrganization,
                        publisher_details))  # noqa

            publisher_name = self._get_dataset_value(dataset_dict,
                                                     'publisher_name')
            if not publisher_name and dataset_dict.get('organization'):
                publisher_name = dataset_dict['organization']['title']
                self._add_multilang_value(publisher_details,
                                          SCHEMA.name,
                                          multilang_values=publisher_name)
            else:
                # BUG FIX: was a bare ``g.add(...)`` which raised NameError
                # at runtime — every other statement here uses ``self.g``.
                self.g.add((publisher_details, SCHEMA.name,
                            Literal(publisher_name)))  # noqa

            contact_point = BNode()
            self.g.add((publisher_details, SCHEMA.contactPoint, contact_point))

            self.g.add((contact_point, SCHEMA.contactType,
                        Literal('customer service')))  # noqa

            publisher_url = self._get_dataset_value(dataset_dict,
                                                    'publisher_url')  # noqa
            if not publisher_url and dataset_dict.get('organization'):
                publisher_url = dataset_dict['organization'].get(
                    'url') or config.get('ckan.site_url', '')  # noqa

            self.g.add((contact_point, SCHEMA.url, Literal(publisher_url)))
            items = [
                ('publisher_email', SCHEMA.email,
                 ['contact_email', 'maintainer_email',
                  'author_email'], Literal),  # noqa
                ('publisher_name', SCHEMA.name,
                 ['contact_name', 'maintainer', 'author'], Literal),  # noqa
            ]

            self._add_triples_from_dict(dataset_dict, contact_point, items)
Ejemplo n.º 41
0
def scheming_language_text(text, prefer_lang=None):
    """
    :param text: {lang: text} dict or text string
    :param prefer_lang: choose this language version if available

    Convert "language-text" to the user's language by looking up the
    language in the dict, or via gettext when text is not a dict.
    """
    if not text:
        return u''

    assert text != {}
    if hasattr(text, 'get'):
        # Resolve the preferred language lazily: lang() raises TypeError
        # when no request/user language is available, in which case the
        # preferred-language lookup is skipped entirely.
        lookup = prefer_lang
        have_preference = True
        if lookup is None:
            try:
                lookup = lang()
            except TypeError:
                have_preference = False
        if have_preference and lookup in text:
            return text[lookup]

        # Fall back to the site's default locale ...
        default_locale = config.get('ckan.locale_default', 'en')
        if default_locale in text:
            return text[default_locale]

        # ... and finally to the alphabetically-first language present.
        return sorted(text.items())[0][1]

    if isinstance(text, str):
        text = text.decode('utf-8')
    return _(text)
Ejemplo n.º 42
0
    def test_resource_upload(self):
        '''Test a basic resource file upload'''
        factories.Sysadmin(apikey="my-test-key")

        app = self._get_test_app()
        api = ckanapi.TestAppCKAN(app, apikey='my-test-key')
        factories.Dataset(name="my-dataset")

        csv_path = os.path.join(os.path.dirname(__file__), 'data.csv')
        resource = api.action.resource_create(package_id='my-dataset',
                                              upload=open(csv_path),
                                              url='file.txt')

        storage_path = config.get('ckanext.s3filestore.aws_storage_path')
        key = '{1}/resources/{0}/data.csv'.format(resource['id'], storage_path)

        bucket = boto.connect_s3().get_bucket('my-bucket')
        # the key must exist in the bucket ...
        assert_true(bucket.lookup(key))
        # ... and hold exactly the uploaded file's contents
        assert_equal(bucket.get_key(key).get_contents_as_string(),
                     open(csv_path).read())
Ejemplo n.º 43
0
    def resource_download(self, id, resource_id, filename=None):
        '''
        Provide a download by either redirecting the user to the url stored or
        downloading the uploaded file from S3.
        '''
        context = {'model': model, 'session': model.Session,
                   'user': c.user or c.author, 'auth_user_obj': c.userobj}

        try:
            rsc = get_action('resource_show')(context, {'id': resource_id})
            get_action('package_show')(context, {'id': id})
        except NotFound:
            abort(404, _('Resource not found'))
        except NotAuthorized:
            abort(401, _('Unauthorized to read resource %s') % id)

        if rsc.get('url_type') == 'upload':
            upload = uploader.get_resource_uploader(rsc)
            bucket_name = config.get('ckanext.s3filestore.aws_bucket_name')
            region = config.get('ckanext.s3filestore.region_name')
            host_name = config.get('ckanext.s3filestore.host_name')
            bucket = upload.get_s3_bucket(bucket_name)

            if filename is None:
                filename = os.path.basename(rsc['url'])
            key_path = upload.get_path(rsc['id'], filename)
            key = filename

            if key is None:
                log.warn('Key \'{0}\' not found in bucket \'{1}\''
                         .format(key_path, bucket_name))

            try:
                # Small workaround to manage downloading of large files
                # We are using redirect to minio's resource public URL
                s3 = upload.get_s3_session()
                client = s3.client(service_name='s3', endpoint_url=host_name)
                url = client.generate_presigned_url(ClientMethod='get_object',
                                                    Params={'Bucket': bucket.name,
                                                            'Key': key_path},
                                                    ExpiresIn=60)
                redirect(url)

            except ClientError as ex:
                if ex.response['Error']['Code'] == 'NoSuchKey':
                    # attempt fallback
                    if config.get(
                            'ckanext.s3filestore.filesystem_download_fallback',
                            False):
                        log.info('Attempting filesystem fallback for resource {0}'
                                 .format(resource_id))
                        url = toolkit.url_for(
                            controller='ckanext.s3filestore.controller:S3Controller',
                            action='filesystem_resource_download',
                            id=id,
                            resource_id=resource_id,
                            filename=filename)
                        redirect(url)

                    abort(404, _('Resource data not found'))
                else:
                    raise ex