def check_config(self): exit = False for key in ('ckanext.s3filestore.aws_access_key_id', 'ckanext.s3filestore.aws_secret_access_key', 'ckanext.s3filestore.aws_bucket_name'): if not config.get(key): print 'You must set the "{0}" option in your ini file'.format( key) exit = True if exit: sys.exit(1) print 'All configuration options defined' bucket_name = config.get('ckanext.s3filestore.aws_bucket_name') public_key = config.get('ckanext.s3filestore.aws_access_key_id') secret_key = config.get('ckanext.s3filestore.aws_secret_access_key') S3_conn = boto.connect_s3(public_key, secret_key) # Check if bucket exists bucket = S3_conn.lookup(bucket_name) if bucket is None: print 'Bucket {0} does not exist, trying to create it...'.format( bucket_name) try: bucket = S3_conn.create_bucket(bucket_name) except boto.exception.StandardError as e: print 'An error was found while creating the bucket:' print str(e) sys.exit(1) print 'Configuration OK!'
def catalog_uri():
    '''Return a URI that uniquely identifies this CKAN catalog.

    Used on the RDF serializations and as a basis for e.g. dataset URIs
    (when not present on the metadata).  Checked in order:

    1. The `ckanext.dcat.base_uri` config option (recommended)
    2. The `ckan.site_url` config option
    3. `http://` + the `app_instance_uuid` config option (minus brackets)

    A critical warning is logged when the third option — or a random
    UUID, as a last resort — has to be used.

    Returns a string with the catalog URI.
    '''
    uri = config.get('ckanext.dcat.base_uri') or config.get('ckan.site_url')
    if uri:
        return uri
    app_uuid = config.get('app_instance_uuid')
    if app_uuid:
        uri = 'http://' + app_uuid.replace('{', '').replace('}', '')
        log.critical('Using app id as catalog URI, you should set the ' +
                     '`ckanext.dcat.base_uri` or `ckan.site_url` option')
    else:
        uri = 'http://' + str(uuid.uuid4())
        log.critical('Using a random id as catalog URI, you should set ' +
                     'the `ckanext.dcat.base_uri` or `ckan.site_url` ' +
                     'option')
    return uri
def test_graph_from_catalog(self):
    # Serialize the catalog and verify the basic catalog-level triples.
    s = RDFSerializer()
    g = s.g
    catalog = s.graph_from_catalog()
    # The catalog node URI must match the configured catalog URI
    eq_(unicode(catalog), utils.catalog_uri())
    # Basic fields
    assert self._triple(g, catalog, RDF.type, DCAT.Catalog)
    assert self._triple(g, catalog, DCT.title,
                        config.get('ckan.site_title'))
    assert self._triple(g, catalog, FOAF.homepage,
                        URIRef(config.get('ckan.site_url')))
    assert self._triple(g, catalog, DCT.language, 'en')
def test_pagination_keeps_only_supported_params(self, mock_request):
    '''Unsupported query params are dropped from pagination links.'''
    mock_request.params = {'a': 1, 'b': 2,
                           'modified_since': '2018-03-22',
                           'profiles': 'schemaorg'}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = '/feed/catalog.xml'

    # No page defined (defaults to 1)
    query = {'count': 12, 'results': list(range(10))}
    pagination = _pagination_info(query, {'page': None})

    base = ('http://example.com/feed/catalog.xml?'
            'modified_since=2018-03-22&profiles=schemaorg&page={}')
    assert pagination['count'] == 12
    assert pagination['items_per_page'] == config.get(
        'ckanext.dcat.datasets_per_page')
    assert pagination['current'] == base.format(1)
    assert pagination['first'] == base.format(1)
    assert pagination['last'] == base.format(2)
    assert pagination['next'] == base.format(2)
    assert 'previous' not in pagination
def test_resource_upload(self):
    '''Test a basic resource file upload'''
    factories.Sysadmin(apikey="my-test-key")
    app = self._get_test_app()
    demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
    factories.Dataset(name="my-dataset")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    resource = demo.action.resource_create(package_id='my-dataset',
                                           upload=open(file_path),
                                           url='file.txt')
    # Expected S3 key: <storage_path>/resources/<resource id>/data.csv
    key = '{1}/resources/{0}/data.csv' \
        .format(resource['id'],
                config.get('ckanext.s3filestore.aws_storage_path'))
    s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)
    # check whether the object exists in S3
    # will throw exception if not existing
    s3.head_object(Bucket='my-bucket', Key=key)
    # test the file contains what's expected
    obj = s3.get_object(Bucket='my-bucket', Key=key)
    data = obj['Body'].read()
    assert_equal(data, open(file_path).read())
def test_resource_upload_then_clear(self):
    '''Test that clearing an upload removes the S3 key'''
    sysadmin = factories.Sysadmin(apikey="my-test-key")
    app = self._get_test_app()
    demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
    dataset = factories.Dataset(name="my-dataset")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    resource = demo.action.resource_create(package_id='my-dataset',
                                           upload=open(file_path),
                                           url='file.txt')
    # Expected S3 key: <storage_path>/resources/<resource id>/data.csv
    key = '{1}/resources/{0}/data.csv' \
        .format(resource['id'],
                config.get('ckanext.s3filestore.aws_storage_path'))
    conn = boto.connect_s3()
    bucket = conn.get_bucket('my-bucket')
    # test the key exists
    assert_true(bucket.lookup(key))
    # clear upload via the resource edit form
    url = toolkit.url_for(controller='package', action='resource_edit',
                          id=dataset['id'], resource_id=resource['id'])
    env = {'REMOTE_USER': sysadmin['name'].encode('ascii')}
    app.post(url, {'clear_upload': True,
                   'url': 'http://asdf', 'save': 'save'},
             extra_environ=env)
    # key shouldn't exist
    assert_false(bucket.lookup(key))
def test_group_image_upload(self):
    '''Test a group image file upload'''
    sysadmin = factories.Sysadmin(apikey="my-test-key")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    file_name = 'somename.png'
    img_uploader = Uploader(file_name, file=open(file_path))
    # Freeze the upload timestamp so the generated S3 key is predictable
    with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
        mock_date.datetime.utcnow.return_value = \
            datetime.datetime(2001, 1, 29)
        context = {'user': sysadmin['name']}
        helpers.call_action('group_create', context=context,
                            name="my-group",
                            image_upload=img_uploader,
                            image_url=file_name,
                            save='save')
    key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
        .format(config.get('ckanext.s3filestore.aws_storage_path'),
                file_name)
    conn = boto.connect_s3()
    bucket = conn.get_bucket('my-bucket')
    # test the key exists
    assert_true(bucket.lookup(key))
    # requesting image redirects to s3
    app = self._get_test_app()
    # attempt redirect to linked url
    image_file_url = '/uploads/group/{0}'.format(file_name)
    r = app.get(image_file_url, status=[302, 301])
    assert_equal(
        r.location,
        'https://my-bucket.s3.amazonaws.com/my-path/storage/uploads/group/{0}'
        .format(file_name))
def test_pagination_keeps_only_supported_params(self, mock_request):
    # Unsupported params ('a', 'b') must be dropped from the generated
    # links; supported ones ('modified_since', 'profiles') are kept.
    mock_request.params = {'a': 1, 'b': 2,
                           'modified_since': '2018-03-22',
                           'profiles': 'schemaorg'}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = '/feed/catalog.xml'

    # No page defined (defaults to 1)
    query = {
        'count': 12,
        'results': [x for x in xrange(10)],
    }
    data_dict = {
        'page': None
    }

    pagination = _pagination_info(query, data_dict)

    eq_(pagination['count'], 12)
    eq_(pagination['items_per_page'],
        config.get('ckanext.dcat.datasets_per_page'))
    eq_(pagination['current'],
        'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
    eq_(pagination['first'],
        'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=1')
    eq_(pagination['last'],
        'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
    eq_(pagination['next'],
        'http://example.com/feed/catalog.xml?modified_since=2018-03-22&profiles=schemaorg&page=2')
    assert 'previous' not in pagination
def test_pagination_same_results_than_page_size(self, mock_request):
    # When count equals the page size there is exactly one page, so
    # neither a 'next' nor a 'previous' link should be emitted.
    mock_request.params = {}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = ''

    # No page defined (defaults to 1)
    query = {
        'count': 10,
        'results': [x for x in xrange(10)],
    }
    data_dict = {
        'page': None
    }

    pagination = _pagination_info(query, data_dict)

    eq_(pagination['count'], 10)
    eq_(pagination['items_per_page'],
        config.get('ckanext.dcat.datasets_per_page'))
    eq_(pagination['current'], 'http://example.com?page=1')
    eq_(pagination['first'], 'http://example.com?page=1')
    eq_(pagination['last'], 'http://example.com?page=1')
    assert 'next' not in pagination
    assert 'previous' not in pagination
def prepare_summary_mail(context, source_id, status):
    '''Build (subject, body) for a harvest-job summary notification email.'''
    template_vars = get_mail_extra_vars(context, source_id, status)
    body = render('emails/summary_email.txt', template_vars)
    subject = '{} - Harvesting Job Successful - Summary Notification'.format(
        config.get('ckan.site_title'))
    return subject, body
def prepare_error_mail(context, source_id, status):
    '''Build (subject, body) for a harvest-job error notification email.'''
    template_vars = get_mail_extra_vars(context, source_id, status)
    body = render('emails/error_email.txt', template_vars)
    subject = '{} - Harvesting Job - Error Notification'.format(
        config.get('ckan.site_title'))
    return subject, body
def _gen_new_name(cls, title, existing_name=None, append_type=None):
    '''
    Return a URL-friendly 'name' for the dataset, derived from the
    title.  If the ideal name is already taken, a suffix is appended
    to make it unique.  When regenerating because the title changed,
    pass the existing name in case it doesn't need to change after all.

    :param existing_name: the current name of the dataset - only
        specify this if the dataset exists
    :type existing_name: string
    :param append_type: how to make the name unique - either
        'number-sequence' or 'random-hex'.  Falls back to the
        `ckanext.harvest.default_dataset_name_append` option, then to
        'number-sequence'.
    :type append_type: string
    '''
    # Explicit argument wins; otherwise the configured default, then
    # the hard-coded 'number-sequence' fallback.
    chosen_append_type = append_type or config.get(
        'ckanext.harvest.default_dataset_name_append',
        'number-sequence')

    ideal_name = munge_title_to_name(title)
    ideal_name = re.sub('-+', '-', ideal_name)  # collapse multiple dashes
    return cls._ensure_name_is_unique(ideal_name,
                                      existing_name=existing_name,
                                      append_type=chosen_append_type)
def get_dataset_rdf_url(default=False):
    '''Return the RDF URL for the dataset named in the current request.

    Note: both branches currently build the same URL; the
    profile-specific query strings are commented out pending use.
    '''
    base_url = toolkit.request.url.split('?')[0]
    dataset_name = base_url.split('/')[-1]
    # Looked up so a missing dataset raises; 'type' feeds the
    # commented-out profile suffix below.
    dataset_type = toolkit.get_action('package_show')(None, {
        'id': dataset_name
    })['type']
    uri = config.get('ckanext.dcat.base_uri') or config.get('ckan.site_url')
    if default:
        return uri + '/' + 'dataset' + '/' + dataset_name  # + '.ttl?profiles=gsq_dataset'
    return uri + '/' + 'dataset' + '/' + dataset_name  # + '.ttl?profiles=gsq_' + dataset_type + ',gsq_dataset'
def get_validation_badge(resource, in_listing=False):
    # Build a small SVG badge linking to the resource's validation
    # report.  Returns '' when badges are disabled for listings or the
    # resource has no recorded validation status.
    if in_listing and not asbool(
            config.get('ckanext.validation.show_badges_in_listings', True)):
        return ''
    if not resource.get('validation_status'):
        return ''
    # Localised tooltip/alt text per status
    messages = {
        'success': _('Valid data'),
        'failure': _('Invalid data'),
        'error': _('Error during validation'),
        'unknown': _('Data validation unknown'),
    }
    # Any unrecognised status value is shown as 'unknown'
    if resource['validation_status'] in ['success', 'failure', 'error']:
        status = resource['validation_status']
    else:
        status = 'unknown'
    validation_url = url_for('validation_read',
                             id=resource['package_id'],
                             resource_id=resource['id'])
    badge_url = url_for_static(
        '/images/badges/data-{}-flat.svg'.format(status))
    return u''' <a href="{validation_url}" class="validation-badge"> <img src="{badge_url}" alt="{alt}" title="{title}"/> </a>'''.format(validation_url=validation_url, badge_url=badge_url, alt=messages[status], title=resource.get('validation_timestamp', ''))
def show_validation_schemas():
    """Return the names of the validation schemas found in the
    configured `ckanext.validation.schema_directory` (an empty list
    when no directory is configured).
    """
    schema_directory = config.get('ckanext.validation.schema_directory')
    if schema_directory:
        # Wrap in list() so the documented "list" contract holds on
        # Python 3 too, where dict.keys() returns a view object.
        return list(_files_from_directory(schema_directory).keys())
    return []
def test_pagination_keeps_params(self, mock_request):
    # Existing query params must be preserved in all pagination links.
    mock_request.params = {'a': 1, 'b': 2}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = '/feed/catalog.xml'

    # No page defined (defaults to 1)
    query = {
        'count': 12,
        'results': [x for x in xrange(10)],
    }
    data_dict = {'page': None}

    pagination = _pagination_info(query, data_dict)

    eq_(pagination['count'], 12)
    eq_(pagination['items_per_page'],
        config.get('ckanext.dcat.datasets_per_page'))
    eq_(pagination['current'],
        'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
    eq_(pagination['first'],
        'http://example.com/feed/catalog.xml?a=1&b=2&page=1')
    eq_(pagination['last'],
        'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
    eq_(pagination['next'],
        'http://example.com/feed/catalog.xml?a=1&b=2&page=2')
    assert 'previous' not in pagination
def after_map(self, map):
    '''IRoutes hook: expose the DCAT JSON endpoint (default
    /dcat.json, overridable via `ckanext.dcat.json_endpoint`).'''
    endpoint = config.get('ckanext.dcat.json_endpoint', '/dcat.json')
    map.connect(endpoint,
                controller='ckanext.dcat.controllers:DCATController',
                action='dcat_json')
    return map
def test_group_image_upload_then_clear(self):
    '''Test that clearing an upload removes the S3 key'''
    sysadmin = factories.Sysadmin(apikey="my-test-key")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    file_name = "somename.png"
    img_uploader = Uploader(file_name, file=open(file_path))
    # Freeze the upload timestamp so the generated S3 key is predictable
    with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
        mock_date.datetime.utcnow.return_value = \
            datetime.datetime(2001, 1, 29)
        context = {'user': sysadmin['name']}
        helpers.call_action('group_create', context=context,
                            name="my-group",
                            image_upload=img_uploader,
                            image_url=file_name)
    key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
        .format(config.get('ckanext.s3filestore.aws_storage_path'),
                file_name)
    conn = boto.connect_s3()
    bucket = conn.get_bucket('my-bucket')
    # test the key exists
    assert_true(bucket.lookup(key))
    # clear upload
    helpers.call_action('group_update', context=context,
                        id='my-group', name='my-group',
                        image_url="http://asdf", clear_upload=True)
    # key shouldn't exist
    assert_false(bucket.lookup(key))
def test_resource_upload(self):
    '''Test a basic resource file upload'''
    factories.Sysadmin(apikey="my-test-key")
    app = helpers._get_test_app()
    demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
    factories.Dataset(name="my-dataset")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    resource = demo.action.resource_create(package_id='my-dataset',
                                           upload=open(file_path),
                                           url='file.txt')
    # Expected S3 key: <storage_path>/resources/<resource id>/data.csv
    key = '{1}/resources/{0}/data.csv' \
        .format(resource['id'],
                config.get('ckanext.s3filestore.aws_storage_path'))
    # check whether the object exists in S3
    # will throw exception if not existing
    s3.head_object(Bucket=BUCKET_NAME, Key=key)
    # test the file contains what's expected
    obj = s3.get_object(Bucket=BUCKET_NAME, Key=key)
    data = obj['Body'].read()
    assert_equal(data, open(file_path).read())
def datarequest_suggested_description():
    """Return the admin-configured suggested description for data
    requests (empty string when unset).

    :rtype: string
    """
    return config.get(
        'ckanext.data_qld.datarequest_suggested_description', '')
def test_graph_from_catalog(self):
    '''The serialized catalog carries the expected base triples.'''
    serializer = RDFSerializer()
    graph = serializer.g
    catalog = serializer.graph_from_catalog()
    # The catalog node URI must match the configured catalog URI
    assert str(catalog) == utils.catalog_uri()
    # Basic fields
    assert self._triple(graph, catalog, RDF.type, DCAT.Catalog)
    assert self._triple(graph, catalog, DCT.title,
                        config.get('ckan.site_title'))
    assert self._triple(graph, catalog, FOAF.homepage,
                        URIRef(config.get('ckan.site_url')))
    assert self._triple(graph, catalog, DCT.language, 'en')
def _get_user_name(self):
    '''
    Return (and cache) the name of the user that performs the
    harvesting actions (deleting, updating and creating datasets).

    Defaults to the internal site admin user — the recommended
    setting — but can be overridden with the
    `ckanext.spatial.harvest.user_name` config option, eg to support
    the old hardcoded 'harvest' user:

        ckanext.spatial.harvest.user_name = harvest
    '''
    if self._user_name:
        return self._user_name

    context = {
        'model': model,
        'ignore_auth': True,
        'defer_commit': True,  # See ckan/ckan#1714
    }
    self._site_user = p.toolkit.get_action('get_site_user')(context, {})

    # Config override wins; otherwise fall back to the site user.
    override = config.get('ckanext.spatial.harvest.user_name')
    self._user_name = override or self._site_user['name']
    return self._user_name
def test_pagination_without_site_url(self, mock_request):
    # With no site URL configured, links fall back to the request's
    # host_url.
    mock_request.params = {}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = '/feed/catalog.xml'

    # No page defined (defaults to 1)
    query = {
        'count': 12,
        'results': [x for x in xrange(10)],
    }
    data_dict = {
        'page': None
    }

    pagination = _pagination_info(query, data_dict)

    eq_(pagination['count'], 12)
    eq_(pagination['items_per_page'],
        config.get('ckanext.dcat.datasets_per_page'))
    eq_(pagination['current'],
        'http://ckan.example.com/feed/catalog.xml?page=1')
    eq_(pagination['first'],
        'http://ckan.example.com/feed/catalog.xml?page=1')
    eq_(pagination['last'],
        'http://ckan.example.com/feed/catalog.xml?page=2')
    eq_(pagination['next'],
        'http://ckan.example.com/feed/catalog.xml?page=2')
    assert 'previous' not in pagination
def test_pagination_with_dcat_base_uri(self, mock_request):
    '''Pagination links are built on the configured DCAT base URI.'''
    mock_request.params = {}
    mock_request.host_url = 'http://ckan.example.com'
    mock_request.path = '/feed/catalog.xml'

    # No page defined (defaults to 1)
    query = {'count': 12, 'results': list(range(10))}
    pagination = _pagination_info(query, {'page': None})

    base = 'http://example.com/data/feed/catalog.xml?page={}'
    assert pagination['count'] == 12
    assert pagination['items_per_page'] == config.get(
        'ckanext.dcat.datasets_per_page')
    assert pagination['current'] == base.format(1)
    assert pagination['first'] == base.format(1)
    assert pagination['last'] == base.format(2)
    assert pagination['next'] == base.format(2)
    assert 'previous' not in pagination
def uploaded_file_redirect(self, upload_to, filename):
    '''Redirect static file requests to their location on S3.'''
    host_name = config.get('ckanext.s3filestore.host_name')
    # Normalise: drop trailing slashes.  rstrip() is safe for an empty
    # value, unlike the previous `host_name[-1]` indexing which raised
    # on an empty/unset option.
    if host_name:
        host_name = host_name.rstrip('/')
    storage_path = S3Uploader.get_storage_path(upload_to)
    filepath = os.path.join(storage_path, filename)
    redirect_url = '{host_name}/{bucket_name}/{filepath}' \
        .format(bucket_name=config.get('ckanext.s3filestore.aws_bucket_name'),
                filepath=filepath,
                host_name=host_name)
    redirect(redirect_url)
def profanity_checking_enabled():
    """Check to see if YTP comments extension is enabled and
    `check_for_profanity` is enabled

    :rtype: bool
    """
    ytp_enabled = ytp_comments_enabled()
    return ytp_enabled and toolkit.asbool(
        config.get('ckan.comments.check_for_profanity', False))
def __init__(self, filename, url):
    '''Remember the source *url* and the cache path for *filename*.

    The cache folder comes from `hdx.download_with_cache.folder`
    (default '/tmp/') and is normalised to end with a slash.
    '''
    folder = config.get('hdx.download_with_cache.folder', '/tmp/')
    if not folder.endswith('/'):
        folder += '/'
    self.folder = folder
    self.filename = filename
    self.full_file_path = self.folder + self.filename
    self.url = url
def harvest_source_index_clear(context, data_dict):
    '''
    Clears all datasets, jobs and objects related to a harvest source,
    but keeps the source itself.
    This is useful to clean history of long running harvest sources to
    start again fresh.

    :param id: the id of the harvest source to clear
    :type id: string

    :raises NotFound: when no harvest source matches the given id
    :raises SearchIndexError: when the Solr delete fails
    '''
    check_access('harvest_source_clear', context, data_dict)
    harvest_source_id = data_dict.get('id')
    source = HarvestSource.get(harvest_source_id)
    if not source:
        log.error('Harvest source %s does not exist', harvest_source_id)
        raise NotFound('Harvest source %s does not exist' % harvest_source_id)
    # Use the canonical id from the model (the lookup accepts name or id)
    harvest_source_id = source.id
    conn = make_connection()
    # Delete every indexed document for this source on this site
    query = ''' +%s:"%s" +site_id:"%s" ''' % (
        'harvest_source_id', harvest_source_id, config.get('ckan.site_id'))
    solr_commit = toolkit.asbool(config.get('ckan.search.solr_commit', 'true'))
    # CKAN <= 2.5.x uses solrpy, newer versions use pysolr — the two
    # clients have different delete APIs.
    if toolkit.check_ckan_version(max_version='2.5.99'):
        # conn is solrpy
        try:
            conn.delete_query(query)
            if solr_commit:
                conn.commit()
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
        finally:
            conn.close()
    else:
        # conn is pysolr
        try:
            conn.delete(q=query, commit=solr_commit)
        except Exception as e:
            log.exception(e)
            raise SearchIndexError(e)
    return {'id': harvest_source_id}
def _should_use_download_with_cache(self, dataset_name):
    '''True when *dataset_name* is listed in the comma-separated
    `hdx.download_with_cache.datasets` config option.  The parsed
    list is cached on the class after the first call.'''
    cached = S3Controller.datasets_for_download_with_cache
    if not cached:
        configured = config.get('hdx.download_with_cache.datasets')
        if configured:
            S3Controller.datasets_for_download_with_cache = \
                configured.split(',')
            cached = S3Controller.datasets_for_download_with_cache
    if cached and dataset_name in cached:
        return True
    return False
def graph_from_catalog(self, catalog_dict, catalog_ref):
    '''Swap the catalog's plain dct:language literal for its value in
    `self.language_mapping`, when the configured default locale has a
    mapping entry.'''
    graph = self.g

    # dct:language
    locale = config.get('ckan.locale_default', 'en')
    if locale in self.language_mapping:
        mapped = self.language_mapping[locale]
        graph.remove((catalog_ref, DCT.language, Literal(locale)))
        graph.add((catalog_ref, DCT.language,
                   Literal(getattr(MDRLANG, mapped))))
def _send_mail(user_ids, action_type, datarequest, job_title):
    '''Render and enqueue a data-request notification email for every
    user id.  A failure for one recipient is logged and does not stop
    the remaining ones.'''
    for user_id in user_ids:
        try:
            recipient = model.User.get(user_id)
            template_vars = {
                'datarequest': datarequest,
                'user': recipient,
                'site_title': config.get('ckan.site_title'),
                'site_url': config.get('ckan.site_url')
            }
            subject = base.render_jinja2(
                'emails/subjects/{0}.txt'.format(action_type), template_vars)
            body = base.render_jinja2(
                'emails/bodies/{0}.txt'.format(action_type), template_vars)
            tk.enqueue_job(mailer.mail_user,
                           [recipient, subject, body],
                           title=job_title)
        except Exception:
            logging.exception(
                "Error sending notification to {0}".format(user_id))
def clean_harvest_log():
    '''Delete harvest log records older than the configured time frame
    (`ckan.harvest.log_timeframe`, in days, default 30).'''
    from datetime import datetime, timedelta
    from ckantoolkit import config
    # NOTE: this model helper intentionally shadows this command's own
    # name inside the function body.
    from ckanext.harvest.model import clean_harvest_log

    # Log time frame - in days
    days_to_keep = tk.asint(config.get("ckan.harvest.log_timeframe", 30))
    cutoff = datetime.utcnow() - timedelta(days=days_to_keep)

    # Delete logs older then the given date
    clean_harvest_log(condition=cutoff)
def validator(key, data, errors, context):
    """ Return a value for a core field using a multilingual dict. """
    data[key] = fluent_text_output(data[key])

    field_name = key[-1]
    # Strip the language suffix to find the matching core field's key
    core_key = key[:-1] + (field_name[:-len(LANG_SUFFIX)],)
    if core_key in data:
        data[core_key] = scheming_language_text(
            data[key], config.get('ckan.locale_default', 'en'))
def resource_formats(field):
    """Return the admin-configured resource formats as select options.

    Each non-blank line of the `ckanext.data_qld.resource_formats`
    option becomes one {'value': ..., 'label': ...} entry, upper-cased.
    Blank lines — including the case where the option is unset, which
    previously produced a single empty-string option — are skipped.

    :rtype: Array resource formats
    """
    configured = config.get('ckanext.data_qld.resource_formats', '')
    options = []
    for line in configured.split('\r\n'):
        fmt = line.strip().upper()
        if fmt:  # skip blank lines / unset config
            options.append({'value': fmt, 'label': fmt})
    return options
def is_prod():
    '''Heuristic: treat the site as production unless `ckan.site_url`
    contains one of the known non-production markers.'''
    site_url = config.get('ckan.site_url', '')
    non_prod_markers = ('training', 'dev', 'staging', 'ckan')
    return not any(marker in site_url for marker in non_prod_markers)
def fluent_text_output(value):
    """Return *value* as a multilingual {lang: text} dict.

    Dicts pass through unchanged; strings are parsed as JSON, and an
    unparseable plain string is wrapped under the default locale.
    """
    if isinstance(value, dict):
        # already a multilingual dict
        return value
    try:
        parsed = json.loads(value)
    except ValueError:
        # plain string in the db, assume default locale
        return {config.get('ckan.locale_default', 'en'): value}
    return parsed
def test_group_image_upload_then_clear(self):
    '''Test that clearing an upload removes the S3 key'''
    sysadmin = factories.Sysadmin(apikey="my-test-key")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    file_name = "somename.png"
    img_uploader = FlaskFileStorage(filename=file_name,
                                    stream=open(file_path),
                                    content_type='image/png')
    # Freeze the upload timestamp so the generated S3 key is predictable
    with mock.patch('ckanext.s3filestore.uploader.datetime') as mock_date:
        mock_date.datetime.utcnow.return_value = \
            datetime.datetime(2001, 1, 29)
        context = {'user': sysadmin['name']}
        helpers.call_action('group_create', context=context,
                            name="my-group",
                            image_upload=img_uploader,
                            image_url=file_name)
    key = '{0}/storage/uploads/group/2001-01-29-000000{1}' \
        .format(config.get('ckanext.s3filestore.aws_storage_path'),
                file_name)
    s3 = self.botoSession.client('s3', endpoint_url=self.endpoint_url)
    # check whether the object exists in S3
    # will throw exception if not existing
    s3.head_object(Bucket='my-bucket', Key=key)
    # clear upload
    helpers.call_action('group_update', context=context,
                        id='my-group', name='my-group',
                        image_url="http://asdf", clear_upload=True)
    # The key should now be gone, i.e. head_object must raise.
    # Fixed: the old version asserted inside a bare `except:` block,
    # which swallowed the AssertionError itself (and SystemExit /
    # KeyboardInterrupt), so the test could never fail.
    file_still_exists = True
    try:
        s3.head_object(Bucket='my-bucket', Key=key)
    except Exception:
        file_still_exists = False
    assert_false(file_still_exists, "file should not exist")
def _publisher_graph(self, dataset_ref, dataset_dict):
    '''Add schema.org publisher / sourceOrganization triples for the
    dataset, when any publisher information (publisher_uri,
    publisher_name or an owning organization) is available.'''
    if any([
        self._get_dataset_value(dataset_dict, 'publisher_uri'),
        self._get_dataset_value(dataset_dict, 'publisher_name'),
        dataset_dict.get('organization'),
    ]):
        publisher_uri = publisher_uri_from_dataset_dict(dataset_dict)
        if publisher_uri:
            publisher_details = URIRef(publisher_uri)
        else:
            # No organization nor publisher_uri
            publisher_details = BNode()

        self.g.add((publisher_details, RDF.type, SCHEMA.Organization))
        self.g.add((dataset_ref, SCHEMA.publisher, publisher_details))
        self.g.add((dataset_ref, SCHEMA.sourceOrganization, publisher_details))  # noqa

        publisher_name = self._get_dataset_value(dataset_dict, 'publisher_name')  # noqa
        if not publisher_name and dataset_dict.get('organization'):
            publisher_name = dataset_dict['organization']['title']
            self._add_multilang_value(publisher_details, SCHEMA.name,
                                      multilang_values=publisher_name)
        else:
            # Fixed: this branch called `g.add(...)` with `g` undefined
            # in this scope (NameError at runtime); every other graph
            # call in this method goes through self.g.
            self.g.add((publisher_details, SCHEMA.name, Literal(publisher_name)))  # noqa

        contact_point = BNode()
        self.g.add((publisher_details, SCHEMA.contactPoint, contact_point))
        self.g.add((contact_point, SCHEMA.contactType, Literal('customer service')))  # noqa

        publisher_url = self._get_dataset_value(dataset_dict, 'publisher_url')  # noqa
        if not publisher_url and dataset_dict.get('organization'):
            publisher_url = dataset_dict['organization'].get(
                'url') or config.get('ckan.site_url', '')  # noqa
        self.g.add((contact_point, SCHEMA.url, Literal(publisher_url)))

        items = [
            ('publisher_email', SCHEMA.email, ['contact_email', 'maintainer_email', 'author_email'], Literal),  # noqa
            ('publisher_name', SCHEMA.name, ['contact_name', 'maintainer', 'author'], Literal),  # noqa
        ]
        self._add_triples_from_dict(dataset_dict, contact_point, items)
def scheming_language_text(text, prefer_lang=None):
    """
    :param text: {lang: text} dict or text string
    :param prefer_lang: choose this language version if available

    Convert "language-text" to users' language by looking up
    language in dict or using gettext if not a dict
    """
    if not text:
        return u''

    assert text != {}
    if hasattr(text, 'get'):
        lookup_lang = prefer_lang
        have_lang = True
        if lookup_lang is None:
            try:
                lookup_lang = lang()
            except TypeError:
                # lang() call will fail when no user language available
                have_lang = False
        if have_lang:
            try:
                return text[lookup_lang]
            except KeyError:
                pass
        default_locale = config.get('ckan.locale_default', 'en')
        try:
            return text[default_locale]
        except KeyError:
            pass
        # Fall back to the first language alphabetically
        _first_lang, first_value = sorted(text.items())[0]
        return first_value

    # Plain string: translate via gettext (py2: ensure unicode first)
    if isinstance(text, str):
        text = text.decode('utf-8')
    return _(text)
def test_resource_upload(self):
    '''Test a basic resource file upload'''
    factories.Sysadmin(apikey="my-test-key")
    app = self._get_test_app()
    demo = ckanapi.TestAppCKAN(app, apikey='my-test-key')
    factories.Dataset(name="my-dataset")
    file_path = os.path.join(os.path.dirname(__file__), 'data.csv')
    resource = demo.action.resource_create(package_id='my-dataset',
                                           upload=open(file_path),
                                           url='file.txt')
    # Expected S3 key: <storage_path>/resources/<resource id>/data.csv
    key = '{1}/resources/{0}/data.csv' \
        .format(resource['id'],
                config.get('ckanext.s3filestore.aws_storage_path'))
    conn = boto.connect_s3()
    bucket = conn.get_bucket('my-bucket')
    # test the key exists
    assert_true(bucket.lookup(key))
    # test the file contains what's expected
    assert_equal(bucket.get_key(key).get_contents_as_string(),
                 open(file_path).read())
def resource_download(self, id, resource_id, filename=None):
    '''
    Provide a download by either redirecting the user to the url stored
    or downloading the uploaded file from S3.
    '''
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}
    try:
        rsc = get_action('resource_show')(context, {'id': resource_id})
        get_action('package_show')(context, {'id': id})
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % id)
    # Only uploaded resources live in S3; linked URLs fall through
    if rsc.get('url_type') == 'upload':
        upload = uploader.get_resource_uploader(rsc)
        bucket_name = config.get('ckanext.s3filestore.aws_bucket_name')
        # NOTE(review): `region` is read but never used below — confirm
        # whether it can be dropped.
        region = config.get('ckanext.s3filestore.region_name')
        host_name = config.get('ckanext.s3filestore.host_name')
        bucket = upload.get_s3_bucket(bucket_name)
        if filename is None:
            filename = os.path.basename(rsc['url'])
        key_path = upload.get_path(rsc['id'], filename)
        key = filename
        # NOTE(review): `key` is `filename`, which cannot be None here
        # (it is defaulted just above), so this check looks unreachable
        # — and it only logs without aborting.  Confirm intent.
        if key is None:
            log.warn('Key \'{0}\' not found in bucket \'{1}\''
                     .format(key_path, bucket_name))
        try:
            # Small workaround to manage downloading of large files
            # We are using redirect to minio's resource public URL
            s3 = upload.get_s3_session()
            client = s3.client(service_name='s3', endpoint_url=host_name)
            url = client.generate_presigned_url(
                ClientMethod='get_object',
                Params={'Bucket': bucket.name,
                        'Key': key_path},
                ExpiresIn=60)
            redirect(url)
        except ClientError as ex:
            if ex.response['Error']['Code'] == 'NoSuchKey':
                # attempt fallback to a copy on the local filesystem,
                # when enabled in config
                if config.get(
                        'ckanext.s3filestore.filesystem_download_fallback',
                        False):
                    log.info('Attempting filesystem fallback for resource {0}'
                             .format(resource_id))
                    url = toolkit.url_for(
                        controller='ckanext.s3filestore.controller:S3Controller',
                        action='filesystem_resource_download',
                        id=id,
                        resource_id=resource_id,
                        filename=filename)
                    redirect(url)
                abort(404, _('Resource data not found'))
            else:
                raise ex