def test_munge_filename_multiple_pass(self):
    '''Munging filename multiple times produces same result.'''
    for org, exp in self.munge_list:
        first_munge = munge_filename(org)
        nose_tools.assert_equal(first_munge, exp)
        second_munge = munge_filename(first_munge)
        nose_tools.assert_equal(second_munge, exp)
def test_munge_filename_multiple_pass(self):
    '''Munging filename multiple times produces same result.'''
    for org, exp in self.munge_list:
        first_munge = munge_filename(org)
        assert_equal(first_munge, exp)
        second_munge = munge_filename(first_munge)
        assert_equal(second_munge, exp)
def test_munge_filename_pass(original, expected):
    """Munging filename multiple times produces same result."""
    first_munge = munge_filename(original)
    assert first_munge == expected
    assert isinstance(first_munge, text_type)
    second_munge = munge_filename(first_munge)
    assert second_munge == expected
    assert isinstance(second_munge, text_type)
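# The test above takes `original` and `expected` as function arguments, so it
# is evidently driven by a pytest parametrize decorator that this excerpt
# omits. A minimal sketch of that wiring follows; the (original, expected)
# pair is an illustrative placeholder, not the project's actual test data,
# and `text_type` is assumed to come from six.
import pytest
from six import text_type
from ckan.lib.munge import munge_filename

@pytest.mark.parametrize('original,expected', [
    # placeholder pair: plain lowercase ASCII should survive munging intact
    ('unchanged', 'unchanged'),
])
def test_munge_filename_pass(original, expected):
    first_munge = munge_filename(original)
    assert first_munge == expected
    assert isinstance(first_munge, text_type)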
class ResourceUpload(object):
    def __init__(self, resource):
        path = get_storage_path()
        if not path:
            self.storage_path = None
            return
        self.storage_path = os.path.join(path, 'resources')
        try:
            os.makedirs(self.storage_path)
        except OSError as e:
            ## errno 17 is file already exists
            if e.errno != 17:
                raise
        self.filename = None

        url = resource.get('url')
        upload_field_storage = resource.pop('upload', None)
        self.clear = resource.pop('clear_upload', None)

        if isinstance(upload_field_storage, cgi.FieldStorage):
            self.filename = upload_field_storage.filename
            self.filename = munge.munge_filename(self.filename)
            resource['url'] = self.filename
            resource['url_type'] = 'upload'
            self.upload_file = upload_field_storage.file
        elif self.clear:
            resource['url_type'] = ''
def __init__(self, resource):
    """
    Support for uploading resources to any storage provider
    implemented by the apache-libcloud library.

    :param resource: The resource dict.
    """
    super(ResourceCloudStorage, self).__init__()

    self.filename = None
    self.old_filename = None
    self.file = None
    self.resource = resource

    upload_field_storage = resource.pop('upload', None)
    self._clear = resource.pop('clear_upload', None)

    # Check to see if a file has been provided
    if isinstance(upload_field_storage, cgi.FieldStorage):
        self.filename = munge.munge_filename(upload_field_storage.filename)
        self.file_upload = upload_field_storage.file
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
    elif self._clear and resource.get('id'):
        # Apparently, this is a created-but-not-committed resource whose
        # file upload has been canceled. We're copying the behaviour of
        # ckanext-s3filestore here.
        old_resource = model.Session.query(model.Resource).get(
            resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    ''' Manipulate data from the data_dict. url_field is the name of the
    field where the upload is going to be. file_field is the name of the
    key where the FieldStorage is kept (i.e. the field where the file data
    actually is). clear_field is the name of a boolean field which
    requests the upload to be deleted. This needs to be called before
    it reaches any validators.'''
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)

    if not self.storage_path:
        return

    if isinstance(self.upload_field_storage, cgi.FieldStorage):
        self.filename = self.upload_field_storage.filename
        self.filename = str(datetime.datetime.utcnow()) + self.filename
        self.filename = munge.munge_filename(self.filename)
        self.filepath = os.path.join(self.storage_path, self.filename)
        data_dict[url_field] = self.filename
        self.upload_file = self.upload_field_storage.file
        self.tmp_filepath = self.filepath + '~'
    ### keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
def __init__(self, resource):
    '''Setup the resource uploader. Actual uploading is performed by
    `upload()`.

    Create a storage path in the format:
    <ckanext.azurefilestore.storage_path>/resources/
    '''
    super(AzureResourceUploader, self).__init__()
    path = config.get('ckanext.azurefilestore.storage_path', '')
    self.storage_path = os.path.join(path, 'resources')
    self.filename = None
    self.old_filename = None

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()
        self.mimetype = resource.get('mimetype')
        if not self.mimetype:
            try:
                self.mimetype = resource['mimetype'] = \
                    mimetypes.guess_type(self.filename, strict=False)[0]
            except Exception:
                pass
        self.upload_file = _get_underlying_file(upload_field_storage)
    elif self.clear and resource.get('id'):
        # New, not yet created resources can be marked for deletion if the
        # user cancels an upload and enters a URL instead.
        old_resource = model.Session.query(model.Resource) \
            .get(resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def download(self, id, filename=None):
    '''
    Provide a download by either redirecting the user to the url stored or
    downloading the uploaded file from S3.
    '''
    if not self.use_filename or filename is None:
        filename = os.path.basename(self.url)
    filename = munge.munge_filename(filename)
    key_path = self.get_path(id, filename)
    key = filename

    if key is None:
        log.warning("Key '%s' not found in bucket '%s'",
                    key_path, self.bucket_name)

    try:
        url = self.get_signed_url_to_key(key_path)
        h.redirect_to(url)
    except ClientError as ex:
        if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
            # attempt fallback
            default_resource_upload = DefaultResourceUpload(self.resource)
            return default_resource_upload.download(id, self.filename)
        else:
            # Controller will raise 404 for us
            raise OSError(errno.ENOENT)
def __init__(self, resource):
    '''Setup the resource uploader. Actual uploading performed by
    `upload()`.

    Create a storage path in the format:
    <ckanext.s3filestore.aws_storage_path>/resources/
    '''
    super(S3ResourceUploader, self).__init__()
    path = config.get('ckanext.s3filestore.aws_storage_path', '')
    self.storage_path = os.path.join(path, 'resources')
    self.filename = None
    self.old_filename = None

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if isinstance(upload_field_storage, cgi.FieldStorage):
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        self.upload_file = upload_field_storage.file
    elif self.clear and resource.get('id'):
        # New, not yet created resources can be marked for deletion if the
        # user cancels an upload and enters a URL instead.
        old_resource = model.Session.query(model.Resource) \
            .get(resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def __init__(self, resource):
    '''Setup the resource uploader. Actual uploading performed by
    `upload()`.

    Create a storage path in the format:
    <ckanext.s3filestore.aws_storage_path>/resources/
    '''
    super(S3ResourceUploader, self).__init__()
    path = config.get('ckanext.cloud_storage.path', '')
    self.resource = resource
    self.storage_path = os.path.join(path, 'resources')
    self.filename = None

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if isinstance(upload_field_storage, cgi.FieldStorage):
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        self.upload_file = upload_field_storage.file
    elif self.clear:
        old_resource = model.Session.query(model.Resource).get(
            resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    resource_group_id = resource['resource_group_id']
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    # some urls do not have the protocol; this adds http:// to these
    url = resource['url']
    ## for_edit is only set when the dataset is to be edited in the
    ## frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        resource_group = model.Session.query(
            model.ResourceGroup).get(resource_group_id)
        last_part = url.split('/')[-1]
        cleaned_name = munge.munge_filename(last_part)
        resource['url'] = h.url_for(controller='package',
                                    action='resource_download',
                                    id=resource_group.package_id,
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif not urlparse.urlsplit(url).scheme and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    resource_group_id = resource['resource_group_id']
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    resource['format'] = _unified_resource_format(res.format)
    # some urls do not have the protocol; this adds http:// to these
    url = resource['url']
    ## for_edit is only set when the dataset is to be edited in the
    ## frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        resource_group = model.Session.query(
            model.ResourceGroup).get(resource_group_id)
        last_part = url.split('/')[-1]
        cleaned_name = munge.munge_filename(last_part)
        resource['url'] = h.url_for(controller='package',
                                    action='resource_download',
                                    id=resource_group.package_id,
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif not urlparse.urlsplit(url).scheme and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
def __init__(self, resource):
    log.info('Start a fake upload to copy resource at {}'.format(
        resource['upload']))
    path = get_storage_path()
    config_mimetype_guess = config.get('ckan.mimetype_guess', 'file_ext')
    if not path:
        self.storage_path = None
        return
    self.storage_path = os.path.join(path, 'resources')
    try:
        os.makedirs(self.storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise
    self.filename = None
    self.mimetype = None

    url = resource.get('url')
    upload_path = resource.pop('upload', None)
    resource['upload'] = 'Local File'
    upload_field_storage = open(upload_path)
    self.clear = resource.pop('clear_upload', None)

    if config_mimetype_guess == 'file_ext':
        self.mimetype = mimetypes.guess_type(url)[0]

    self.filesize = 0  # bytes
    # the opened file object has no FieldStorage attributes, so derive the
    # name from the local path and upload the file object directly
    self.filename = os.path.basename(upload_path)
    self.filename = munge.munge_filename(self.filename)
    resource['url'] = self.filename
    resource['url_type'] = 'upload'
    resource['last_modified'] = datetime.datetime.utcnow()
    self.upload_file = upload_field_storage
    self.upload_file.seek(0, os.SEEK_END)
    self.filesize = self.upload_file.tell()
    # go back to the beginning of the file buffer
    self.upload_file.seek(0, os.SEEK_SET)

    # check if the mimetype failed from guessing with the url
    if not self.mimetype and config_mimetype_guess == 'file_ext':
        self.mimetype = mimetypes.guess_type(self.filename)[0]

    if not self.mimetype and config_mimetype_guess == 'file_contents':
        try:
            self.mimetype = magic.from_buffer(self.upload_file.read(),
                                              mime=True)
            self.upload_file.seek(0, os.SEEK_SET)
        except IOError:
            # Not that important if call above fails
            self.mimetype = None
def path_from_filename(self, rid, filename):
    """
    Returns a bucket path for the given resource_id and filename.

    :param rid: The resource ID.
    :param filename: The unmunged resource filename.
    """
    return os.path.join('resources', rid, munge.munge_filename(filename))
def _get_resource_s3_path(resource_dict):
    download_url = resource_dict.get("download_url") or \
        resource_dict.get("hdx_rel_url")
    if "download/" in download_url:
        url = download_url.split("download/")[1]
    else:
        url = resource_dict.get("name")
    munged_resource_name = munge.munge_filename(url)
    return munged_resource_name
def group_list_dictize(obj_list, context,
                       sort_key=lambda x: x['display_name'], reverse=False,
                       with_package_counts=True):

    active = context.get('active', True)
    with_private = context.get('include_private_packages', False)

    if with_package_counts:
        query = search.PackageSearchQuery()
        q = {'q': '+capacity:public' if not with_private else '*:*',
             'fl': 'groups', 'facet.field': ['groups', 'owner_org'],
             'facet.limit': -1, 'rows': 1}
        query.run(q)

    result_list = []

    for obj in obj_list:
        if context.get('with_capacity'):
            obj, capacity = obj
            group_dict = d.table_dictize(obj, context, capacity=capacity)
        else:
            group_dict = d.table_dictize(obj, context)
        group_dict.pop('created')
        if active and obj.state not in ('active', 'pending'):
            continue

        group_dict['display_name'] = (group_dict.get('title') or
                                      group_dict.get('name'))

        image_url = group_dict.get('image_url')
        group_dict['image_display_url'] = image_url
        if image_url and not image_url.startswith('http'):
            # munge here should not have an effect, only doing it in case
            # of potential vulnerability of dodgy api input
            image_url = munge.munge_filename(image_url)
            group_dict['image_display_url'] = h.url_for_static(
                'uploads/group/%s' % group_dict.get('image_url'),
                qualified=True
            )

        if with_package_counts:
            facets = query.facets
            if obj.is_organization:
                group_dict['packages'] = facets['owner_org'].get(obj.id, 0)
            else:
                group_dict['packages'] = facets['groups'].get(obj.name, 0)

        if context.get('for_view'):
            if group_dict['is_organization']:
                plugin = plugins.IOrganizationController
            else:
                plugin = plugins.IGroupController
            for item in plugins.PluginImplementations(plugin):
                group_dict = item.before_view(group_dict)

        result_list.append(group_dict)
    return sorted(result_list, key=sort_key, reverse=reverse)
def __init__(self, resource: dict[str, Any]) -> None:
    path = get_storage_path()
    config_mimetype_guess = config.get_value('ckan.mimetype_guess')

    if not path:
        self.storage_path = None
        return
    self.storage_path = os.path.join(path, 'resources')
    try:
        os.makedirs(self.storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise
    self.filename = None
    self.mimetype = None

    url = resource.get('url')

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if url and config_mimetype_guess == 'file_ext' and urlparse(url).path:
        self.mimetype = mimetypes.guess_type(url)[0]

    if bool(upload_field_storage) and \
            isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filesize = 0  # bytes
        self.filename = upload_field_storage.filename
        assert self.filename is not None
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()
        self.upload_file = _get_underlying_file(upload_field_storage)
        assert self.upload_file is not None
        self.upload_file.seek(0, os.SEEK_END)
        self.filesize = self.upload_file.tell()
        # go back to the beginning of the file buffer
        self.upload_file.seek(0, os.SEEK_SET)

        # check if the mimetype failed from guessing with the url
        if not self.mimetype and config_mimetype_guess == 'file_ext':
            self.mimetype = mimetypes.guess_type(self.filename)[0]

        if not self.mimetype and config_mimetype_guess == 'file_contents':
            try:
                self.mimetype = magic.from_buffer(self.upload_file.read(),
                                                  mime=True)
                self.upload_file.seek(0, os.SEEK_SET)
            except IOError:
                # Not that important if call above fails
                self.mimetype = None

    elif self.clear:
        resource['url_type'] = ''
def __init__(self, resource):
    """
    Support for uploading resources to any storage provider
    implemented by the apache-libcloud library.

    :param resource: The resource dict.
    """
    super(ResourceCloudStorage, self).__init__()

    self.filename = None
    self.old_filename = None
    self.file = None
    self.resource = resource

    upload_field_storage = resource.pop('upload', None)
    self._clear = resource.pop('clear_upload', None)
    multipart_name = resource.pop('multipart_name', None)

    # Check to see if a file has been provided
    if isinstance(upload_field_storage, cgi.FieldStorage):
        self.filename = munge.munge_filename(upload_field_storage.filename)
        self.file_upload = upload_field_storage.file
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.utcnow()
    elif multipart_name and self.can_use_advanced_aws:
        # This means that the file was successfully uploaded and stored
        # in the cloud. Currently implemented just for the AWS version.
        resource['url'] = munge.munge_filename(multipart_name)
        resource['url_type'] = 'upload'
    elif self._clear and resource.get('id'):
        # Apparently, this is a created-but-not-committed resource whose
        # file upload has been canceled. We're copying the behaviour of
        # ckanext-s3filestore here.
        old_resource = model.Session.query(model.Resource).get(
            resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def path_from_filename(self, rid, filename):
    """
    Returns a bucket path for the given resource_id and filename.

    If there is no parent directory name specified, the root of the
    bucket will contain the resources directory.

    :param rid: The resource ID.
    :param filename: The unmunged resource filename.
    """
    return os.path.join(self.parent_directory_name,
                        'resources',
                        rid,
                        munge.munge_filename(filename))
def file_remove_s3(resource_id, resource_url):
    try:
        uploader = S3ResourceUploader({})
        # resource_name = find_filename_in_url(resource_url)
        munged_resource_name = munge.munge_filename(resource_url)
        filepath = uploader.get_path(resource_id, munged_resource_name)
        uploader.clear_key(filepath)
    except Exception as e:
        msg = 'Couldn\'t delete file from S3: '
        log.warning(msg + str(e))
def group_dictize(group, context):
    model = context['model']
    result_dict = d.table_dictize(group, context)

    result_dict['display_name'] = group.display_name

    result_dict['extras'] = extras_dict_dictize(
        group._extras, context)

    context['with_capacity'] = True
    result_dict['packages'] = d.obj_list_dictize(
        _get_members(context, group, 'packages'),
        context)

    query = search.PackageSearchQuery()
    if group.is_organization:
        q = {'q': 'owner_org:"%s" +capacity:public' % group.id, 'rows': 1}
    else:
        q = {'q': 'groups:"%s" +capacity:public' % group.name, 'rows': 1}
    result_dict['package_count'] = query.run(q)['count']

    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context)

    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect, only doing it in case
        # of potential vulnerability of dodgy api input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'),
            qualified=True
        )

    return result_dict
def __init__(self, resource):
    path = get_storage_path()
    config_mimetype_guess = config.get('ckan.mimetype_guess', 'file_ext')

    if not path:
        self.storage_path = None
        return
    self.storage_path = os.path.join(path, 'resources')
    try:
        os.makedirs(self.storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise
    self.filename = None
    self.mimetype = None

    url = resource.get('url')
    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if config_mimetype_guess == 'file_ext':
        self.mimetype = mimetypes.guess_type(url)[0]

    if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filesize = 0  # bytes
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()
        self.upload_file = _get_underlying_file(upload_field_storage)
        self.upload_file.seek(0, os.SEEK_END)
        self.filesize = self.upload_file.tell()
        # go back to the beginning of the file buffer
        self.upload_file.seek(0, os.SEEK_SET)

        # check if the mimetype failed from guessing with the url
        if not self.mimetype and config_mimetype_guess == 'file_ext':
            self.mimetype = mimetypes.guess_type(self.filename)[0]

        if not self.mimetype and config_mimetype_guess == 'file_contents':
            try:
                self.mimetype = magic.from_buffer(self.upload_file.read(),
                                                  mime=True)
                self.upload_file.seek(0, os.SEEK_SET)
            except IOError:
                # Not that important if call above fails
                self.mimetype = None

    elif self.clear:
        resource['url_type'] = ''
def path_from_filename(self, rid, filename):
    """
    Returns a bucket path for the given resource_id and filename.

    :param rid: The resource ID.
    :param filename: The unmunged resource filename.
    """
    return os.path.join(
        'resources',
        rid,
        munge.munge_filename(filename)
    )
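# A standalone sketch of the key-building pattern the path_from_filename
# helpers above share: the user-supplied filename is always passed through
# ckan.lib.munge.munge_filename before being joined into a bucket key. The
# resource id below is the example value from the get_path docstring later in
# this section; the exact munged output depends on CKAN's munge rules, so it
# is printed rather than asserted.
import os
from ckan.lib.munge import munge_filename

rid = '165900ba-3c60-43c5-9e9c-9f8acd0aa93f'  # example resource id
raw_name = 'My Report (final).csv'            # raw, unmunged user filename

key = os.path.join('resources', rid, munge_filename(raw_name))
print(key)  # -> resources/<rid>/<munged filename>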
def delete(self, id, filename=None):
    ''' Delete file we are pointing at'''
    if filename is None:
        filename = os.path.basename(self.url)
    filename = munge.munge_filename(filename)
    key_path = self.get_path(id, filename)
    try:
        self.clear_key(key_path)
    except ClientError:
        log.warning("Key '%s' not found in bucket '%s' for delete",
                    key_path, self.bucket_name)
def delete(self, id, filename=None):
    ''' Delete file we are pointing at'''
    if filename is None:
        filename = os.path.basename(self.url)
    filename = munge.munge_filename(filename)
    key_path = self.get_path(id, filename)
    try:
        self.clear_key(key_path)
    except ClientError:
        log.warning('Key {0} not found in bucket {1} for delete'.format(
            key_path, self.bucket_name))
def __init__(self, resource):
    '''Setup the resource uploader. Actual uploading performed by
    `upload()`.

    Create a storage path in the format:
    <ckanext.s3filestore.aws_storage_path>/resources/
    '''
    super(S3ResourceUploader, self).__init__()
    self.use_filename = toolkit.asbool(
        config.get('ckanext.s3filestore.use_filename', False))
    path = config.get('ckanext.s3filestore.aws_storage_path', '')
    self.storage_path = os.path.join(path, 'resources')
    self.filename = None
    self.old_filename = None
    self.url = resource['url']
    # Hold onto the resource just in case we need to fall back to the
    # default ResourceUpload from core ckan.lib.uploader
    self.resource = resource

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filesize = 0  # bytes
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()
        self.mimetype = resource.get('mimetype')
        if not self.mimetype:
            try:
                self.mimetype = resource['mimetype'] = \
                    mimetypes.guess_type(self.filename, strict=False)[0]
            except Exception:
                pass
        self.upload_file = _get_underlying_file(upload_field_storage)
        self.upload_file.seek(0, os.SEEK_END)
        self.filesize = self.upload_file.tell()
        # go back to the beginning of the file buffer
        self.upload_file.seek(0, os.SEEK_SET)
    elif self.clear and resource.get('id'):
        # New, not yet created resources can be marked for deletion if the
        # user cancels an upload and enters a URL instead.
        old_resource = model.Session.query(model.Resource) \
            .get(resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def metadata(self, id, filename=None):
    if filename is None:
        filename = os.path.basename(self.url)
    filename = munge.munge_filename(filename)
    key_path = self.get_path(id, filename)
    key = filename

    if key is None:
        log.warning("Key '%s' not found in bucket '%s'",
                    key_path, self.bucket_name)

    try:
        # Small workaround to manage downloading of large files
        # We are using redirect to minio's resource public URL
        client = self.get_s3_client()
        metadata = client.head_object(Bucket=self.bucket_name, Key=key_path)
        metadata['content_type'] = metadata['ContentType']

        # Drop non public metadata
        metadata.pop('ServerSideEncryption', None)
        metadata.pop('SSECustomerAlgorithm', None)
        metadata.pop('SSECustomerKeyMD5', None)
        metadata.pop('SSEKMSKeyId', None)
        metadata.pop('StorageClass', None)
        metadata.pop('RequestCharged', None)
        metadata.pop('ReplicationStatus', None)
        metadata.pop('ObjectLockLegalHoldStatus', None)

        metadata['size'] = metadata['ContentLength']
        metadata['hash'] = metadata['ETag']
        return self.as_clean_dict(metadata)
    except ClientError as ex:
        if ex.response['Error']['Code'] in ['NoSuchKey', '404']:
            if config.get(
                    'ckanext.s3filestore.filesystem_download_fallback',
                    False):
                log.info('Attempting filesystem fallback for resource %s',
                         id)
                default_resource_upload = DefaultResourceUpload(
                    self.resource)
                return default_resource_upload.metadata(id)
        # Uploader interface does not know about s3 errors
        raise OSError(errno.ENOENT)
def get_path(self, id, filename=None):
    '''Return the key used for this resource in S3.

    Keys are in the form:
    <ckanext.s3filestore.aws_storage_path>/resources/<resource id>/<filename>

    e.g.:
    my_storage_path/resources/165900ba-3c60-43c5-9e9c-9f8acd0aa93f/data.csv
    '''
    if filename is None:
        filename = os.path.basename(self.url)
    filename = munge.munge_filename(filename)
    directory = self.get_directory(id, self.storage_path)
    filepath = os.path.join(directory, filename)
    return filepath
def _add_user_extras(user_obj, user_dict):
    for key, value in user_obj.extras.iteritems():
        if key in user_dict:
            log.warning(
                "Trying to override user data with extra variable '%s'",
                key)
            continue
        if key in ('blog', 'www_page', 'translations'):
            if value:
                user_dict[key] = json.loads(value)
        else:
            user_dict[key] = value

    image_url = user_dict.get('image_url', None)
    user_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        user_dict['image_display_url'] = helpers.url_for_static(
            'uploads/user/%s' % user_dict.get('image_url'),
            qualified=True)

    return user_dict
def validate(context, resource, schema_config):
    schema_name = resource.get("validator_schema")
    if not schema_name:
        return
    if schema_name not in schema_config:
        raise IOError("Could not find schema")
    schema = schema_config.get(schema_name).schema

    upload_field_storage = resource.get("upload")
    log.debug(upload_field_storage)
    if isinstance(upload_field_storage, FileStorage):
        file_string = upload_field_storage._file.read()
    elif isinstance(upload_field_storage, cgi.FieldStorage):
        file_string = upload_field_storage.file.read()
    else:
        raise plugins.toolkit.ValidationError({
            "No file uploaded": [
                "Please choose a file to upload (not a link), "
                "you might need to reselect the file"
            ]
        })

    filename = munge.munge_filename(upload_field_storage.filename)
    extension = filename.split(".")[-1]
    scheme = "stream"
    file_upload = cStringIO.StringIO(file_string)
    if extension == "csv":
        scheme = "text"
        file_upload = file_string.decode("utf-8").encode("ascii", "ignore")

    checks = ["schema"]
    if schema.get("transpose"):
        file_upload = transpose(file_upload, extension)
    if "custom-constraint" in schema:
        checks.append(
            {"custom-constraint": schema.get("custom-constraint", {})})

    report = goodtables.validate(file_upload, format=extension,
                                 scheme=scheme, schema=schema,
                                 checks=checks)
    log.info(report)
    return report, schema
def delete_ckan_record(package_id):
    """
    Remove a dataset and its associated resource from CKAN
    :param package_id:
    :return: Nothing
    """
    # First, verify and get the resource ID
    package_record = get_ckan_record(package_id)
    if len(package_record) == 0:
        logger.warn("Cannot find record {0} to delete".format(package_id))
        return

    # Get rid of the resource
    remote_ckan_url = Config.get('ckan', 'remote_url')
    remote_ckan_api = Config.get('ckan', 'remote_api_key')
    user_agent = Config.get('web', 'user_agent')

    # Delete the local file if it exists
    gcdocs_file = os.path.join(
        doc_intake_dir,
        munge_filename(os.path.basename(
            package_record['resources'][0]['name'])))
    if os.path.exists(gcdocs_file):
        os.remove(gcdocs_file)

    with RemoteCKAN(remote_ckan_url, user_agent=user_agent,
                    apikey=remote_ckan_api) as ckan_instance:
        try:
            delete_blob(
                ckan_container,
                'resources/{0}/{1}'.format(
                    package_record['resources'][0]['id'],
                    package_record['resources'][0]['name'].lower()))
            ckan_instance.action.package_delete(id=package_record['id'])
            ckan_instance.action.dataset_purge(id=package_record['id'])
            logger.info("Deleted expired CKAN record {0}".format(
                package_record['id']))
        except Exception as ex:
            logger.error("Unexpected error when deleting record {0}".format(
                ex.message))
def resource_dictize(res, context):
    model = context['model']
    resource = d.table_dictize(res, context)
    extras = resource.pop("extras", None)
    if extras:
        resource.update(extras)
    # some urls do not have the protocol; this adds http:// to these
    url = resource['url']
    ## for_edit is only set when the dataset is to be edited in the
    ## frontend. Without for_edit the whole qualified url is returned.
    if resource.get('url_type') == 'upload' and not context.get('for_edit'):
        cleaned_name = munge.munge_filename(url)
        resource['url'] = h.url_for('resource.download',
                                    id=resource['package_id'],
                                    resource_id=res.id,
                                    filename=cleaned_name,
                                    qualified=True)
    elif resource['url'] and not urlparse.urlsplit(url).scheme \
            and not context.get('for_edit'):
        resource['url'] = u'http://' + url.lstrip('/')
    return resource
def _add_user_extras(user_obj, user_dict):
    for key, value in user_obj.extras.iteritems():
        if key in user_dict:
            log.warning("Trying to override user data with extra variable "
                        "'%s'", key)
            continue
        if key in ('blog', 'www_page', 'translations'):
            if value:
                user_dict[key] = json.loads(value)
        else:
            user_dict[key] = value

    image_url = user_dict.get('image_url', None)
    user_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        user_dict['image_display_url'] = helpers.url_for_static(
            'uploads/user/%s' % user_dict.get('image_url'),
            qualified=True
        )

    return user_dict
def __init__(self, file_dict):
    path = uploader.get_storage_path()
    if not path:
        self.storage_path = None
        return
    self.storage_path = os.path.join(path, 'global')
    try:
        os.makedirs(self.storage_path)
    except OSError as e:
        # errno 17 is file already exists
        if e.errno != 17:
            raise

    self.filename = os.path.basename(file_dict.get('filename')) \
        if file_dict.get('filename') else None
    upload_field_storage = file_dict.pop('upload', None)
    if isinstance(upload_field_storage, cgi.FieldStorage):
        self._update_filename(upload_field_storage)
        self.filename = munge.munge_filename(self.filename)
        file_dict['filename'] = self.filename
        self.upload_file = upload_field_storage.file
def update_data_dict(self, data_dict, url_field, file_field, clear_field):
    """ Manipulate data from the data_dict. url_field is the name of the
    field where the upload is going to be. file_field is the name of the
    key where the FieldStorage is kept (i.e. the field where the file data
    actually is). clear_field is the name of a boolean field which
    requests the upload to be deleted. This needs to be called before
    it reaches any validators."""
    self.url = data_dict.get(url_field, '')
    self.clear = data_dict.pop(clear_field, None)
    self.file_field = file_field
    self.upload_field_storage = data_dict.pop(file_field, None)

    if not self.storage_path:
        return

    if self.old_filename:
        self.old_filepath = os.path.join(self.storage_path,
                                         data_dict.get('name'),
                                         self.old_filename)

    if isinstance(self.upload_field_storage, (ALLOWED_UPLOAD_TYPES)):
        self.filename = self.upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        organization_storagepath = os.path.join(self.storage_path,
                                                data_dict.get('name'))
        _make_dirs_if_not_existing(organization_storagepath)
        self.filepath = os.path.join(organization_storagepath,
                                     self.filename)
        data_dict[url_field] = self.filename
        data_dict['url_type'] = 'upload'
        self.upload_file = _get_underlying_file(self.upload_field_storage)
        self.tmp_filepath = self.filepath + '~'
    # keep the file if there has been no change
    elif self.old_filename and not self.old_filename.startswith('http'):
        if not self.clear:
            data_dict[url_field] = self.old_filename
        if self.clear and self.url == self.old_filename:
            data_dict[url_field] = ''
def __init__(self, resource):
    '''Setup the resource uploader. Actual uploading performed by
    `upload()`.

    Create a storage path in the format:
    <ckanext.s3filestore.aws_storage_path>/resources/
    '''
    super(S3ResourceUploader, self).__init__()
    path = config.get('ckanext.s3filestore.aws_storage_path', '')
    self.storage_path = os.path.join(path, 'resources')
    self.filename = None
    self.old_filename = None

    upload_field_storage = resource.pop('upload', None)
    self.clear = resource.pop('clear_upload', None)

    if isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES):
        self.filename = upload_field_storage.filename
        self.filename = munge.munge_filename(self.filename)
        resource['url'] = self.filename
        resource['url_type'] = 'upload'
        resource['last_modified'] = datetime.datetime.utcnow()
        self.mimetype = resource.get('mimetype')
        if not self.mimetype:
            try:
                self.mimetype = resource['mimetype'] = \
                    mimetypes.guess_type(self.filename, strict=False)[0]
            except Exception:
                pass
        self.upload_file = _get_underlying_file(upload_field_storage)
    elif self.clear and resource.get('id'):
        # New, not yet created resources can be marked for deletion if the
        # user cancels an upload and enters a URL instead.
        old_resource = model.Session.query(model.Resource) \
            .get(resource['id'])
        self.old_filename = old_resource.url
        resource['url_type'] = ''
def walk(bucket, dir, files):
    for file in files:
        full_path = os.path.join(resource_path, dir, file)
        if not os.path.isfile(full_path) or full_path.endswith('~'):
            continue
        key_name = full_path[len(resource_path):]
        for key in bucket.list(prefix=key_name.lstrip('/')):
            key.delete()
        resource_id = key_name.replace('/', '')
        resource = model.Resource.get(resource_id)
        if not resource:
            continue
        last_part = resource.url.split('/')[-1]
        file_name = munge.munge_filename(last_part)
        key_name = key_name + '/' + file_name
        key = s3key.Key(bucket)
        key.key = key_name
        key.set_contents_from_filename(full_path)
        print('Archived %s' % key_name)
        os.remove(full_path)
def test_munge_filename(self):
    '''Munging a list of filenames gives expected results.'''
    for org, exp in self.munge_list:
        munge = munge_filename(org)
        assert_equal(munge, exp)
def _save_shape_file_as_resource(self, lat, lon, shape_file_name,
                                 watershed_des, organization):
    source = 'delineate.delineatewatershed._save_shape_file_as_resource():'
    ajax_response = d_helper.AJAXResponse()
    if not self._validate_file_name(shape_file_name):
        ajax_response.success = False
        ajax_response.message = \
            'Invalid shape file name:%s.' % shape_file_name + \
            '\nFile name needs to have only alphanumeric characters and ' \
            'dash, hyphen or space characters.'
        return ajax_response.to_json()

    # TODO: make the saving of the file to temp directory a separate function
    ckan_default_dir = d_helper.StringSettings.ckan_user_session_temp_dir
    session_id = base.session['id']
    shape_files_source_dir = os.path.join(ckan_default_dir, session_id,
                                          'ShapeFiles')
    target_zip_dir = os.path.join(ckan_default_dir, session_id,
                                  'ShapeZippedFile')
    shape_zip_file = os.path.join(target_zip_dir, shape_file_name + '.zip')
    if not os.path.isdir(shape_files_source_dir):
        log.error(source + 'CKAN error: Expected shape file source dir '
                           'path (%s) is missing.' % shape_files_source_dir)
        ajax_response.success = False
        ajax_response.message = _('Failed to save the watershed shape file.')
        return ajax_response.to_json()

    if not os.path.exists(shape_zip_file):
        # create the watershed zip file first
        if os.path.isdir(target_zip_dir):
            shutil.rmtree(target_zip_dir)
        os.makedirs(target_zip_dir)
        files_to_archive = shape_files_source_dir + '/' + 'Watershed.*'
        zipper = zipfile.ZipFile(shape_zip_file, 'w')
        for file_to_zip in glob.glob(files_to_archive):
            zipper.write(file_to_zip, os.path.basename(file_to_zip),
                         compress_type=zipfile.ZIP_DEFLATED)
        zipper.close()

    # TODO: make the creation of a new package a new function
    # create a package
    package_create_action = tk.get_action('package_create')

    # create unique package name using the current time stamp as a postfix
    # to any package name
    unique_postfix = datetime.now().isoformat().replace(
        ':', '-').replace('.', '-').lower()
    pkg_title = shape_file_name  # + '_'
    pkg_name = shape_file_name.replace(' ', '-').lower()
    data_dict = {
        'name': pkg_name + '_' + unique_postfix,
        'type': 'geographic-feature-set',
        'title': pkg_title,
        'author': tk.c.userObj.name if tk.c.userObj else tk.c.author,
        # TODO: userObj is None always. Need to retrieve user full name
        'notes': 'This is a dataset that contains a watershed shape zip '
                 'file for an outlet location at latitude:%s and '
                 'longitude:%s. ' % (lat, lon) + watershed_des,
        'owner_org': organization,
        'variable_name': '',  # extra metadata field begins from here
        'variable_unit': '',
        'north_extent': '',
        'south_extent': '',
        'east_extent': '',
        'west_extent': '',
        'projection': 'WGS_1984',  # this is what our delineation service
                                   # sets for the watershed
        'dataset_type': 'geographic-feature-set'
    }

    context = {'model': base.model, 'session': base.model.Session,
               'user': tk.c.user or tk.c.author, 'save': 'save'}
    try:
        pkg_dict = package_create_action(context, data_dict)
        log.info(source + 'A new dataset was created with name: %s'
                 % data_dict['title'])
    except Exception as e:
        log.error(source + 'Failed to create a new dataset for saving '
                           'watershed shape file as a resource.\n'
                           'Exception: %s' % e)
        ajax_response.success = False
        ajax_response.message = _('Failed to create a new dataset for '
                                  'saving watershed shape file as a '
                                  'resource.')
        return ajax_response.to_json()

    # TODO: make the add resource to a package a new function
    if 'resources' not in pkg_dict:
        pkg_dict['resources'] = []

    file_name = munge.munge_filename(shape_file_name + '.zip')
    resource = {'url': file_name, 'url_type': 'upload'}
    upload = uploader.ResourceUpload(resource)
    upload.filename = file_name
    upload.upload_file = open(shape_zip_file, 'r')
    data_dict = {'format': 'zip', 'name': file_name, 'url': file_name,
                 'url_type': 'upload'}
    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        # update the package
        package_update_action = tk.get_action('package_update')
        package_update_action(context, pkg_dict)
        context.pop('defer_commit')
    except Exception as e:
        log.error(source + 'Failed to update the new dataset for adding '
                           'watershed shape file as a resource.\n'
                           'Exception: %s' % e)
        ajax_response.success = False
        ajax_response.message = _('Failed to save watershed shape file as '
                                  'a resource.')
        return ajax_response.to_json()

    # Get the resource_id from the model as it will not appear in
    # package_show until after commit
    upload.upload(context['package'].resources[-1].id,
                  uploader.get_max_resource_size())
    base.model.repo.commit()

    ajax_response.success = True
    ajax_response.message = _('Watershed shape file was saved as a '
                              'resource.')
    return ajax_response.to_json()
def group_dictize(group, context):
    result_dict = d.table_dictize(group, context)

    result_dict['display_name'] = group.display_name

    result_dict['extras'] = extras_dict_dictize(
        group._extras, context)

    include_datasets = context.get('include_datasets', True)

    q = {
        'facet': 'false',
        'rows': 0,
    }

    if group.is_organization:
        q['fq'] = 'owner_org:"{0}"'.format(group.id)
    else:
        q['fq'] = 'groups:"{0}"'.format(group.name)

    is_group_member = (context.get('user') and
                       new_authz.has_user_permission_for_group_or_org(
                           group.id, context.get('user'), 'read'))
    if is_group_member:
        context['ignore_capacity_check'] = True

    if include_datasets:
        q['rows'] = 1000  # Only the first 1000 datasets are returned

    context_ = dict((k, v) for (k, v) in context.items() if k != 'schema')
    search_results = logic.get_action('package_search')(context_, q)

    if include_datasets:
        result_dict['packages'] = search_results['results']

    result_dict['package_count'] = search_results['count']

    context['with_capacity'] = True
    result_dict['tags'] = tag_list_dictize(
        _get_members(context, group, 'tags'),
        context)

    result_dict['groups'] = group_list_dictize(
        _get_members(context, group, 'groups'),
        context)

    result_dict['users'] = user_list_dictize(
        _get_members(context, group, 'users'),
        context)
    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect, only doing it in case
        # of potential vulnerability of dodgy api input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'),
            qualified=True
        )

    return result_dict
def _save_ueb_package_as_dataset(service_call_results,
                                 model_config_dataset_id):
    source = 'uebpackage.tasks._save_ueb_package_as_dataset():'
    ckan_default_dir = uebhelper.StringSettings.ckan_user_session_temp_dir  # '/tmp/ckan'

    # get the matching model configuration dataset object
    model_config_dataset_obj = base.model.Package.get(
        model_config_dataset_id)
    model_config_dataset_title = model_config_dataset_obj.title
    model_config_dataset_owner_org = model_config_dataset_obj.owner_org
    model_config_dataset_author = model_config_dataset_obj.author

    # create a directory for saving the file
    # this will be a dir in the form of: /tmp/ckan/{random_id}
    random_id = base.model.types.make_uuid()
    destination_dir = os.path.join(ckan_default_dir, random_id)
    os.makedirs(destination_dir)
    model_pkg_filename = uebhelper.StringSettings.ueb_input_model_package_default_filename  # 'ueb_model_pkg.zip'
    model_pkg_file = os.path.join(destination_dir, model_pkg_filename)

    bytes_to_read = 16 * 1024
    try:
        with open(model_pkg_file, 'wb') as file_obj:
            while True:
                data = service_call_results.read(bytes_to_read)
                if not data:
                    break
                file_obj.write(data)
    except Exception as e:
        log.error(source + 'Failed to save the ueb_package zip file to '
                           'temporary location for UEB model configuration '
                           'dataset ID: %s \n Exception: %s'
                  % (model_config_dataset_id, e))
        raise e

    log.info(source + 'ueb_package zip file was saved to temporary '
                      'location for UEB model configuration dataset ID: %s'
             % model_config_dataset_id)

    # upload the file to CKAN file store
    # resource_metadata = _upload_file(model_pkg_file)
    # if resource_metadata:
    #     log.info(source + 'UEB model package zip file was uploaded for '
    #                       'model configuration dataset ID:%s'
    #              % model_config_dataset_id)
    # else:
    #     log.error(source + 'Failed to upload UEB model package zip file '
    #                        'for model configuration dataset ID: %s'
    #               % model_config_dataset_id)
    #     return
    #
    # # retrieve some of the file meta data
    # resource_url = resource_metadata.get('_label')  # this will return datetime stamp/filename
    #
    # resource_url = '/storage/f/' + resource_url
    # if resource_url.startswith('/'):
    #     resource_url = base.config.get('ckan.site_url', '').rstrip('/') + resource_url
    # else:
    #     resource_url = base.config.get('ckan.site_url', '') + resource_url
    #
    # resource_created_date = resource_metadata.get('_creation_date')
    # resource_name = resource_metadata.get('filename_original')
    # resource_size = resource_metadata.get('_content_length')
    #
    # # add the uploaded ueb model pkg data file as a resource to the dataset
    # resource_create_action = tk.get_action('resource_create')
    # context = {'model': base.model, 'session': base.model.Session,
    #            'save': 'save'}
    # user = uebhelper.get_site_user()
    # context['user'] = user.get('name')
    # context['ignore_auth'] = True
    # context['validate'] = False

    user = uebhelper.get_site_user()

    # create a package
    package_create_action = tk.get_action('package_create')

    # create unique package name using the current time stamp as a postfix
    # to any package name
    unique_postfix = datetime.now().isoformat().replace(
        ':', '-').replace('.', '-').lower()
    pkg_title = model_config_dataset_title
    data_dict = {
        'name': 'model_package_' + unique_postfix,  # this needs to be unique as required by DB
        'type': 'model-package',  # dataset type as defined in custom dataset plugin
        'title': pkg_title,
        'owner_org': model_config_dataset_owner_org,
        'author': model_config_dataset_author,
        'notes': 'UEB model package',
        'pkg_model_name': 'UEB',
        'model_version': '1.0',
        'north_extent': '',
        'south_extent': '',
        'east_extent': '',
        'west_extent': '',
        'simulation_start_day': '',
        'simulation_end_day': '',
        'time_step': '',
        'package_type': u'Input',
        'package_run_status': 'Not yet submitted',
        'package_run_job_id': '',
        'dataset_type': 'model-package'
    }

    context = {'model': base.model, 'session': base.model.Session,
               'ignore_auth': True, 'user': user.get('name'),
               'save': 'save'}
    try:
        # this is needed since we are creating a package in a background
        # operation
        uebhelper.register_translator()
        pkg_dict = package_create_action(context, data_dict)
        log.info(source + 'A new dataset was created for UEB input model '
                          'package with name: %s' % data_dict['title'])
    except Exception as e:
        log.error(source + 'Failed to create a new dataset for ueb input '
                           'model package for the related model '
                           'configuration dataset title: %s \n'
                           'Exception: %s' % (pkg_title, e))
        raise e

    pkg_id = pkg_dict['id']
    if 'resources' not in pkg_dict:
        pkg_dict['resources'] = []

    file_name = munge.munge_filename(model_pkg_filename)
    resource = {'url': file_name, 'url_type': 'upload'}
    upload = uploader.ResourceUpload(resource)
    upload.filename = file_name
    upload.upload_file = open(model_pkg_file, 'r')
    data_dict = {'format': 'zip', 'name': file_name, 'url': file_name,
                 'url_type': 'upload'}
    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        # update the package
        package_update_action = tk.get_action('package_update')
        package_update_action(context, pkg_dict)
        context.pop('defer_commit')
    except Exception as e:
        log.error(source + ' Failed to update the new dataset for adding '
                           'the input model package zip file as a resource.'
                           '\n Exception: %s' % e)
        raise e

    # link this newly created model package dataset to the model
    # configuration dataset
    package_relationship_create_action = tk.get_action(
        'package_relationship_create')
    data_dict = {'subject': pkg_id, 'object': model_config_dataset_id,
                 'type': 'links_to'}
    package_relationship_create_action(context, data_dict)

    # Get the resource_id from the model as it will not appear in
    # package_show until after commit
    upload.upload(context['package'].resources[-1].id,
                  uploader.get_max_resource_size())
    base.model.repo.commit()

    # update the related model configuration dataset to show that the
    # package is available
    data_dict = {'package_availability': 'Available'}
    update_msg = 'system auto updated ueb package dataset'
    background_task = True
    try:
        updated_package = uebhelper.update_package(
            model_config_dataset_id, data_dict, update_msg, background_task)
        log.info(source + 'UEB model configuration dataset was updated as '
                          'a result of receiving model input package for '
                          'dataset:%s' % updated_package['name'])
    except Exception as e:
        log.error(source + 'Failed to update UEB model configuration '
                           'dataset after receiving model input package '
                           'for dataset ID:%s \n Exception: %s'
                  % (model_config_dataset_id, e))
        raise e
def test_munge_filename(self):
    '''Munging a list of filenames gives expected results.'''
    for org, exp in self.munge_list:
        munge = munge_filename(org)
        nose_tools.assert_equal(munge, exp)
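# The nose-style tests above iterate over a `self.munge_list` fixture that
# these excerpts never show. A plausible minimal sketch of that setup
# follows; the pair is an illustrative placeholder, not CKAN's actual
# expected output.
import nose.tools as nose_tools
from ckan.lib.munge import munge_filename

class TestMungeFilename(object):
    # (original filename, expected munged filename) -- placeholder pair
    munge_list = [
        ('unchanged', 'unchanged'),
    ]

    def test_munge_filename(self):
        '''Munging a list of filenames gives expected results.'''
        for org, exp in self.munge_list:
            nose_tools.assert_equal(munge_filename(org), exp)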
def check_and_create_csv(self, context, resource):
    log.debug("Resource: %s" % str(resource))
    resource_filename = os.path.basename(resource.get('url'))
    if not resource_filename:
        return
    log.debug('resource_filename: %s' % resource_filename)

    try:
        # get the config of this plugin
        infoplus_schema_file = pluginconf.get(
            'ckanext.resourcecsv.schemas.infoplus', False)
        if not infoplus_schema_file:
            pass
        # log.debug('Infoplus_schema_file: %s' % str(infoplus_schema_file))
        infoplus_schema = json.load(
            self._load_schema_module_path(infoplus_schema_file))
    except Exception as e:
        log.error('ResourceCSV Plugin scheming error: %s' % str(e))
        return
    # log.debug('infoplus_schema: %s' % str(infoplus_schema))

    # check if the file is in the schema
    munged_filename = munge_filename(resource_filename)
    log.debug('munged_filename: %s' % munged_filename)
    coldef = []
    for key, cdef in infoplus_schema.iteritems():
        if munge_filename(key) != resource_filename:
            continue  # skip
        coldef = cdef
        break
    if not len(coldef):
        log.info("Key %s not found in munged infoplus_schema"
                 % str(munged_filename))
        return

    # download the file to a tmp location
    # with tempfile.NamedTemporaryFile(mode='ab+') as tmpfile:
    # tmpfile = tempfile.NamedTemporaryFile(mode='ab+')
    uploadfile = os.path.join('/tmp/', resource_filename) + '.csv'
    tmpfile = open(uploadfile, 'ab+')
    log.info("Downloading %s" % os.path.basename(resource.get('url')))
    data = urllib2.urlopen(resource.get('url')).readlines()

    # write a dummy header row
    tmpfile.write(
        self.DELIMITER.join('%s %d' % (_('Column'), i + 1)
                            for i in range(0, len(coldef) + 1)) + "\r"
    )

    # replace the defined characters in each line with a delimiter
    for line in data:
        # ignore any lines that start with a comment
        if line.startswith('#'):
            continue  # skip
        if line.startswith('*'):
            continue  # skip
        if line.startswith('%'):
            continue  # skip
        # line = line.encode('utf-8')
        tl = list(line)  # explode
        # inject the delimiter
        for col in coldef:
            tl[col] = self.DELIMITER
        line = "".join(tl)  # implode
        log.debug(line)
        tmpfile.write(line)

    # establish a connection to ckan
    try:
        site_url = pluginconf.get('ckan.site_url', None)
        api_key = model.User.get(context['user']).apikey.encode('utf8')
        ckan = ckanapi.RemoteCKAN(site_url, apikey=api_key,
                                  user_agent='ckanapi/1.0 (+%s)' % site_url)
        log.debug("Connected to %s" % site_url)
    except ckanapi.NotAuthorized as e:
        log.error('User not authorized')
        return False
def group_dictize(group, context,
                  include_groups=True,
                  include_tags=True,
                  include_users=True,
                  include_extras=True,
                  packages_field='datasets',
                  **kw):
    '''
    Turns a Group object and related into a dictionary. The related objects
    like tags are included unless you specify it in the params.

    :param packages_field: determines the format of the `packages` field -
        can be `datasets`, `dataset_count`,
        `none_but_include_package_count` or None. If set to `dataset_count`
        or `none_but_include_package_count` then you can precalculate
        dataset counts in advance by supplying:
        context['dataset_counts'] = get_group_dataset_counts()
    '''
    assert packages_field in ('datasets', 'dataset_count',
                              'none_but_include_package_count', None)
    if packages_field in ('dataset_count',
                          'none_but_include_package_count'):
        dataset_counts = context.get('dataset_counts', None)

    result_dict = d.table_dictize(group, context)
    result_dict.update(kw)

    result_dict['display_name'] = group.title or group.name

    if include_extras:
        result_dict['extras'] = extras_dict_dictize(
            group._extras, context)

    context['with_capacity'] = True

    if packages_field:
        def get_packages_for_this_group(group_):
            # Ask SOLR for the list of packages for this org/group
            q = {
                'facet': 'false',
                'rows': 0,
            }

            if group_.is_organization:
                q['fq'] = 'owner_org:"{0}"'.format(group_.id)
            else:
                q['fq'] = 'groups:"{0}"'.format(group_.name)

            # Allow members of organizations to see private datasets.
            if group_.is_organization:
                is_group_member = (context.get('user') and
                                   new_authz.has_user_permission_for_group_or_org(
                                       group_.id, context.get('user'),
                                       'read'))
                if is_group_member:
                    context['ignore_capacity_check'] = True

            if not context.get('for_view'):
                q['rows'] = 1000  # Only the first 1000 datasets are returned

            search_context = dict((k, v) for (k, v) in context.items()
                                  if k != 'schema')
            search_results = logic.get_action('package_search')(
                search_context, q)
            return search_results['count'], search_results['results']

        if packages_field == 'datasets':
            package_count, packages = get_packages_for_this_group(group)
            result_dict['packages'] = packages
        else:
            # i.e. packages_field is 'dataset_count' or
            # 'none_but_include_package_count'
            if dataset_counts is None:
                package_count, packages = get_packages_for_this_group(group)
            else:
                # Use the pre-calculated package_counts passed in.
                facets = dataset_counts
                if group.is_organization:
                    package_count = facets['owner_org'].get(group.id, 0)
                else:
                    package_count = facets['groups'].get(group.name, 0)
            if packages_field != 'none_but_include_package_count':
                result_dict['packages'] = package_count

        result_dict['package_count'] = package_count

    if include_tags:
        # group tags are not creatable via the API yet, but that was(/is) a
        # future intention (see kindly's commit 5c8df894 on 2011/12/23)
        result_dict['tags'] = tag_list_dictize(
            _get_members(context, group, 'tags'),
            context)

    if include_groups:
        # these sub-groups won't have tags or extras for speed
        result_dict['groups'] = group_list_dictize(
            _get_members(context, group, 'groups'),
            context, include_groups=True)

    if include_users:
        result_dict['users'] = user_list_dictize(
            _get_members(context, group, 'users'),
            context)

    context['with_capacity'] = False

    if context.get('for_view'):
        if result_dict['is_organization']:
            plugin = plugins.IOrganizationController
        else:
            plugin = plugins.IGroupController
        for item in plugins.PluginImplementations(plugin):
            result_dict = item.before_view(result_dict)

    image_url = result_dict.get('image_url')
    result_dict['image_display_url'] = image_url
    if image_url and not image_url.startswith('http'):
        # munge here should not have an effect, only doing it in case
        # of potential vulnerability of dodgy api input
        image_url = munge.munge_filename(image_url)
        result_dict['image_display_url'] = h.url_for_static(
            'uploads/group/%s' % result_dict.get('image_url'),
            qualified=True
        )

    return result_dict
def package_update(context, data_dict):
    '''Update a dataset (package).

    You must be authorized to edit the dataset and the groups that it
    belongs to.

    Plugins may change the parameters of this function depending on the
    value of the dataset's ``type`` attribute, see the ``IDatasetForm``
    plugin interface.

    For further parameters see ``package_create()``.

    :param id: the name or id of the dataset to update
    :type id: string

    :returns: the updated dataset (if 'return_package_dict' is True in the
              context, which is the default. Otherwise returns just the
              dataset id)
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']
    name_or_id = data_dict.get("id") or data_dict['name']

    pkg = model.Package.get(name_or_id)
    if pkg is None:
        raise NotFound(_('Package was not found.'))
    context["package"] = pkg
    data_dict["id"] = pkg.id

    # FIXME: first modifications to package_update begin here:
    # tag strings are reconstructed because validators are stripping
    # tags passed and only taking tags as tag_string values;
    # image upload support has also been added here
    old_data = get_action('package_show')(context, {'id': pkg.id})

    '''
    Constructing the tag_string from the given tags.
    There must be at least one tag, otherwise the tag_string will be empty
    and a validation error will be raised.
    '''
    if not data_dict.get('tag_string'):
        data_dict['tag_string'] = ', '.join(
            h.dict_list_reduce(data_dict.get('tags', {}), 'name'))

    for key, value in old_data.iteritems():
        if key not in data_dict:
            data_dict[key] = value

    # data_dict['resources'] = data_dict.get('resources',
    #                                        old_data.get('resources'))

    # iso_topic_cat = data_dict.get('iso_topic_string', [])
    # if isinstance(iso_topic_cat, basestring):
    #     iso_topic_cat = [iso_topic_cat]
    #
    # data_dict['iso_topic_string'] = ','.join(iso_topic_cat)

    # Set the package last modified date
    data_dict['record_last_modified'] = str(datetime.date.today())

    # If the Created Date has not yet been set, then set it
    if data_dict['edc_state'] == 'DRAFT' and \
            not data_dict.get('record_create_date'):
        data_dict['record_create_date'] = str(datetime.date.today())

    # If the Publish Date has not yet been set, then set it
    if data_dict['edc_state'] == 'PUBLISHED' and \
            not data_dict.get('record_publish_date'):
        data_dict['record_publish_date'] = str(datetime.date.today())

    # If the Archive Date has not yet been set, then set it
    if data_dict['edc_state'] == 'ARCHIVED' and \
            not data_dict.get('record_archive_date'):
        data_dict['record_archive_date'] = str(datetime.date.today())

    _check_access('package_update', context, data_dict)

    # get the schema
    package_plugin = lib_plugins.lookup_package_plugin(pkg.type)
    if 'schema' in context:
        schema = context['schema']
    else:
        schema = package_plugin.update_package_schema()

    image_url = old_data.get('image_url', None)

    upload = uploader.Upload('edc', image_url)
    upload.update_data_dict(data_dict, 'image_url', 'image_upload',
                            'clear_upload')

    # Adding image display url for the uploaded image
    image_url = data_dict.get('image_url')
    data_dict['image_display_url'] = image_url

    if image_url and not image_url.startswith('http'):
        image_url = munge.munge_filename(image_url)
        data_dict['image_display_url'] = h.url_for_static(
            'uploads/edc/%s' % data_dict.get('image_url'), qualified=True)

    if 'api_version' not in context:
        # check_data_dict() is deprecated. If the package_plugin has a
        # check_data_dict() we'll call it, if it doesn't have the method
        # we'll do nothing.
        check_data_dict = getattr(package_plugin, 'check_data_dict', None)
        if check_data_dict:
            try:
                package_plugin.check_data_dict(data_dict, schema)
            except TypeError:
                # Old plugins do not support passing the schema so we need
                # to ensure they still work.
                package_plugin.check_data_dict(data_dict)
    # FIXME: modifications to package_update end here^

    data, errors = _validate(data_dict, schema, context)
    # log.debug('package_update validate_errs=%r user=%s package=%s data=%r',
    #           errors, context.get('user'),
    #           context.get('package').name if context.get('package') else '',
    #           data)

    if errors:
        model.Session.rollback()
        raise ValidationError(errors)

    rev = model.repo.new_revision()
    rev.author = user
    if 'message' in context:
        rev.message = context['message']
    else:
        rev.message = _(u'REST API: Update object %s') % data.get("name")

    # avoid revisioning by updating directly
    model.Session.query(model.Package).filter_by(id=pkg.id).update(
        {"metadata_modified": datetime.datetime.utcnow()})
    model.Session.refresh(pkg)

    pkg = model_save.package_dict_save(data, context)

    context_org_update = context.copy()
    context_org_update['ignore_auth'] = True
    context_org_update['defer_commit'] = True
    _get_action('package_owner_org_update')(
        context_org_update,
        {'id': pkg.id, 'organization_id': pkg.owner_org})

    for item in plugins.PluginImplementations(plugins.IPackageController):
        item.edit(pkg)
        item.after_update(context, data)

    upload.upload(uploader.get_max_image_size())

    # TODO: the next two blocks are copied from
    # ckan/ckan/logic/action/update.py
    # This codebase is currently hard to maintain because large chunks of
    # the CKAN action API and the CKAN controllers are simply overridden.
    # This is probably worse than just forking CKAN would have been,
    # because in that case at least we could track changes.
    # - @deniszgonjanin

    # Needed to let extensions know the new resources ids
    model.Session.flush()
    if data.get('resources'):
        for index, resource in enumerate(data['resources']):
            resource['id'] = pkg.resources[index].id

    # Create default views for resources if necessary
    if data.get('resources'):
        logic.get_action('package_create_default_resource_views')(
            {'model': context['model'], 'user': context['user'],
             'ignore_auth': True},
            {'package': data})

    if not context.get('defer_commit'):
        model.repo.commit()

    log.debug('Updated object %s' % pkg.name)

    return_id_only = context.get('return_id_only', False)

    # Make sure that a user provided schema is not used on package_show
    context.pop('schema', None)

    # we could update the dataset so we should still be able to read it.
    context['ignore_auth'] = True
    output = data_dict['id'] if return_id_only \
        else _get_action('package_show')(context, {'id': data_dict['id']})

    '''
    Send state change notifications if required; Added by Khalegh Mamakani.
    Using a thread to run the job in the background so that package_update
    will not wait for notifications sending.
    '''
    old_state = old_data.get('edc_state')

    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    dataset_url = config.get('ckan.site_url') + \
        h.url_for(controller='package', action="read", id=data_dict['name'])

    import threading
    notify_thread = threading.Thread(
        target=check_record_state,
        args=(context, old_state, data_dict, g.site_title, g.site_url,
              dataset_url))
    notify_thread.start()

    return output