def resource_download(self, id, resource_id, filename=None):
    """Provide a direct download of a resource.

    Records an analytics hit, then either redirects the user to the
    stored URL or streams an uploaded file straight from the filestore.

    :param id: id or name of the package the resource belongs to
    :param resource_id: id of the resource to download
    :param filename: unused; kept so existing routes keep matching
    """
    custom_base.g_analitics()
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}
    try:
        rsc = get_action('resource_show')(context, {'id': resource_id})
        # Raises NotFound/NotAuthorized for a bad or forbidden package.
        get_action('package_show')(context, {'id': id})

        # If a file-upload URL prefix is configured, strip it so the
        # stored URL becomes relative to this host.
        # (Hoisted: the original called config.get() three times.)
        upload_file_url = config.get('ckan.upload_file_url')
        if upload_file_url and upload_file_url in rsc['url']:
            rsc['url'] = rsc['url'].split(upload_file_url)[1]
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % id)

    if rsc.get('url_type') == 'upload':
        # File lives in the local filestore: serve it via paste.fileapp.
        upload = uploader.ResourceUpload(rsc)
        filepath = upload.get_path(rsc['id'])
        fileapp = paste.fileapp.FileApp(filepath)
        try:
            status, headers, app_iter = request.call_application(fileapp)
        except OSError:
            abort(404, _('Resource data not found'))
        response.headers.update(dict(headers))
        content_type, content_enc = mimetypes.guess_type(rsc.get('url', ''))
        if content_type:
            response.headers['Content-Type'] = content_type
        response.status = status
        return app_iter
    elif 'url' not in rsc:
        abort(404, _('No download is available'))
    redirect(rsc['url'])
def resource_download(self, id, resource_id, filename=None):
    """Provide a direct download of a resource, honouring the
    package-level ``res_access`` flag.

    Either redirects the user to the stored URL or streams an uploaded
    file straight from the filestore.

    :param id: id or name of the package the resource belongs to
    :param resource_id: id of the resource to download
    :param filename: unused; kept so existing routes keep matching
    """
    context = {'model': model, 'session': model.Session,
               'user': c.user or c.author, 'auth_user_obj': c.userobj}

    if request.method == 'POST':
        # BUG FIX: the original read ``data = data or clean_dict(...)``
        # but ``data`` was never defined, so every POST raised NameError.
        data = clean_dict(dict_fns.unflatten(
            tuplize_dict(parse_params(request.POST))))

    # Check whether resources in this package are publicly accessible;
    # default to accessible when the package carries no flag.
    pkg_dict = get_action('package_show')(context, {'id': id})
    res_access = pkg_dict.get('res_access', True)

    try:
        rsc = get_action('resource_show')(
            context, {'id': resource_id, 'res_access': res_access})
        get_action('package_show')(context, {'id': id})
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % id)

    if rsc.get('url_type') == 'upload':
        # File lives in the local filestore: serve it via paste.fileapp.
        upload = uploader.ResourceUpload(rsc)
        filepath = upload.get_path(rsc['id'])
        fileapp = paste.fileapp.FileApp(filepath)
        try:
            status, headers, app_iter = request.call_application(fileapp)
        except OSError:
            abort(404, _('Resource data not found'))
        response.headers.update(dict(headers))
        content_type, content_enc = mimetypes.guess_type(rsc.get('url', ''))
        if content_type:
            response.headers['Content-Type'] = content_type
        response.status = status
        return app_iter
    elif 'url' not in rsc:
        abort(404, _('No download is available'))
    redirect(rsc['url'])
def filesystem_resource_download(self, id, resource_id, filename=None):
    """Fallback controller action that serves resources from the
    filesystem.

    A copy of ``ckan.controllers.package:PackageController.resource_download``:
    redirects the user to the stored URL, or streams an uploaded file
    directly from the filestore.
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj,
    }
    try:
        resource_dict = get_action('resource_show')(context,
                                                    {'id': resource_id})
        get_action('package_show')(context, {'id': id})
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % id)

    if resource_dict.get('url_type') == 'upload':
        # The file is in the local filestore; stream it out.
        res_upload = uploader.ResourceUpload(resource_dict)
        path_on_disk = res_upload.get_path(resource_dict['id'])
        file_app = paste.fileapp.FileApp(path_on_disk)
        try:
            status, headers, app_iter = request.call_application(file_app)
        except OSError:
            abort(404, _('Resource data not found'))
        response.headers.update(dict(headers))
        guessed_type, guessed_enc = mimetypes.guess_type(
            resource_dict.get('url', ''))
        if guessed_type:
            response.headers['Content-Type'] = guessed_type
        response.status = status
        return app_iter
    elif 'url' not in resource_dict:
        abort(404, _('No download is available'))
    redirect(str(resource_dict['url']))
def zip_list(rsc):
    """Return the list of members of a zipped resource.

    Tries the local filestore first; when the file cannot be read
    locally (e.g. the resource lives in cloud storage) falls back to
    listing the zip through its download URL.

    :param rsc: resource dict
    :returns: a list of ``zipfile.ZipInfo``-like entries
    """
    if rsc.get('url_type') == 'upload':
        upload = uploader.ResourceUpload(rsc)
        value = None
        try:
            # Close the archive handle; ZipInfo entries stay valid.
            with zipfile.ZipFile(upload.get_path(rsc['id']), 'r') as zf:
                value = zf.filelist
        except (IOError, OSError, zipfile.BadZipfile):
            # The file may be missing locally (cloud storage) or be
            # corrupt; fall through to the URL-based listing.
            # (The original caught Exception with an unrelated comment
            # about ints in the db, copied from elsewhere.)
            pass
        if value:
            return value
    upload = uploader.get_resource_uploader(rsc)
    url = urlparse(rsc['url'])
    filename = os.path.basename(url.path)
    zip_url = upload.get_url_from_filename(rsc['id'], filename, '')
    return getZipListFromURL(zip_url)
def exportPackages(query):
    """Export the given packages, with their uploaded resource files,
    as a single zip archive.

    Dumps the packages to ``package.json`` and ``package.csv``, copies
    each uploaded resource file alongside them, zips the directory and
    returns the path of the resulting ``.zip`` file.

    :param query: iterable of package model objects
    :returns: filesystem path of the created zip file
    """
    tmp_dir = tempfile.mkdtemp()
    try:
        # Dump package metadata to JSON and CSV.  ``with`` guarantees
        # the files are flushed AND closed (the original only flushed).
        with open('%s/package.json' % tmp_dir, 'w') as file_json:
            dumper.SimpleDumper().dump_json(file_json, query)
        with open('%s/package.csv' % tmp_dir, 'w') as file_csv:
            dumper.SimpleDumper().dump_csv(file_csv, query)

        # Copy each uploaded resource file into the tmp directory.
        for pkg in query:
            pkg_dict = pkg.as_dict()
            for resource in pkg_dict['resources']:
                if resource['url_type'] != 'upload':
                    continue
                try:
                    upload = uploader.ResourceUpload(resource)
                    filepath = upload.get_path(resource['id'])
                    shutil.copyfile(
                        filepath,
                        '%s/%s_%s' % (tmp_dir, resource['id'],
                                      resource['url'].split('/')[-1]))
                except Exception:
                    # Best effort: skip resources whose file is missing.
                    pass

        # Zip the directory up.
        file_zip_path = '%s.zip' % tmp_dir
        file_zip = zipfile.ZipFile(file_zip_path, 'w')
        try:
            zipdir(tmp_dir, file_zip)
        finally:
            file_zip.close()
    finally:
        # Always remove the temporary directory, even if a dump failed
        # (the original leaked it on any exception).
        shutil.rmtree(tmp_dir)
    return file_zip_path
def _save_ueb_package_as_dataset(service_call_results, model_config_dataset_id):
    """Save a streamed UEB model package zip as a new CKAN dataset.

    Runs as a background task: streams ``service_call_results`` to a
    temp file, creates a new 'model-package' dataset, attaches the zip
    as an uploaded resource, links the new dataset to the originating
    model configuration dataset, and finally marks that dataset's
    package as 'Available'.

    :param service_call_results: file-like object; read() yields the zip bytes
    :param model_config_dataset_id: id of the related model configuration dataset
    :raises: re-raises any exception after logging it
    """
    source = 'uebpackage.tasks._save_ueb_package_as_dataset():'
    ckan_default_dir = uebhelper.StringSettings.ckan_user_session_temp_dir    # '/tmp/ckan'

    # get the matching model configuration dataset object
    model_config_dataset_obj = base.model.Package.get(model_config_dataset_id)
    model_config_dataset_title = model_config_dataset_obj.title
    model_config_dataset_owner_org = model_config_dataset_obj.owner_org
    model_config_dataset_author = model_config_dataset_obj.author

    # create a directory for saving the file
    # this will be a dir in the form of: /tmp/ckan/{random_id}
    random_id = base.model.types.make_uuid()
    destination_dir = os.path.join(ckan_default_dir, random_id)
    os.makedirs(destination_dir)
    model_pkg_filename = uebhelper.StringSettings.ueb_input_model_package_default_filename    # 'ueb_model_pkg.zip'
    model_pkg_file = os.path.join(destination_dir, model_pkg_filename)

    # stream the service response to disk in 16 KiB chunks
    bytes_to_read = 16 * 1024
    try:
        with open(model_pkg_file, 'wb') as file_obj:
            while True:
                data = service_call_results.read(bytes_to_read)
                if not data:
                    break
                file_obj.write(data)
    except Exception as e:
        log.error(source + 'Failed to save the ueb_package zip file to temporary '
                           'location for UEB model configuration dataset ID: %s \n '
                           'Exception: %s' % (model_config_dataset_id, e))
        raise e

    log.info(source + 'ueb_package zip file was saved to temporary location for '
                      'UEB model configuration dataset ID: %s' % model_config_dataset_id)

    # upload the file to CKAN file store
    # resource_metadata = _upload_file(model_pkg_file)
    # if resource_metadata:
    #     log.info(source + 'UEB model package zip file was uploaded for model configuration dataset ID:%s' % model_config_dataset_id)
    # else:
    #     log.error(source + 'Failed to upload UEB model package zip file '
    #                        'for model configuration dataset ID: %s' % model_config_dataset_id)
    #     return
    #
    # # retrieve some of the file meta data
    # resource_url = resource_metadata.get('_label')  # this will return datetime stamp/filename
    #
    # resource_url = '/storage/f/' + resource_url
    # if resource_url.startswith('/'):
    #     resource_url = base.config.get('ckan.site_url', '').rstrip('/') + resource_url
    # else:
    #     resource_url = base.config.get('ckan.site_url', '') + resource_url
    #
    # resource_created_date = resource_metadata.get('_creation_date')
    # resource_name = resource_metadata.get('filename_original')
    # resource_size = resource_metadata.get('_content_length')
    #
    # # add the uploaded ueb model pkg data file as a resource to the dataset
    # resource_create_action = tk.get_action('resource_create')
    # context = {'model': base.model, 'session': base.model.Session, 'save': 'save'}
    # user = uebhelper.get_site_user()
    # context['user'] = user.get('name')
    # context['ignore_auth'] = True
    # context['validate'] = False

    user = uebhelper.get_site_user()

    # create a package
    package_create_action = tk.get_action('package_create')

    # create unique package name using the current time stamp as a postfix to any package name
    unique_postfix = datetime.now().isoformat().replace(':', '-').replace(
        '.', '-').lower()
    pkg_title = model_config_dataset_title
    data_dict = {
        'name': 'model_package_' + unique_postfix,  # this needs to be unique as required by DB
        'type': 'model-package',  # dataset type as defined in custom dataset plugin
        'title': pkg_title,
        'owner_org': model_config_dataset_owner_org,
        'author': model_config_dataset_author,
        'notes': 'UEB model package',
        'pkg_model_name': 'UEB',
        'model_version': '1.0',
        'north_extent': '',
        'south_extent': '',
        'east_extent': '',
        'west_extent': '',
        'simulation_start_day': '',
        'simulation_end_day': '',
        'time_step': '',
        'package_type': u'Input',
        'package_run_status': 'Not yet submitted',
        'package_run_job_id': '',
        'dataset_type': 'model-package'
    }
    context = {
        'model': base.model,
        'session': base.model.Session,
        'ignore_auth': True,
        'user': user.get('name'),
        'save': 'save'
    }
    try:
        uebhelper.register_translator()  # this is needed since we are creating a package in a background operation
        pkg_dict = package_create_action(context, data_dict)
        log.info(
            source +
            'A new dataset was created for UEB input model package with name: %s'
            % data_dict['title'])
    except Exception as e:
        log.error(
            source +
            'Failed to create a new dataset for ueb input model package for'
            ' the related model configuration dataset title: %s \n Exception: %s'
            % (pkg_title, e))
        raise e

    pkg_id = pkg_dict['id']
    if not 'resources' in pkg_dict:
        pkg_dict['resources'] = []

    # register the zip file as an uploaded resource of the new dataset
    file_name = munge.munge_filename(model_pkg_filename)
    resource = {'url': file_name, 'url_type': 'upload'}
    upload = uploader.ResourceUpload(resource)
    upload.filename = file_name
    upload.upload_file = open(model_pkg_file, 'r')
    data_dict = {
        'format': 'zip',
        'name': file_name,
        'url': file_name,
        'url_type': 'upload'
    }
    pkg_dict['resources'].append(data_dict)
    try:
        # defer the commit so the resource row and the file upload land together
        context['defer_commit'] = True
        context['use_cache'] = False
        # update the package
        package_update_action = tk.get_action('package_update')
        package_update_action(context, pkg_dict)
        context.pop('defer_commit')
    except Exception as e:
        log.error(
            source +
            ' Failed to update the new dataset for adding the input model package zip file as'
            ' a resource.\n Exception: %s' % e)
        raise e

    # link this newly created model package dataset to the model configuration dataset
    package_relationship_create_action = tk.get_action(
        'package_relationship_create')
    data_dict = {
        'subject': pkg_id,
        'object': model_config_dataset_id,
        'type': 'links_to'
    }
    package_relationship_create_action(context, data_dict)

    # Get out resource_id resource from model as it will not appear in
    # package_show until after commit
    upload.upload(context['package'].resources[-1].id,
                  uploader.get_max_resource_size())
    base.model.repo.commit()

    # update the related model configuration dataset to show that the package is available
    data_dict = {'package_availability': 'Available'}
    update_msg = 'system auto updated ueb package dataset'
    background_task = True
    try:
        updated_package = uebhelper.update_package(model_config_dataset_id,
                                                   data_dict, update_msg,
                                                   background_task)
        log.info(source + 'UEB model configuration dataset was updated as a result of '
                          'receiving model input package for dataset:%s' % updated_package['name'])
    except Exception as e:
        log.error(source + 'Failed to update UEB model configuration dataset after '
                           'receiving model input package for dataset ID:%s \n'
                           'Exception: %s' % (model_config_dataset_id, e))
        raise e
def upload_resource_zipfile_to_s3(context, resource):
    '''
    upload_resource_zipfile_to_s3 - Uploads the resource zip file to S3

    Builds an in-memory zip containing the resource file plus a
    metadata-<package>.txt YAML file, and uploads it publicly readable
    to S3 under <package>/resources/<resource-slug>.zip.

    :param context: CKAN action context passed to package_show
    :param resource: resource dict; must include 'package_id'
    '''
    # Init logger
    logger = logging.getLogger(__name__)
    logger.info("Starting upload_resource_zipfile_to_s3 for resource %s" %
                resource.get('name', ''))

    # If resource is an API, skip upload
    if resource.get('format', '') == 'API':
        return

    # Get resource's package
    pkg = toolkit.get_action('package_show')(context, {
        'id': resource['package_id']
    })

    # Initialize resource zip file (built entirely in memory)
    resource_buff = StringIO.StringIO()
    resource_zip_archive = zipfile.ZipFile(resource_buff, mode='w')

    # Initialize metadata
    metadata = toolkit.get_action('package_metadata_show')(data_dict={
        'id': pkg['id']
    })
    metadata_yaml_buff = StringIO.StringIO()
    # non-ascii characters in the title are dropped, not escaped
    metadata_yaml_buff.write(
        unicode("# Metadata for %s\r\n" % pkg["title"]).encode(
            'ascii', 'ignore'))
    yaml.dump(prettify_json(metadata), metadata_yaml_buff,
              Dumper=MetadataYAMLDumper)

    # Write metadata to package and updated resource zip
    resource_zip_archive.writestr('metadata-' + pkg.get('name') + '.txt',
                                  metadata_yaml_buff.getvalue())

    # Obtain extension type of the resource
    resource_extension = os.path.splitext(resource['url'])[1]
    filename = (slugify(resource['name'], to_lower=True) +
                resource_extension)

    # Case 1: Resource is not on s3 yet, need to download from CKAN
    if resource.get('url_type') == 'upload':
        logger.info("Obtaining resource file from CKAN for resource %s" %
                    resource.get('name', ''))
        upload = uploader.ResourceUpload(resource)
        filepath = upload.get_path(resource['id'])
        resource_zip_archive.write(filepath, filename)
    # Case 2: Resource exists outside of CKAN, we should have a URL to download it
    else:
        # Try to download the resource from the provided URL
        try:
            logger.info("Obtaining file from URL %s" %
                        resource.get('url', ''))
            session = requests.Session()
            response = session.get(resource.get('url', ''), timeout=30)
            # If the response status code is not 200 (i.e. success), raise Exception
            if response.status_code != 200:
                logger.error(
                    "Error obtaining resource from the given URL. Response status code is %d"
                    % response.status_code)
                raise Exception(
                    "Error obtaining resource from the given URL. Response status code is %d"
                    % response.status_code)
            logger.info("Successfully obtained file from URL %s" %
                        resource.get('url', ''))
        except requests.exceptions.RequestException:
            # NOTE(review): relies on toolkit.abort raising, so the
            # writestr below is only reached with a valid response
            toolkit.abort(404, toolkit._('Resource data not found'))
        resource_zip_archive.writestr(filename, response.content)

    # Initialize connection to S3
    bucket = setup_s3_bucket()

    # Upload the resource zip to S3
    resource_zip_archive.close()
    resource_filename = (pkg.get('name') + '/' + 'resources' + '/' +
                         slugify(resource.get('name'), to_lower=True) +
                         '.zip')
    try:
        logger.info("Uploading resource zipfile to S3 for resource %s" %
                    resource.get('name', ''))
        obj = bucket.put_object(Key=resource_filename,
                                Body=resource_buff.getvalue(),
                                ContentType='application/zip')
        # Set permissions of the S3 object to be readable by public
        obj.Acl().put(ACL='public-read')
        logger.info(
            "Successfully uploaded resource zipfile to S3 for resource %s" %
            resource.get('name', ''))
    except Exception as exception:
        # Log the error and reraise the exception
        logger.error("Error uploading resource %s zipfile to S3" %
                     (resource['name']))
        logger.error(exception)
        raise exception
def upload_resource_to_s3(context, resource):
    '''
    upload_resource_to_s3

    Uploads resource to S3 and modifies the following resource fields:
    - 'upload'
    - 'url_type'
    - 'url'

    :param context: CKAN action context passed to package_show
    :param resource: resource dict; mutated in place on success
    '''
    # Init logger
    logger = logging.getLogger(__name__)
    logger.info("Starting upload_resource_to_s3 for resource %s" %
                resource.get('name', ''))

    # Init connection to S3
    bucket = setup_s3_bucket()

    # Get content type and extension.
    # BUG FIX: the original unpacked into ``_``, shadowing the gettext
    # alias used by abort() below; and guess_extension(None) blows up
    # when the type cannot be guessed.
    content_type, content_enc = mimetypes.guess_type(resource.get('url', ''))
    extension = (mimetypes.guess_extension(content_type)
                 if content_type else '')

    # Upload to S3 under <package>/resources/<timestamp>-<slug><ext>
    timestamp = datetime.datetime.utcnow()
    pkg = toolkit.get_action('package_show')(context, {
        'id': resource['package_id']
    })
    filename = (
        resource.get("timestamp", timestamp.strftime("%Y-%m-%dT%H-%M-%SZ"))
        + "-" + slugify(resource.get("name"), to_lower=True) + extension)
    s3_filepath = "/".join([pkg.get("name"), "resources", filename])

    # If file is currently being uploaded, the file is in resource['upload']
    if isinstance(resource.get('upload'), cgi.FieldStorage):
        logger.info("File is being uploaded")
        resource['upload'].file.seek(0)
        body = resource['upload'].file
    # If resource.get('url_type') == 'upload' then the resource is in CKAN file system
    elif resource.get('url_type') == 'upload':
        logger.info("File is on CKAN file store")
        upload = uploader.ResourceUpload(resource)
        filepath = upload.get_path(resource['id'])
        try:
            # Binary mode for an opaque payload; note Python 2's open()
            # raises IOError, which the original 'except OSError' missed.
            body = open(filepath, 'rb')
        except (IOError, OSError):
            abort(404, _('Resource data not found'))
    else:
        return  # in datagovuk, we don't want to upload from URL

    try:
        logger.info("Uploading resource %s to S3" % resource.get('name', ''))
        # Delete any stale object first so the new one replaces it cleanly.
        bucket.Object(s3_filepath).delete()
        obj = bucket.put_object(Key=s3_filepath,
                                Body=body,
                                ContentType=content_type)
        obj.Acl().put(ACL='public-read')
        logger.info("Successfully uploaded resource %s to S3" %
                    resource.get('name', ''))
    except ClientError as exception:
        # Log the error and reraise the exception
        logger.error("Error uploading resource %s from package %s to S3" %
                     (resource['name'], resource['package_id']))
        logger.error(exception)
        raise exception
    finally:
        # Only the filestore branch opened a file handle of its own.
        if resource.get('url_type') == 'upload':
            body.close()

    # Modify fields in resource
    resource['upload'] = ''
    resource['url_type'] = 's3'
    resource['url'] = config.get('ckan.datagovuk.s3_url_prefix') + s3_filepath
    update_timestamp(resource, timestamp)
def resource_create(context, data_dict):
    '''Appends a new resource to a datasets list of resources.

    :param package_id: id of package that the resource needs
        should be added to.
    :type package_id: string
    :param url: url of resource
    :type url: string
    :param revision_id: (optional)
    :type revision_id: string
    :param description: (optional)
    :type description: string
    :param format: (optional)
    :type format: string
    :param hash: (optional)
    :type hash: string
    :param name: (optional)
    :type name: string
    :param resource_type: (optional)
    :type resource_type: string
    :param mimetype: (optional)
    :type mimetype: string
    :param mimetype_inner: (optional)
    :type mimetype_inner: string
    :param webstore_url: (optional)
    :type webstore_url: string
    :param cache_url: (optional)
    :type cache_url: string
    :param size: (optional)
    :type size: int
    :param created: (optional)
    :type created: iso date string
    :param last_modified: (optional)
    :type last_modified: iso date string
    :param cache_last_updated: (optional)
    :type cache_last_updated: iso date string
    :param webstore_last_updated: (optional)
    :type webstore_last_updated: iso date string
    :param upload: (optional)
    :type upload: FieldStorage (optional) needs multipart/form-data

    :returns: the newly created resource
    :rtype: dictionary
    '''
    package_id = _get_or_bust(data_dict, 'package_id')
    pkg_dict = _get_action('package_show')(context, {'id': package_id})

    _check_access('resource_create', context, data_dict)

    if 'resources' not in pkg_dict:
        pkg_dict['resources'] = []

    # Constructed before the package update, presumably for its side
    # effects on data_dict's upload/url fields -- TODO confirm against
    # uploader.ResourceUpload; the object itself is not used below.
    upload = uploader.ResourceUpload(data_dict)
    pkg_dict['resources'].append(data_dict)
    try:
        # Defer commit and audit so the resource row and package update
        # land together.
        context['defer_commit'] = True
        context['use_cache'] = False
        context['defer_audit'] = True
        # Propagate the acting user id to the package update, if given.
        if data_dict.get('actor_id', None):
            pkg_dict['actor_id'] = data_dict['actor_id']
        _get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except ValidationError as e:
        # Surface only the errors belonging to the resource just added.
        errors = e.error_dict['resources'][-1]
        raise ValidationError(errors)
def resource_create(context, data_dict):
    '''Appends a new resource to a datasets list of resources.

    :param package_id: id of package that the resource needs
        should be added to.
    :type package_id: string
    :param url: url of resource
    :type url: string
    :param revision_id: (optional)
    :type revision_id: string
    :param description: (optional)
    :type description: string
    :param format: (optional)
    :type format: string
    :param hash: (optional)
    :type hash: string
    :param name: (optional)
    :type name: string
    :param resource_type: (optional)
    :type resource_type: string
    :param mimetype: (optional)
    :type mimetype: string
    :param mimetype_inner: (optional)
    :type mimetype_inner: string
    :param webstore_url: (optional)
    :type webstore_url: string
    :param cache_url: (optional)
    :type cache_url: string
    :param size: (optional)
    :type size: int
    :param created: (optional)
    :type created: iso date string
    :param last_modified: (optional)
    :type last_modified: iso date string
    :param cache_last_updated: (optional)
    :type cache_last_updated: iso date string
    :param webstore_last_updated: (optional)
    :type webstore_last_updated: iso date string
    :param upload: (optional)
    :type upload: FieldStorage (optional) needs multipart/form-data

    :returns: the newly created resource
    :rtype: dictionary
    '''
    model = context['model']
    user = context['user']

    package_id = _get_or_bust(data_dict, 'package_id')
    pkg_dict = _get_action('package_show')(context, {'id': package_id})

    _check_access('resource_create', context, data_dict)

    # Make sure the package carries a resources list we can extend.
    pkg_dict.setdefault('resources', [])

    upload = uploader.ResourceUpload(data_dict)
    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        _get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except ValidationError as e:
        # Re-raise only the validation errors of the resource just added.
        new_resource_errors = e.error_dict['resources'][-1]
        raise ValidationError(new_resource_errors)
def _update_resource(resource_id, queue, log):
    """
    Link check and archive the given resource.
    If successful, updates the archival table with the cache_url & hash etc.
    Finally, a notification of the archival is broadcast.

    Params:
      resource - resource dict
      queue - name of the celery queue

    Should only raise on a fundamental error:
      ArchiverError
      CkanError

    Returns a JSON dict, ready to be returned from the celery task giving a
    success status:
        {
            'resource': the updated resource dict,
            'file_path': path to archived file (if archive successful), or None
        }
    If not successful, returns None.
    """
    # Local imports: this runs inside a celery task, where importing at
    # call time avoids loading CKAN at module import.
    from ckan import model
    from pylons import config
    from ckan.plugins import toolkit
    from ckanext.archiver import default_settings as settings
    from ckanext.archiver.model import Status, Archival
    get_action = toolkit.get_action

    assert is_id(resource_id), resource_id
    context_ = {'model': model, 'ignore_auth': True, 'session': model.Session}
    resource = get_action('resource_show')(context_, {'id': resource_id})

    if not os.path.exists(settings.ARCHIVE_DIR):
        log.info("Creating archive directory: %s" % settings.ARCHIVE_DIR)
        os.mkdir(settings.ARCHIVE_DIR)

    def _save(status_id, exception, resource, url_redirected_to=None,
              download_result=None, archive_result=None):
        # Persist the archival outcome and notify listeners on the queue.
        reason = u'%s' % exception
        save_archival(resource, status_id,
                      reason, url_redirected_to,
                      download_result, archive_result,
                      log)
        notify_resource(
            resource,
            queue,
            archive_result.get('cache_filename') if archive_result else None)

    # Download
    try_as_api = False
    requires_archive = True

    url = resource['url']
    if not url.startswith('http'):
        # relative URL: prefix with this site's base URL
        url = config['ckan.site_url'].rstrip('/') + url

    hosted_externally = not url.startswith(config['ckan.site_url'])
    # if resource.get('resource_type') == 'file.upload' and not hosted_externally:
    if resource.get('url_type') == 'upload' and not hosted_externally:
        # Locally uploaded file: no HTTP download needed, hash it in place
        # and record a mock "download" result instead.
        log.info("Won't attemp to archive resource uploaded locally: %s" %
                 resource['url'])

        upload = uploader.ResourceUpload(resource)
        filepath = upload.get_path(resource['id'])

        try:
            hash, length = _file_hashnlength(filepath)
        except IOError, e:
            log.error('Error while accessing local resource %s: %s',
                      filepath, e)

            download_status_id = Status.by_text('URL request failed')
            _save(download_status_id, e, resource)
            return

        mimetype = None
        headers = None
        content_type, content_encoding = mimetypes.guess_type(url)
        if content_type:
            mimetype = _clean_content_type(content_type)
            headers = {'Content-Type': content_type}

        download_result_mock = {'mimetype': mimetype,
                                'size': length,
                                'hash': hash,
                                'headers': headers,
                                'saved_file': filepath,
                                'url_redirected_to': url,
                                'request_type': 'GET'}

        archive_result_mock = {'cache_filepath': filepath,
                               'cache_url': url}

        # Success
        _save(Status.by_text('Archived successfully'), '', resource,
              download_result_mock['url_redirected_to'],
              download_result_mock, archive_result_mock)

        # The return value is only used by tests. Serialized for Celery.
        return json.dumps(dict(download_result_mock, **archive_result_mock))
def upload_package_zipfile_to_s3(context, pkg_dict):
    '''
    upload_zipfiles_to_s3
    Uploads package zipfile to S3

    Builds one in-memory zip holding every non-API resource file plus a
    metadata-<package>.txt YAML file, and uploads it publicly readable
    to S3 as <package>/<package>.zip.

    :param pkg_dict: dict with at least the package 'id'
    '''
    # Obtain package
    pkg = toolkit.get_action('package_show')(data_dict={'id': pkg_dict['id']})

    # Init logger
    logger = logging.getLogger(__name__)
    logger.info("Starting upload_package_zipfile_to_S3 for package %s" %
                pkg.get('name', ''))

    # If all resources are APIs, don't upload the zipfile
    if resources_all_api(pkg.get('resources')):
        logger.info("All resources are APIs, skipping package zipfile upload")
        return

    # Obtain package and package metadata
    metadata = toolkit.get_action('package_metadata_show')(data_dict={
        'id': pkg['id']
    })

    # Initialize package zip file (built entirely in memory)
    package_buff = StringIO.StringIO()
    package_zip_archive = zipfile.ZipFile(package_buff, mode='w')

    # Initialize metadata
    metadata_yaml_buff = StringIO.StringIO()
    # non-ascii characters in the title are dropped, not escaped
    metadata_yaml_buff.write(
        unicode("# Metadata for %s\r\n" % pkg["title"]).encode(
            'ascii', 'ignore'))
    yaml.dump(prettify_json(metadata), metadata_yaml_buff,
              Dumper=MetadataYAMLDumper)

    # Write metadata to package and updated resource zip
    package_zip_archive.writestr('metadata-' + pkg.get('name') + '.txt',
                                 metadata_yaml_buff.getvalue())

    # Start session to make requests: for downloading files from S3
    session = requests.Session()

    # Iterate over resources, downloading and storing them in the package zip file
    for resource in pkg.get('resources'):
        resource_extension = os.path.splitext(resource['url'])[1]
        filename = (slugify(resource['name'], to_lower=True) +
                    resource_extension)
        # Case 1: Resource is API, skip it
        if resource.get('format') == 'API':
            continue
        # Case 2: Resource is uploaded to CKAN server
        elif resource.get('url_type') == 'upload':
            logger.info("Obtaining resource file from CKAN for resource %s" %
                        resource.get('name', ''))
            upload = uploader.ResourceUpload(resource)
            filepath = upload.get_path(resource['id'])
            package_zip_archive.write(filepath, filename)
        # Case 3: Resource is not on CKAN, should have a URL to download it from
        else:
            # Try to download the resource from the resource URL
            try:
                logger.info("Obtaining file from URL %s" %
                            resource.get('url', ''))
                response = session.get(resource.get('url', ''), timeout=30)
                # If the response status code is not 200 (i.e. success), raise Exception
                if response.status_code != 200:
                    logger.error(
                        "Error obtaining resource from the given URL. Response status code is %d"
                        % response.status_code)
                    raise Exception(
                        "Error obtaining resource from the given URL. Response status code is %d"
                        % response.status_code)
                logger.info("Successfully obtained file from URL %s" %
                            resource.get('url', ''))
            except requests.exceptions.RequestException:
                # NOTE(review): relies on toolkit.abort raising, so the
                # writestr below only runs with a valid response
                toolkit.abort(404, toolkit._('Resource data not found'))
            package_zip_archive.writestr(filename, response.content)

    # Initialize connection to S3
    bucket = setup_s3_bucket()

    # Upload package zip to S3
    package_zip_archive.close()
    package_file_name = (pkg.get('name') + '/' + pkg.get('name') + '.zip')
    try:
        logger.info("Uploading package zipfile to S3 for package %s" %
                    pkg.get('name', ''))
        obj = bucket.put_object(Key=package_file_name,
                                Body=package_buff.getvalue(),
                                ContentType='application/zip')
        # Set object permissions to public readable
        obj.Acl().put(ACL='public-read')
        logger.info(
            "Successfully uploaded package zipfile to S3 for package %s" %
            pkg.get('name', ''))
    except Exception as exception:
        # Log the error and reraise the exception
        logger.error("Error uploading package %s zip to S3" % (pkg['id']))
        logger.error(exception)
        raise exception
def upload_resource_to_s3(context, resource):
    '''
    upload_resource_to_s3

    Uploads resource to S3 and modifies the following resource fields:
    - 'upload'
    - 'url_type'
    - 'url'

    :param context: CKAN action context passed to package_show
    :param resource: resource dict; mutated in place on success
    '''
    # Init logger
    logger = logging.getLogger(__name__)
    logger.info("Starting upload_resource_to_s3 for resource %s" %
                resource.get('name', ''))

    # Init connection to S3
    bucket = setup_s3_bucket()

    # Get content type and extension.
    # BUG FIX: the original unpacked into ``_``, shadowing the gettext
    # alias used by abort() below; and guess_extension(None) blows up
    # when the type cannot be guessed.
    content_type, content_enc = mimetypes.guess_type(resource.get('url', ''))
    extension = (mimetypes.guess_extension(content_type)
                 if content_type else '')

    # Upload to S3 under <package>/resources/<slug>-<timestamp><ext>
    pkg = toolkit.get_action('package_show')(context, {
        'id': resource['package_id']
    })
    timestamp = datetime.datetime.utcnow(
    )  # should match the assignment in the ResourceUpload class
    s3_filepath = (pkg.get('name') + '/' + 'resources' + '/' +
                   slugify(resource.get('name'), to_lower=True) + '-' +
                   timestamp.strftime("%Y-%m-%dT%H-%M-%SZ") + extension)

    # If file is currently being uploaded, the file is in resource['upload']
    if isinstance(resource.get('upload', None), cgi.FieldStorage):
        logger.info("File is being uploaded")
        resource['upload'].file.seek(0)
        body = resource['upload'].file
    # If resource.get('url_type') == 'upload' then the resource is in CKAN file system
    elif resource.get('url_type') == 'upload':
        logger.info("File is on CKAN file store")
        upload = uploader.ResourceUpload(resource)
        filepath = upload.get_path(resource['id'])
        try:
            # Binary mode for an opaque payload; note Python 2's open()
            # raises IOError, which the original 'except OSError' missed.
            body = open(filepath, 'rb')
        except (IOError, OSError):
            abort(404, _('Resource data not found'))
    else:
        logger.info("File is downloadable from URL")
        try:
            # Start session to download files
            session = requests.Session()
            logger.info("Attempting to obtain resource %s from url %s" %
                        (resource.get('name', ''), resource.get('url', '')))
            response = session.get(resource.get('url', ''), timeout=30)
            # If the response status code is not 200 (i.e. success), raise Exception
            if response.status_code != 200:
                logger.error(
                    "Error obtaining resource from the given URL. Response status code is %d"
                    % response.status_code)
                raise Exception(
                    "Error obtaining resource from the given URL. Response status code is %d"
                    % response.status_code)
            body = response.content
            logger.info("Successfully obtained resource %s from url %s" %
                        (resource.get('name', ''), resource.get('url', '')))
        except requests.exceptions.RequestException:
            toolkit.abort(404, toolkit._('Resource data not found'))

    try:
        logger.info("Uploading resource %s to S3" % resource.get('name', ''))
        # Delete any stale object first so the new one replaces it cleanly.
        bucket.Object(s3_filepath).delete()
        obj = bucket.put_object(Key=s3_filepath,
                                Body=body,
                                ContentType=content_type)
        obj.Acl().put(ACL='public-read')
        logger.info("Successfully uploaded resource %s to S3" %
                    resource.get('name', ''))
    except Exception as exception:
        # Log the error and reraise the exception
        logger.error("Error uploading resource %s from package %s to S3" %
                     (resource['name'], resource['package_id']))
        logger.error(exception)
        raise exception
    finally:
        # Only the filestore branch opened a file handle of its own;
        # a single 'finally' replaces the duplicated close calls.
        if resource.get('url_type') == 'upload':
            body.close()

    # Modify fields in resource
    resource['upload'] = ''
    resource['url_type'] = 's3'
    resource['url'] = config.get(
        'ckan.datagovsg_s3_resources.s3_url_prefix') + s3_filepath
    update_timestamp(resource, timestamp)
def resource_update(context, data_dict):
    '''Update a resource.

    To update a resource you must be authorized to update the dataset that
    the resource belongs to.

    For further parameters see ``resource_create()``.

    :param id: the id of the resource to update
    :type id: string

    :returns: the updated resource
    :rtype: string
    '''
    model = context['model']
    user = context['user']
    id = _get_or_bust(data_dict, "id")

    resource = model.Resource.get(id)
    context["resource"] = resource

    if not resource:
        logging.error('Could not find resource ' + id)
        raise NotFound(_('Resource was not found.'))

    _check_access('resource_update', context, data_dict)
    del context["resource"]

    package_id = resource.resource_group.package.id
    # Provide a local context, because package_show will set a schema in it
    # that is only appropriate for package_show.
    package_show_context = {'model': model, 'user': user}
    pkg_dict = _get_action('package_show')(package_show_context, {
        'id': package_id,
        'use_default_schema': True
    })

    # Locate the position of this resource within the package.
    n = next((idx for idx, res in enumerate(pkg_dict['resources'])
              if res['id'] == id), None)
    if n is None:
        logging.error('Could not find resource ' + id)
        raise NotFound(_('Resource was not found.'))

    upload = uploader.ResourceUpload(data_dict)
    pkg_dict['resources'][n] = data_dict

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        pkg_dict = _get_action('package_update')(context, pkg_dict)
        context.pop('defer_commit')
    except ValidationError as e:
        # For debugging #1281
        if 'resources' not in e.error_dict:
            raise Exception('resource_update error: %r' % e.error_dict)
        raise ValidationError(e.error_dict['resources'][n])
def command(self): if self.args and self.args[0] in ['--help', '-h', 'help']: print self.__doc__ return self._load_config() user = toolkit.get_action('get_site_user')({ 'model': model, 'ignore_auth': True }, {}) context = { 'username': user.get('name'), 'user': user.get('name'), 'model': model } # Add new datasets to keep here keep_datasets = [ u'newcastle-city-council-payments-over-500', u'food-hygiene-information-scheme-rating-glasgow', u'up-library-catalogue', u'geo-examples', u'afghanistan-election-data', u'afterfibre', u'us-national-foreclosure-statistics-january-2012', u'malawi-aid-projects', u'gold-prices', u'adur_district_spending', u'ngds-earthquakes-data', ] # Add new organizations to keep here keep_orgs = [ u'national-statistics-office', u'pagwell-borough-council', u'test-org-2', ] # Add new groups to keep here keep_groups = [ u'geo-examples', u'data-explorer', ] # Get list of resources to delete and delete datasets datasets = self._get_all_packages(context) for dataset in datasets: if (dataset['name'] not in keep_datasets and dataset['id'] not in keep_datasets and dataset['owner_org'] not in keep_orgs): # save data of resources in filestore for later for res in dataset['resources']: if res['url_type'] == 'upload': # Delete the resource files from the filesystem upload = uploader.ResourceUpload(res) filepath = upload.get_path(res['id']) try: os.remove(filepath) except: pass # delete dataset print "Deleting dataset: {0}".format(dataset['name']) toolkit.get_action('dataset_delete')(context, { 'id': dataset['id'] }) # Delete all organizations except specified ones orgs = toolkit.get_action('organization_list')(context, {}) for org in orgs: if org not in keep_orgs: print "Deleting organization: {0}".format(org) toolkit.get_action('organization_delete')(context, {'id': org}) toolkit.get_action('organization_purge')(context, {'id': org}) # Delete all groups except expecified ones groups = toolkit.get_action('group_list')(context, {}) for group in groups: if 
group not in keep_groups: print "Deleting group: {0}".format(group) toolkit.get_action('group_delete')(context, {'id': group}) toolkit.get_action('group_purge')(context, {'id': group}) # Purge datasets self.clean_deleted()
def resource_download(self, id, resource_id, filename=None):
    """
    Provides a direct download by either redirecting the user to the url
    stored or downloading an uploaded file directly. If the uploaded
    file is no longer on local disk, it is served via a short-lived
    signed URL from the S3 archive instead.
    """
    context = {
        'model': model,
        'session': model.Session,
        'user': c.user or c.author,
        'auth_user_obj': c.userobj
    }
    try:
        rsc = get_action('resource_show')(context, {'id': resource_id})
        pkg = get_action('package_show')(context, {'id': id})
    except NotFound:
        abort(404, _('Resource not found'))
    except NotAuthorized:
        abort(401, _('Unauthorized to read resource %s') % id)

    if rsc.get('url_type') == 'upload':
        upload = uploader.ResourceUpload(rsc)
        filepath = upload.get_path(rsc['id'])

        #### s3archive new code
        access_key = config.get('ckanext.s3archive.access_key')
        secret_key = config.get('ckanext.s3archive.secret_key')
        bucket_name = config.get('ckanext.s3archive.bucket')

        # File not on local disk -> it has been archived to S3; redirect
        # the client to a 300-second signed URL.
        if not os.path.exists(filepath):
            content_type, content_enc = mimetypes.guess_type(
                rsc.get('url', ''))
            # Last 39 characters of the local path form the S3 key
            # ('xxx/yyy/<uuid>' layout used by the archiver).
            key_name = filepath[len(filepath) - 39:]

            conn = s3connection.S3Connection(
                access_key,
                secret_key,
                calling_format=OrdinaryCallingFormat(),
                host='object.auckland.ac.nz',
                port=443)
            bucket = conn.get_bucket(bucket_name)

            # bucket.list() is a prefix search; keep the last match.
            key = None
            for key in bucket.list(prefix=key_name.lstrip('/')):
                pass
            if not key:
                abort(404, _('Resource data not found'))

            headers = {}
            if content_type:
                headers['response-content-type'] = content_type
            url = key.generate_url(300, method='GET',
                                   response_headers=headers)
            redirect(url)
        #### code finish

        fileapp = paste.fileapp.FileApp(filepath)
        try:
            status, headers, app_iter = request.call_application(fileapp)
        except OSError:
            abort(404, _('Resource data not found'))
        response.headers.update(dict(headers))
        content_type, content_enc = mimetypes.guess_type(rsc.get('url', ''))
        # BUG FIX: guess_type returns (None, None) for unknown extensions;
        # only set the header when a type was found, matching the other
        # resource_download implementations.
        if content_type:
            response.headers['Content-Type'] = content_type
        response.status = status
        return app_iter
    elif not 'url' in rsc:
        abort(404, _('No download is available'))
    redirect(rsc['url'])
def index(self, id):
    """Dataset action controller.

    POST with action-type=clone copies the dataset (including uploaded
    resource files) into a new package; GET with action-type=export
    streams the dataset as a zip file; otherwise renders the action page.

    NOTE(review): indentation below is reconstructed from a
    whitespace-mangled source; the placement of the per-resource
    mutations inside the clone loop should be confirmed against history.
    """
    print 'index'
    context = {
        'model': ckan.model,
        'session': ckan.model.Session,
        'user': pylons.c.user or pylons.c.author
    }
    try:
        # Load the dataset onto the template context; package_show also
        # puts the Package object into context['package'].
        plugins.toolkit.c.pkg_dict = plugins.toolkit.get_action(
            'package_show')(context, {
                'id': id
            })
        plugins.toolkit.c.pkg = context['package']
        plugins.toolkit.c.resources_json = h.json.dumps(
            plugins.toolkit.c.pkg_dict.get('resources', []))
    except plugins.toolkit.ObjectNotFound:
        plugins.toolkit.abort(404, plugins.toolkit._('Dataset not found'))
    except plugins.toolkit.NotAuthorized:
        plugins.toolkit.abort(
            401, plugins.toolkit._('Unauthorized to read package %s') % id)
    vars = {
        'errors': {},
        'data': {
            'title': '',  #plugins.toolkit._('Clone of {dataset}').format(dataset=plugins.toolkit.c.pkg_dict['title'])'
            'name': ''
        }
    }
    if plugins.toolkit.request.method == 'POST':
        post_data = plugins.toolkit.request.POST
        if post_data['action-type'] == 'clone':
            context = {
                'model': ckan.model,
                'session': ckan.model.Session,
                'user': pylons.c.user or pylons.c.author
            }
            try:
                # Cloning requires both create and update rights.
                plugins.toolkit.check_access('package_create', context)
                plugins.toolkit.check_access('package_update', context,
                                             {'id': id})
                del context['package']
            except plugins.toolkit.NotAuthorized:
                plugins.toolkit.abort(
                    401,
                    plugins.toolkit._('Unauthorized to clone this package'))
            # get current package...
            pkg_dict = plugins.toolkit.get_action('package_show')(None, {
                'id': id
            })
            # update necessary fields
            title = ckan.plugins.toolkit.request.params.getone('title')
            name = ckan.plugins.toolkit.request.params.getone('name')
            dt = datetime.datetime.now()
            pkg_dict['title'] = title
            pkg_dict['name'] = name
            pkg_dict['metadata_created'] = dt
            pkg_dict['metadata_modified'] = dt
            # Strip identifiers so package_create generates fresh ones.
            del pkg_dict['id']
            del pkg_dict['revision_id']
            del pkg_dict['revision_timestamp']
            resources = pkg_dict['resources']
            for resource in resources:
                if resource['url_type'] == 'upload':
                    # copy file: re-attach the stored file as a new upload
                    upload = uploader.ResourceUpload(resource)
                    filepath = upload.get_path(resource['id'])
                    cfs = FieldStorage()
                    # NOTE(review): file handle is opened in text mode and
                    # never explicitly closed — confirm this is intended.
                    cfs.file = open(filepath)
                    cfs.filename = resource['url'].split('/')[-1]
                    resource['upload'] = cfs
                resource['created'] = dt
                del resource['id']
                del resource['revision_id']
                del resource['revision_timestamp']
            # Resources are created separately below, after the package.
            del pkg_dict['resources']
            # create a new one based on existing one...
            try:
                # for some reason, the pkg_dict given to 'package_create'
                # still has the old id
                pkg_dict_new = plugins.toolkit.get_action(
                    'package_create')(context, pkg_dict)
                for resource in resources:
                    resource['package_id'] = pkg_dict_new['id']
                    plugins.toolkit.get_action('resource_create')(context,
                                                                  resource)
                # if package already has a review date set, return it...
                if pkg_dict.get('next_review_date'):
                    package_review = get_package_review(
                        ckan.model.Session, pkg_dict_new['id'])
                    if package_review:
                        package_review.next_review_date = pkg_dict.get(
                            'next_review_date')
                        update_package_review(context['session'],
                                              package_review)
                    else:
                        add_package_review(
                            context['session'], pkg_dict_new['id'],
                            pkg_dict.get('next_review_date'))
            except plugins.toolkit.ValidationError as ve:
                # Re-render the form with validation errors; 'name' errors
                # are shown separately from the rest.
                plugins.toolkit.c.pkg_dict = plugins.toolkit.get_action(
                    'package_show')(context, {
                        'id': id
                    })
                plugins.toolkit.c.pkg = context['package']
                plugins.toolkit.c.resources_json = h.json.dumps(
                    plugins.toolkit.c.pkg_dict.get('resources', []))
                errorsOther = dict(ve.error_dict)
                if 'name' in errorsOther:
                    del errorsOther['name']
                vars = {
                    'errors': ve.error_dict,
                    'errorsOther': errorsOther,
                    'data': {
                        'title': title,
                        'name': name
                    }
                }
                return plugins.toolkit.render("dsaction-index.html",
                                              extra_vars=vars)
            ckan.plugins.toolkit.redirect_to(controller="package",
                                             action="edit",
                                             id=pkg_dict_new['id'])
    else:
        get_data = plugins.toolkit.request.GET
        if 'action-type' in get_data and get_data[
                'action-type'] == 'export':
            print 'export'
            # task 1: work out if the dataset has items in filestore
            # get package
            pid = convert_to_id(id, context)
            query = ckan.model.Session.query(
                ckan.model.Package).filter(ckan.model.Package.id == pid)
            file_zip_path = exportPackages(query)
            # serve zip file
            fileapp = paste.fileapp.FileApp(file_zip_path)
            fileapp.content_disposition(filename='%s.zip' % id)
            status, headers, app_iter = request.call_application(fileapp)
            response.headers.update(dict(headers))
            content_type = 'application/zip'
            response.headers['Content-Type'] = content_type
            response.status = status
            # remove tmp zip file - not sure if this will cause issues
            # deleting the file before it has been fully served?
            os.remove(file_zip_path)
            return app_iter
    return plugins.toolkit.render("dsaction-index.html", extra_vars=vars)
def _save_shape_file_as_resource(self, lat, lon, shape_file_name,
                                 watershed_des, organization):
    """Zip the delineated watershed shape files, create a new dataset
    (package) for them and attach the zip file as an uploaded resource.

    :param lat: latitude of the watershed outlet location
    :param lon: longitude of the watershed outlet location
    :param shape_file_name: base name used for the zip file and dataset
    :param watershed_des: free-text description of the watershed
    :param organization: the organization that will own the new dataset
    :returns: JSON string (AJAXResponse) with success flag and message
    """
    source = 'delineate.delineatewatershed._save_shape_file_as_resource():'
    ajax_response = d_helper.AJAXResponse()

    if not self._validate_file_name(shape_file_name):
        ajax_response.success = False
        ajax_response.message = 'Invalid shape file name:%s.' % shape_file_name + '\nFile name needs to have only ' \
                                'alphanumeric characters and ' \
                                'dash, hyphen or space characters.'
        return ajax_response.to_json()

    # TODO: make the saving of the file to temp directory a separate function
    ckan_default_dir = d_helper.StringSettings.ckan_user_session_temp_dir
    session_id = base.session['id']
    shape_files_source_dir = os.path.join(ckan_default_dir, session_id,
                                          'ShapeFiles')
    target_zip_dir = os.path.join(ckan_default_dir, session_id,
                                  'ShapeZippedFile')
    shape_zip_file = os.path.join(target_zip_dir, shape_file_name + '.zip')

    if not os.path.isdir(shape_files_source_dir):
        log.error(
            source +
            'CKAN error: Expected shape file source dir path (%s) is missing.'
            % shape_files_source_dir)
        ajax_response.success = False
        ajax_response.message = _('Failed to save the watershed shape file.')
        return ajax_response.to_json()

    if not os.path.exists(shape_zip_file):
        # create the watershed zip file first
        if os.path.isdir(target_zip_dir):
            shutil.rmtree(target_zip_dir)
        os.makedirs(target_zip_dir)
        files_to_archive = shape_files_source_dir + '/' + 'Watershed.*'
        zipper = zipfile.ZipFile(shape_zip_file, 'w')
        try:
            for file_to_zip in glob.glob(files_to_archive):
                zipper.write(file_to_zip,
                             os.path.basename(file_to_zip),
                             compress_type=zipfile.ZIP_DEFLATED)
        finally:
            # BUG FIX: close the archive even if write() raises, so a
            # half-written zip file handle is not leaked.
            zipper.close()

    # TODO: make the creation of a new package a new function
    # create a package
    package_create_action = tk.get_action('package_create')

    # create unique package name using the current time stamp as a postfix
    # to any package name
    unique_postfix = datetime.now().isoformat().replace(':', '-').replace(
        '.', '-').lower()
    pkg_title = shape_file_name  # + '_'
    pkg_name = shape_file_name.replace(' ', '-').lower()
    data_dict = {
        'name': pkg_name + '_' + unique_postfix,
        'type': 'geographic-feature-set',
        'title': pkg_title,
        'author': tk.c.userObj.name if tk.c.userObj else tk.c.
        author,  # TODO: userObj is None always. Need to retrieve user full name
        'notes': 'This is a dataset that contains a watershed shape zip file for an outlet'
                 ' location at latitude:%s and longitude:%s. ' % (lat, lon) + watershed_des,
        'owner_org': organization,
        'variable_name': '',  # extra metadata field begins from here
        'variable_unit': '',
        'north_extent': '',
        'south_extent': '',
        'east_extent': '',
        'west_extent': '',
        'projection': 'WGS_1984',  # this what our delineation service sets for the watershed
        'dataset_type': 'geographic-feature-set'
    }
    context = {
        'model': base.model,
        'session': base.model.Session,
        'user': tk.c.user or tk.c.author,
        'save': 'save'
    }
    try:
        pkg_dict = package_create_action(context, data_dict)
        log.info(source + 'A new dataset was created with name: %s' %
                 data_dict['title'])
    except Exception as e:
        log.error(
            source +
            'Failed to create a new dataset for saving watershed shape file as'
            ' a resource.\n Exception: %s' % e)
        ajax_response.success = False
        ajax_response.message = _(
            'Failed to create a new dataset for'
            ' saving watershed shape file as a resource.')
        return ajax_response.to_json()

    # TODO: make the add resource to a package a new function
    if not 'resources' in pkg_dict:
        pkg_dict['resources'] = []

    file_name = munge.munge_filename(shape_file_name + '.zip')
    resource = {'url': file_name, 'url_type': 'upload'}
    upload = uploader.ResourceUpload(resource)
    upload.filename = file_name
    # BUG FIX: the zip archive is binary data; it must be opened in 'rb'
    # mode ('r' text mode can corrupt the stream on some platforms).
    upload.upload_file = open(shape_zip_file, 'rb')
    data_dict = {
        'format': 'zip',
        'name': file_name,
        'url': file_name,
        'url_type': 'upload'
    }
    pkg_dict['resources'].append(data_dict)

    try:
        context['defer_commit'] = True
        context['use_cache'] = False
        # update the package
        package_update_action = tk.get_action('package_update')
        package_update_action(context, pkg_dict)
        context.pop('defer_commit')
    except Exception as e:
        log.error(
            source +
            'Failed to update the new dataset for adding watershed shape file as'
            ' a resource.\n Exception: %s' % e)
        ajax_response.success = False
        ajax_response.message = _(
            'Failed to save watershed shape file as a resource.')
        return ajax_response.to_json()

    # Get out resource_id resource from model as it will not appear in
    # package_show until after commit
    upload.upload(context['package'].resources[-1].id,
                  uploader.get_max_resource_size())
    base.model.repo.commit()
    ajax_response.success = True
    ajax_response.message = _('Watershed shape file was saved as a resource.')
    return ajax_response.to_json()