def thumb_size(filepath):
    """Return the size in bytes of a stored thumbnail, or 0 when absent.

    The configured storage backend is consulted first; the local
    filesystem is used as a fallback.
    """
    if storage_manager.exists(filepath):
        return storage_manager.size(filepath)
    if os.path.exists(filepath):
        return os.path.getsize(filepath)
    return 0
def get_download_response(request, docid, attachment=False):
    """
    Returns a download response if user has access to download the document
    of a given id, and an http response if they have no permissions to
    download it.
    """
    document = get_object_or_404(Document, pk=docid)
    if not request.user.has_perm(
            'base.download_resourcebase',
            obj=document.get_self_resource()):
        return HttpResponse(loader.render_to_string(
            '401.html',
            context={
                'error_message': _("You are not allowed to view this document.")
            }, request=request), status=401)
    if attachment:
        register_event(request, EventType.EVENT_DOWNLOAD, document)
    # Safe download name derived from the document title (extension stripped)
    filename = slugify(os.path.splitext(os.path.basename(document.title))[0])
    if document.files and storage_manager.exists(document.files[0]):
        # BUGFIX: use the slugified title as the attachment basename; the
        # previous hard-coded literal discarded the computed `filename`.
        return DownloadResponse(
            storage_manager.open(document.files[0]).file,
            basename=f'{filename}.{document.extension}',
            attachment=attachment)
    return HttpResponse("File is not available", status=404)
def original_link_available(context, resourceid, url):
    """Tell whether the original file(s) behind ``url`` can be served.

    Returns ``True`` when ``url`` is not this resource's own download
    endpoint, or when it is and every file of the resource exists in
    storage; ``False`` otherwise.
    """
    _not_permitted = _("You are not permitted to save or edit this resource.")
    request = context['request']
    instance = resolve_object(
        request, ResourceBase, {'pk': resourceid},
        permission='base.download_resourcebase',
        permission_msg=_not_permitted)
    download_url = urljoin(settings.SITEURL, reverse("download", args={resourceid}))
    requested = urlsplit(url)
    expected = urlsplit(download_url)
    # Any URL that is not this resource's download endpoint is assumed fine.
    if (requested.netloc, requested.path) != (expected.netloc, expected.path):
        return True
    collected = []
    if isinstance(instance, ResourceBase):
        try:
            for stored_file in instance.files:
                collected.append(stored_file)
                if not storage_manager.exists(stored_file):
                    return False
        except Exception:
            traceback.print_exc()
            return False
    return bool(collected)
def delete(self, *args, **kwargs):
    """Delete the upload and clean up every related artifact.

    After the model row is removed, this deletes the GeoServer importer
    session (if any), the stored resource files, the importer working
    directories and the local upload directory. Cleanup failures are
    logged but never propagate.
    """
    importer_locations = []
    super().delete(*args, **kwargs)
    try:
        session = gs_uploader.get_session(self.import_id)
    # FIX: `except (NotFound, Exception)` was redundant — Exception already
    # subsumes NotFound; any failure simply means there is no session left.
    except Exception:
        session = None
    if session:
        for task in session.tasks:
            # Remember the on-disk locations the importer used so they can
            # be removed once the session itself is gone.
            if getattr(task, 'data'):
                importer_locations.append(
                    getattr(task.data, 'location'))
        try:
            session.delete()
        except Exception:
            # FIX: use the module `logger` like every other branch here
            # (was `logging.warning`, which targets the root logger).
            logger.warning('error deleting upload session')
    # we delete directly the folder with the files of the resource
    if self.resource:
        for _file in self.resource.files:
            try:
                if storage_manager.exists(_file):
                    storage_manager.delete(_file)
            except Exception as e:
                logger.warning(e)
        # Do we want to delete the files also from the resource?
        ResourceBase.objects.filter(id=self.resource.id).update(files={})
    for _location in importer_locations:
        try:
            shutil.rmtree(_location)
        except Exception as e:
            logger.warning(e)
    # here we are deleting the local that soon will be removed
    if self.upload_dir and os.path.exists(self.upload_dir):
        try:
            shutil.rmtree(self.upload_dir)
        except Exception as e:
            logger.warning(e)
def download_resource_file(url: str, target_name: str) -> Path:
    """Download a resource file and store it using GeoNode's `storage_manager`.

    Downloads use the django `UploadedFile` helper classes. Depending on the
    size of the remote resource, we may download it into an in-memory buffer
    or store it on a temporary location on disk. After having downloaded the
    file, we use `storage_manager` to save it in the appropriate location.

    :param url: remote location of the file to fetch
    :param target_name: name under which the file is saved in storage
    :returns: the storage path of the saved file
    :raises requests.HTTPError: when the remote server returns an error status
    """
    response = requests.get(url, stream=True)
    response.raise_for_status()
    raw_size = response.headers.get("Content-Length")
    # BUGFIX: the header value is a string; normalize it to an int once so
    # the UploadedFile classes receive a numeric `size` (it was previously
    # passed verbatim as a str).
    file_size = int(raw_size) if raw_size is not None else None
    content_type = response.headers.get("Content-Type")
    # NOTE(review): `apparent_encoding` inspects `response.content`, which
    # buffers the whole body and partially defeats `stream=True` — confirm
    # whether the charset detection is worth that cost.
    charset = response.apparent_encoding
    size_threshold = config.get_setting(
        "HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE")
    if file_size is not None and file_size < size_threshold:
        logger.debug("Downloading to an in-memory buffer...")
        buf = io.BytesIO()
        file_ = uploadedfile.InMemoryUploadedFile(
            buf, None, target_name, content_type, file_size, charset)
    else:
        logger.debug("Downloading to a temporary file...")
        file_ = uploadedfile.TemporaryUploadedFile(
            target_name, content_type, file_size, charset)
    # NOTE: there is no need to explicitly delete the file represented by
    # `file_`, it is being deleted implicitly
    with file_.open("wb+") as fd:
        for chunk in response.iter_content(chunk_size=None, decode_unicode=False):
            fd.write(chunk)
        fd.seek(0)
        if storage_manager.exists(target_name):
            logger.debug(f"file {target_name!r} already exists, replacing...")
            storage_manager.delete(target_name)
        file_name = storage_manager.save(target_name, fd)
        result = Path(storage_manager.path(file_name))
    return result
def create_document_thumbnail(self, object_id):
    """
    Create thumbnail for a document.

    Picks an image source depending on the document type (the stored image
    itself, a placeholder for audio/video, or a rendered page for other
    files), generates the thumbnail content and saves it on the document.
    Failures fall back to a placeholder thumbnail; only a total failure
    aborts without saving.

    :param object_id: primary key of the Document to thumbnail
    :raises Document.DoesNotExist: re-raised when the document is missing
    """
    logger.debug(f"Generating thumbnail for document #{object_id}.")
    try:
        document = Document.objects.get(id=object_id)
    except Document.DoesNotExist:
        logger.error(f"Document #{object_id} does not exist.")
        raise

    image_path = None
    image_file = None
    if document.is_image:
        # The stored file is already an image; open it straight from storage.
        dname = storage_manager.path(document.files[0])
        if storage_manager.exists(dname):
            image_file = storage_manager.open(dname, 'rb')
    elif document.is_video or document.is_audio:
        # Audio/video have no renderable page; use the static placeholder.
        image_file = open(document.find_placeholder(), 'rb')
    elif document.is_file:
        dname = storage_manager.path(document.files[0])
        try:
            # NOTE(review): `dname` is already the result of
            # `storage_manager.path(...)` — this second call looks redundant;
            # confirm whether path-of-path is intentional for this backend.
            document_location = storage_manager.path(dname)
        except NotImplementedError as e:
            # Remote backends may not expose a filesystem path; use the URL.
            logger.debug(e)
            document_location = storage_manager.url(dname)
        try:
            # Render the first page of the document to a temporary image.
            image_path = render_document(document_location)
            if image_path is not None:
                try:
                    image_file = open(image_path, 'rb')
                except Exception as e:
                    logger.debug(
                        f"Failed to render document #{object_id}: {e}")
            else:
                logger.debug(f"Failed to render document #{object_id}")
        except ConversionError as e:
            logger.debug(f"Could not convert document #{object_id}: {e}.")
        except NotImplementedError as e:
            logger.debug(f"Failed to render document #{object_id}: {e}")

    thumbnail_content = None
    try:
        try:
            thumbnail_content = generate_thumbnail_content(image_file)
        except Exception as e:
            # First attempt failed (e.g. image_file is None or unreadable):
            # fall back to the document-type placeholder image.
            logger.debug(
                f"Could not generate thumbnail, falling back to 'placeholder': {e}"
            )
            thumbnail_content = generate_thumbnail_content(
                document.find_placeholder())
    except Exception as e:
        # Even the placeholder failed; give up without saving a thumbnail.
        logger.error(f"Could not generate thumbnail: {e}")
        return
    finally:
        # Always release the source handle and the temporary rendered image.
        if image_file is not None:
            image_file.close()
        if image_path is not None:
            os.remove(image_path)

    if not thumbnail_content:
        logger.warning(f"Thumbnail for document #{object_id} empty.")
    filename = f'document-{document.uuid}-thumb.png'
    document.save_thumbnail(filename, thumbnail_content)
    logger.debug(f"Thumbnail for document #{object_id} created.")
def remove_thumb(filename):
    """Delete a thumbnail from storage; missing files are ignored."""
    target = thumb_path(filename)
    if not storage_manager.exists(target):
        return
    storage_manager.delete(target)
def get_thumbs():
    """Fetches a list of all stored thumbnails.

    Returns an empty list when the thumbnail folder does not exist yet.
    """
    if not storage_manager.exists(settings.THUMBNAIL_LOCATION):
        return []
    # listdir() returns (subdirectories, files); only the file names are
    # thumbnails — the unused `subdirs` local is discarded idiomatically.
    _, thumbs = storage_manager.listdir(settings.THUMBNAIL_LOCATION)
    return thumbs
def thumb_exists(filename):
    """Tell whether the thumbnail for ``filename`` is present in storage."""
    path = thumb_path(filename)
    return storage_manager.exists(path)
def download(request, resourceid, sender=Dataset):
    """Stream every file of a resource to the client as a zip archive.

    Resolves the resource while checking the download permission, then zips
    the stored files on the fly with `zipstream` so nothing is materialized
    on disk. Returns a 404-style page when any file is missing, and a
    403-style page when the resolved object is not a ResourceBase.
    """
    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _(
        "No files have been found for this resource. Please, contact a system administrator."
    )
    instance = resolve_object(request, sender, {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)
    if isinstance(instance, ResourceBase):
        dataset_files = []
        file_list = []  # Store file info to be returned
        try:
            files = instance.resourcebase_ptr.files
            # Copy all Dataset related files into a temporary folder
            for file_path in files:
                if storage_manager.exists(file_path):
                    dataset_files.append(file_path)
                    filename = os.path.basename(file_path)
                    file_list.append({
                        "name": filename,
                        "data_iter": storage_manager.open(file_path),
                    })
                else:
                    # One of the stored files disappeared: abort with 404.
                    return HttpResponse(loader.render_to_string(
                        '401.html',
                        context={
                            'error_title': _("No files found."),
                            'error_message': _no_files_found
                        },
                        request=request), status=404)
            # Check we can access the original files
            if not dataset_files:
                return HttpResponse(loader.render_to_string(
                    '401.html',
                    context={
                        'error_title': _("No files found."),
                        'error_message': _no_files_found
                    },
                    request=request), status=404)
            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])
            target_zip = zipstream.ZipFile(mode='w',
                                           compression=zipstream.ZIP_DEFLATED,
                                           allowZip64=True)

            # Iterable: Needed when the file_info has it's data as a stream
            def _iterable(source_iter):
                # Re-chunk the storage stream so zipstream can consume it
                # lazily while the response is being sent.
                while True:
                    buf = source_iter.read(BUFFER_CHUNK_SIZE)
                    if not buf:
                        break
                    yield buf

            # Add files to zip
            for file_info in file_list:
                target_zip.write_iter(arcname=file_info['name'],
                                      iterable=_iterable(
                                          file_info['data_iter']))
            register_event(request, 'download', instance)
            # Streaming content response
            response = StreamingHttpResponse(target_zip,
                                             content_type='application/zip')
            response[
                'Content-Disposition'] = f'attachment; filename="{target_file_name}"'
            return response
        except (NotImplementedError, Upload.DoesNotExist):
            # Storage backend cannot serve the files (or the upload record
            # vanished): log the traceback and answer with the 404 page.
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return HttpResponse(loader.render_to_string(
                '401.html',
                context={
                    'error_title': _("No files found."),
                    'error_message': _no_files_found
                },
                request=request), status=404)
    return HttpResponse(loader.render_to_string('401.html', context={
        'error_title': _("Not Authorized"),
        'error_message': _not_authorized
    }, request=request), status=403)
def test_moderated_upload(self):
    """
    Test if moderation flag works

    Phase 1 (ADMIN_MODERATE_UPLOADS=False): an uploaded document is
    immediately published; deleting it leaves no orphaned files or thumbs.
    Phase 2 (ADMIN_MODERATE_UPLOADS=True): the document is published but not
    approved, and group manager promotion/demotion gates the metadata-edit
    permission on it.
    """
    with self.settings(ADMIN_MODERATE_UPLOADS=False):
        self.client.login(username=self.user, password=self.passwd)
        input_path = self._get_input_path()
        dname = 'document title'
        with open(
                os.path.join(f"{self.project_root}", "tests/data/img.gif"),
                "rb") as f:
            data = {
                'title': dname,
                'doc_file': f,
                'resource': '',
                'extension': 'txt',
                'permissions': '{}',
            }
            resp = self.client.post(self.document_upload_url, data=data)
            self.assertEqual(resp.status_code, 200, resp.content)
        _d = Document.objects.get(title=dname)
        # Without moderation the document must be published right away.
        self.assertTrue(_d.is_published)
        uuid = _d.uuid
        _d.delete()
        from geonode.documents.utils import delete_orphaned_document_files
        # After deletion, the orphan cleaner must remove exactly the files
        # that disappeared from the documents folder.
        if storage_manager.exists(os.path.join("documents", "document")):
            _, document_files_before = storage_manager.listdir(
                os.path.join("documents", "document"))
            deleted = delete_orphaned_document_files()
            _, document_files_after = storage_manager.listdir(
                os.path.join("documents", "document"))
            self.assertTrue(len(deleted) > 0)
            self.assertEqual(
                set(deleted),
                set(document_files_before) - set(document_files_after))
        from geonode.base.utils import delete_orphaned_thumbs
        # Same invariant for orphaned thumbnails.
        thumb_files_before = get_thumbs()
        deleted = delete_orphaned_thumbs()
        thumb_files_after = get_thumbs()
        if deleted:
            self.assertTrue(
                len(deleted) > 0,
                f"before: {thumb_files_before} - deleted: {deleted} - after: {thumb_files_after}"
            )
            self.assertEqual(
                set(deleted),
                set(thumb_files_before) - set(thumb_files_after),
                f"deleted: {deleted} vs {set(thumb_files_before) - set(thumb_files_after)}"
            )
        # The uploaded file itself must be gone from storage.
        fn = os.path.join(os.path.join("documents", "document"),
                          os.path.basename(input_path))
        self.assertFalse(storage_manager.exists(fn))
        files = [thumb for thumb in get_thumbs() if uuid in thumb]
        if files and len(files):
            self.assertEqual(len(files), 1)
    with self.settings(ADMIN_MODERATE_UPLOADS=True):
        self.client.login(username=self.user, password=self.passwd)
        norman = get_user_model().objects.get(username="******")
        group = GroupProfile.objects.get(slug="bar")
        input_path = self._get_input_path()
        dname = 'document title'
        with open(input_path, 'rb') as f:
            data = {
                'title': dname,
                'doc_file': f,
                'resource': '',
                'extension': 'txt',
                'permissions': '{}',
            }
            resp = self.client.post(self.document_upload_url, data=data)
            self.assertEqual(resp.status_code, 200)
        _d = Document.objects.get(title=dname)
        # With moderation on: published, but awaiting approval.
        self.assertFalse(_d.is_approved)
        self.assertTrue(_d.is_published)
        group.join(norman)
        self.assertFalse(group.user_is_role(norman, "manager"))
        GroupMember.objects.get(group=group, user=norman).promote()
        self.assertTrue(group.user_is_role(norman, "manager"))
        self.client.login(username="******", password="******")
        resp = self.client.get(reverse('document_detail', args=(_d.id, )))
        # Forbidden
        self.assertEqual(resp.status_code, 403)
        _d.group = group.group
        _d.save()
        resp = self.client.get(reverse('document_detail', args=(_d.id, )))
        # Allowed - edit permissions
        self.assertEqual(resp.status_code, 200)
        perms_list = get_perms(norman, _d.get_self_resource()) + get_perms(
            norman, _d)
        self.assertTrue('change_resourcebase_metadata' in perms_list)
        GroupMember.objects.get(group=group, user=norman).demote()
        self.assertFalse(group.user_is_role(norman, "manager"))
        resp = self.client.get(reverse('document_detail', args=(_d.id, )))
        # Allowed - no edit
        self.assertEqual(resp.status_code, 200)
        perms_list = get_perms(norman, _d.get_self_resource()) + get_perms(
            norman, _d)
        self.assertFalse('change_resourcebase_metadata' in perms_list)
        group.leave(norman)
def scan_file(file_name, scan_hint=None, charset=None):
    '''get a list of SpatialFiles for the provided file

    Zip archives are expanded via `_process_zip`; plain files are scanned
    together with their sibling files in the same directory. Matching
    spatial types come from the module-level `types` registry; sidecar
    .xml/.sld files are attached only when exactly one spatial file is
    found, otherwise an exception is raised.
    '''
    if not os.path.exists(file_name):
        try:
            if not storage_manager.exists(file_name):
                raise Exception(_("Could not access to uploaded data."))
        except SuspiciousFileOperation:
            pass
    dirname = os.path.dirname(file_name)
    paths = []
    # CLEANUP: the old `len(os.path.splitext(file_name)) > 0 and` guard was
    # always True (splitext always returns a 2-tuple); only the extension
    # comparison is meaningful. Behavior is unchanged.
    if zipfile.is_zipfile(file_name) or os.path.splitext(file_name)[1].lower() == '.zip':
        try:
            paths, kept_zip = _process_zip(
                file_name, dirname, scan_hint=scan_hint, charset=charset)
            # Keep a reference to the archive only if it was preserved.
            archive = file_name if kept_zip else None
        except Exception as e:
            logger.debug(e)
            archive = file_name
    else:
        # Not an archive: consider every sibling file in the upload folder.
        for p in os.listdir(dirname):
            _f = os.path.join(dirname, p)
            try:
                fixup_shp_columnnames(_f, charset)
            except Exception as e:
                logger.debug(e)
            paths.append(_f)
        archive = None
    if paths is not None:
        safe_paths = _rename_files(paths)
    else:
        safe_paths = []
    found = []
    for file_type in types:
        for path in safe_paths:
            path_extension = os.path.splitext(path)[-1][1:]
            hint_ok = (scan_hint is None or file_type.code == scan_hint
                       or scan_hint in file_type.aliases)
            if file_type.matches(path_extension) and hint_ok:
                _f = file_type.build_spatial_file(path, safe_paths)
                # Avoid registering the same base file twice.
                found_paths = [f.base_file for f in found]
                if path not in found_paths:
                    found.append(_f)
    # detect xmls and assign if a single upload is found
    xml_files = _find_file_type(safe_paths, extension='.xml')
    if xml_files:
        if len(found) == 1:
            found[0].xml_files = xml_files
        else:
            raise Exception(_("One or more XML files was provided, but no matching files were found for them."))
    # detect slds and assign if a single upload is found
    sld_files = _find_file_type(safe_paths, extension='.sld')
    if sld_files:
        if len(found) == 1:
            found[0].sld_files = sld_files
        else:
            raise Exception(_("One or more SLD files was provided, but no matching files were found for them."))
    return SpatialFiles(dirname, found, archive=archive)
def download(request, resourceid, sender=Dataset):
    """Serve every file of a resource as a zip archive built on disk.

    Copies the stored files into a temporary folder under STATIC_ROOT,
    zips the folder and returns the archive. Missing files yield a
    404-style page; a non-ResourceBase object yields a 403-style page.
    The temporary folder is always removed in the ``finally`` block.
    """
    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _("No files have been found for this resource. Please, contact a system administrator.")

    instance = resolve_object(request, sender, {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)
    if isinstance(instance, ResourceBase):
        # Create Target Folder
        # NOTE(review): `dirpath` (holding the final zip) is never removed,
        # only `target_folder` is — confirm whether an external job cleans it.
        dirpath = tempfile.mkdtemp(dir=settings.STATIC_ROOT)
        dir_time_suffix = get_dir_time_suffix()
        target_folder = os.path.join(dirpath, dir_time_suffix)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)
        dataset_files = []
        try:
            files = instance.resourcebase_ptr.files
            # Copy all Dataset related files into a temporary folder
            for file_path in files:
                if storage_manager.exists(file_path):
                    dataset_files.append(file_path)
                    filename = os.path.basename(file_path)
                    # BUGFIX: write each file under its own basename; the
                    # previous fixed literal target made every file
                    # overwrite the one before it inside the archive.
                    with open(f"{target_folder}/{filename}", 'wb+') as f:
                        f.write(storage_manager.open(file_path).read())
                else:
                    return HttpResponse(
                        loader.render_to_string(
                            '401.html',
                            context={
                                'error_title': _("No files found."),
                                'error_message': _no_files_found
                            },
                            request=request), status=404)
            # Check we can access the original files
            if not dataset_files:
                return HttpResponse(
                    loader.render_to_string(
                        '401.html',
                        context={
                            'error_title': _("No files found."),
                            'error_message': _no_files_found
                        },
                        request=request), status=404)
            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])
            target_file = os.path.join(dirpath, target_file_name)
            zip_dir(target_folder, target_file)
            register_event(request, 'download', instance)
            response = HttpResponse(
                content=open(target_file, mode='rb'),
                status=200,
                content_type="application/zip")
            response['Content-Disposition'] = f'attachment; filename="{target_file_name}"'
            return response
        except (NotImplementedError, Upload.DoesNotExist):
            # Storage backend cannot serve the files (or the upload record
            # vanished): log and answer with the 404 page.
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return HttpResponse(
                loader.render_to_string(
                    '401.html',
                    context={
                        'error_title': _("No files found."),
                        'error_message': _no_files_found
                    },
                    request=request), status=404)
        finally:
            if target_folder is not None:
                shutil.rmtree(target_folder, ignore_errors=True)
    return HttpResponse(
        loader.render_to_string(
            '401.html',
            context={
                'error_title': _("Not Authorized"),
                'error_message': _not_authorized
            },
            request=request), status=403)