Exemple #1
0
def thumb_size(filepath):
    """Return the size of a thumbnail file, or 0 when it cannot be found.

    The storage backend is asked first; a file that is only present on the
    local filesystem is measured with ``os.path.getsize`` instead.
    """
    if storage_manager.exists(filepath):
        return storage_manager.size(filepath)
    return os.path.getsize(filepath) if os.path.exists(filepath) else 0
Exemple #2
0
def get_download_response(request, docid, attachment=False):
    """
    Build the HTTP response used to download the document with id ``docid``.

    The document is looked up with ``get_object_or_404``. A user lacking the
    ``base.download_resourcebase`` permission on the document's resource gets
    the rendered ``401.html`` page with status 401. When ``attachment`` is
    true, a download event is registered. The first stored file of the
    document is then served through ``DownloadResponse``; when the document
    has no file available in storage, a plain 404 response is returned.
    """
    document = get_object_or_404(Document, pk=docid)

    allowed = request.user.has_perm('base.download_resourcebase',
                                    obj=document.get_self_resource())
    if not allowed:
        denied_page = loader.render_to_string(
            '401.html',
            context={
                'error_message':
                _("You are not allowed to view this document.")
            },
            request=request)
        return HttpResponse(denied_page, status=401)

    if attachment:
        register_event(request, EventType.EVENT_DOWNLOAD, document)

    # The download name is the slugified document title without its extension.
    title_root = os.path.splitext(os.path.basename(document.title))[0]
    filename = slugify(title_root)

    if not document.files or not storage_manager.exists(document.files[0]):
        return HttpResponse("File is not available", status=404)

    stored_file = storage_manager.open(document.files[0]).file
    return DownloadResponse(stored_file,
                            basename=f'{filename}.{document.extension}',
                            attachment=attachment)
Exemple #3
0
def original_link_available(context, resourceid, url):
    """Tell whether the original-data link ``url`` of a resource is usable.

    The resource is resolved with the ``base.download_resourcebase``
    permission. A ``url`` that does not point at this site's own ``download``
    view for the resource (different host or path) is always reported as
    available. Otherwise the link is available only when the resource has at
    least one file and every one of its files exists in storage.

    :param context: template context; must contain the current ``request``.
    :param resourceid: primary key of the resource.
    :param url: the link to be checked.
    :return: ``True`` when the link can be offered, ``False`` otherwise.
    """
    _not_permitted = _("You are not permitted to save or edit this resource.")
    request = context['request']
    instance = resolve_object(request,
                              ResourceBase, {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)

    # ``reverse`` unpacks ``args`` positionally, so hand it an ordered tuple
    # rather than a set.
    download_url = urljoin(settings.SITEURL,
                           reverse("download", args=(resourceid,)))
    given, expected = urlsplit(url), urlsplit(download_url)
    if (given.netloc, given.path) != (expected.netloc, expected.path):
        return True

    if not isinstance(instance, ResourceBase):
        return False

    try:
        files = list(instance.files)
        # An empty file list means there is nothing to download.
        return bool(files) and all(
            storage_manager.exists(file) for file in files)
    except Exception:
        # Any failure while inspecting the files means the link is unusable.
        traceback.print_exc()
        return False
Exemple #4
0
    def delete(self, *args, **kwargs):
        """Delete this upload and clean up what it left behind.

        After the model row itself is deleted, the following clean-up is
        attempted; storage and filesystem failures are logged rather than
        raised:

        * the importer session identified by ``self.import_id`` is deleted
          (a session that cannot be fetched is silently skipped);
        * every file listed in ``self.resource.files`` is removed from storage
          and the resource's ``files`` field is reset;
        * the data locations reported by the importer tasks, and
          ``self.upload_dir`` if it exists, are removed from the local
          filesystem.
        """
        importer_locations = []
        super().delete(*args, **kwargs)
        try:
            session = gs_uploader.get_session(self.import_id)
        except (NotFound, Exception):
            session = None
        if session:
            for task in session.tasks:
                # A task without data, or data without a location, has nothing
                # on disk to clean up; it must not abort the deletion.
                location = getattr(
                    getattr(task, 'data', None), 'location', None)
                if location:
                    importer_locations.append(location)
            try:
                session.delete()
            except Exception:
                logger.warning('error deleting upload session')

        # we delete directly the folder with the files of the resource
        if self.resource:
            for _file in self.resource.files:
                try:
                    if storage_manager.exists(_file):
                        storage_manager.delete(_file)
                except Exception as e:
                    logger.warning(e)

            # Do we want to delete the files also from the resource?
            ResourceBase.objects.filter(id=self.resource.id).update(files={})

        for _location in importer_locations:
            try:
                shutil.rmtree(_location)
            except Exception as e:
                logger.warning(e)

        # here we are deleting the local that soon will be removed
        if self.upload_dir and os.path.exists(self.upload_dir):
            try:
                shutil.rmtree(self.upload_dir)
            except Exception as e:
                logger.warning(e)
Exemple #5
0
def download_resource_file(url: str, target_name: str) -> Path:
    """Download a resource file and store it using GeoNode's `storage_manager`.

    Downloads use the django `UploadedFile` helper classes. Depending on the
    size announced by the remote server (``Content-Length``), the file is
    downloaded into an in-memory buffer or into a temporary file on disk; a
    missing ``Content-Length`` always selects the temporary file. After the
    download, `storage_manager` saves the file under ``target_name``,
    replacing any file already stored with that name.

    :param url: location of the remote file.
    :param target_name: name to store the file under.
    :return: path of the stored file, as reported by `storage_manager`.
    :raises requests.HTTPError: if the server answers with an error status.
    :raises ValueError: if ``Content-Length`` is present but not an integer.
    """

    # The context manager releases the connection even if something below fails.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        content_length = response.headers.get("Content-Length")
        file_size = int(content_length) if content_length is not None else None
        content_type = response.headers.get("Content-Type")
        # Use the encoding declared by the server: `apparent_encoding` would
        # read the whole body into memory to guess it, defeating `stream=True`.
        charset = response.encoding
        size_threshold = config.get_setting(
            "HARVESTED_RESOURCE_FILE_MAX_MEMORY_SIZE")
        if file_size is not None and file_size < size_threshold:
            logger.debug("Downloading to an in-memory buffer...")
            buf = io.BytesIO()
            file_ = uploadedfile.InMemoryUploadedFile(buf, None, target_name,
                                                      content_type, file_size,
                                                      charset)
        else:
            logger.debug("Downloading to a temporary file...")
            file_ = uploadedfile.TemporaryUploadedFile(target_name,
                                                       content_type,
                                                       file_size, charset)
            # NOTE: there is no need to explicitly delete the file represented
            # by `file_`, it is being deleted implicitly
        with file_.open("wb+") as fd:
            for chunk in response.iter_content(chunk_size=None,
                                               decode_unicode=False):
                fd.write(chunk)
            # Rewind so that the storage backend reads from the beginning.
            fd.seek(0)
            if storage_manager.exists(target_name):
                logger.debug(
                    f"file {target_name!r} already exists, replacing...")
                storage_manager.delete(target_name)
            file_name = storage_manager.save(target_name, fd)
            result = Path(storage_manager.path(file_name))
    return result
Exemple #6
0
def create_document_thumbnail(self, object_id):
    """
    Create thumbnail for a document.

    The source image depends on the document kind: images use the stored file
    itself, video and audio documents use the placeholder image, and other
    files are rendered through ``render_document``. If thumbnail generation
    from that source fails, the placeholder image is used instead; if that
    fails too, the error is logged and nothing is saved.

    :param self: not used in the body (presumably the bound task instance -
        TODO confirm against the decorator, which is outside this view).
    :param object_id: primary key of the ``Document``.
    :raises Document.DoesNotExist: re-raised after logging when no document
        has this id.
    """
    logger.debug(f"Generating thumbnail for document #{object_id}.")

    try:
        document = Document.objects.get(id=object_id)
    except Document.DoesNotExist:
        logger.error(f"Document #{object_id} does not exist.")
        raise

    # `image_path` is only set for rendered documents (a file to remove
    # afterwards); `image_file` is the open handle the thumbnail is built from.
    image_path = None
    image_file = None

    if document.is_image:
        dname = storage_manager.path(document.files[0])
        if storage_manager.exists(dname):
            image_file = storage_manager.open(dname, 'rb')
    elif document.is_video or document.is_audio:
        image_file = open(document.find_placeholder(), 'rb')
    elif document.is_file:
        # NOTE(review): `storage_manager.path` is applied here and again just
        # below on its own result; this first call sits outside the `try`, so
        # a NotImplementedError raised here is not caught - confirm intent.
        dname = storage_manager.path(document.files[0])
        try:
            document_location = storage_manager.path(dname)
        except NotImplementedError as e:
            logger.debug(e)

            # Fall back to the URL when the storage cannot provide a path.
            document_location = storage_manager.url(dname)

        try:
            image_path = render_document(document_location)
            if image_path is not None:
                try:
                    image_file = open(image_path, 'rb')
                except Exception as e:
                    logger.debug(
                        f"Failed to render document #{object_id}: {e}")
            else:
                logger.debug(f"Failed to render document #{object_id}")
        except ConversionError as e:
            logger.debug(f"Could not convert document #{object_id}: {e}.")
        except NotImplementedError as e:
            logger.debug(f"Failed to render document #{object_id}: {e}")

    thumbnail_content = None
    try:
        try:
            # `image_file` may still be None here; any failure falls through
            # to the placeholder below.
            thumbnail_content = generate_thumbnail_content(image_file)
        except Exception as e:
            logger.debug(
                f"Could not generate thumbnail, falling back to 'placeholder': {e}"
            )
            thumbnail_content = generate_thumbnail_content(
                document.find_placeholder())
    except Exception as e:
        logger.error(f"Could not generate thumbnail: {e}")
        return
    finally:
        # Runs on success and on the early return above: release the source
        # handle and remove the rendered file.
        if image_file is not None:
            image_file.close()

        if image_path is not None:
            os.remove(image_path)

    # An empty result is only warned about; it is still passed to
    # `save_thumbnail` below.
    if not thumbnail_content:
        logger.warning(f"Thumbnail for document #{object_id} empty.")
    filename = f'document-{document.uuid}-thumb.png'
    document.save_thumbnail(filename, thumbnail_content)
    logger.debug(f"Thumbnail for document #{object_id} created.")
Exemple #7
0
def remove_thumb(filename):
    """Remove the thumbnail named ``filename`` from storage, if it is there."""
    target = thumb_path(filename)
    if not storage_manager.exists(target):
        return
    storage_manager.delete(target)
Exemple #8
0
def get_thumbs():
    """Return the file names found in the thumbnail location of the storage.

    An empty list is returned when the thumbnail location does not exist.
    """
    if storage_manager.exists(settings.THUMBNAIL_LOCATION):
        # `listdir` yields (directories, files); only the files are thumbnails.
        _directories, thumbnails = storage_manager.listdir(
            settings.THUMBNAIL_LOCATION)
        return thumbnails
    return []
Exemple #9
0
def thumb_exists(filename):
    """Tell whether the thumbnail named ``filename`` is present in storage."""
    location = thumb_path(filename)
    return storage_manager.exists(location)
Exemple #10
0
def download(request, resourceid, sender=Dataset):
    """Stream every file of a resource to the client as one ZIP attachment.

    The resource is resolved with the ``base.download_resourcebase``
    permission. All files of the resource must exist in storage; they are
    added to a streamed ZIP archive named after the resource.

    :param request: the current HTTP request.
    :param resourceid: primary key of the resource to download.
    :param sender: model class used to resolve the resource.
    :return: a streaming ZIP response on success; the rendered ``401.html``
        page with status 404 when the resource has no files, a file is
        missing or the files cannot be accessed; the same page with status
        403 when the resolved object is not a ``ResourceBase``.
    """

    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _(
        "No files have been found for this resource. Please, contact a system administrator."
    )

    def _no_files_response():
        # Single place building the "no files" page (401 template, 404 status).
        return HttpResponse(loader.render_to_string(
            '401.html',
            context={
                'error_title': _("No files found."),
                'error_message': _no_files_found
            },
            request=request),
                            status=404)

    def _close_sources(entries):
        # Release handles that were opened but will never be streamed.
        for entry in entries:
            entry['data_iter'].close()

    # Iterable: Needed when the file_info has it's data as a stream
    def _iterable(source_iter):
        try:
            while True:
                buf = source_iter.read(BUFFER_CHUNK_SIZE)
                if not buf:
                    break
                yield buf
        finally:
            # Close the storage handle once its content has been streamed.
            source_iter.close()

    instance = resolve_object(request,
                              sender, {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)

    if isinstance(instance, ResourceBase):
        file_list = []  # Archive name and open handle of each file to zip
        try:
            files = instance.resourcebase_ptr.files
            for file_path in files:
                if not storage_manager.exists(file_path):
                    # One missing file invalidates the whole download.
                    _close_sources(file_list)
                    return _no_files_response()
                file_list.append({
                    "name": os.path.basename(file_path),
                    "data_iter": storage_manager.open(file_path),
                })

            # Check we can access the original files
            if not file_list:
                return _no_files_response()

            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])

            target_zip = zipstream.ZipFile(mode='w',
                                           compression=zipstream.ZIP_DEFLATED,
                                           allowZip64=True)

            # Add files to zip
            for file_info in file_list:
                target_zip.write_iter(arcname=file_info['name'],
                                      iterable=_iterable(
                                          file_info['data_iter']))

            register_event(request, 'download', instance)

            # Streaming content response
            response = StreamingHttpResponse(target_zip,
                                             content_type='application/zip')
            response[
                'Content-Disposition'] = f'attachment; filename="{target_file_name}"'
            return response
        except (NotImplementedError, Upload.DoesNotExist):
            # No response will stream these handles, so release them now.
            _close_sources(file_list)
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return _no_files_response()
    return HttpResponse(loader.render_to_string('401.html',
                                                context={
                                                    'error_title':
                                                    _("Not Authorized"),
                                                    'error_message':
                                                    _not_authorized
                                                },
                                                request=request),
                        status=403)
Exemple #11
0
    def test_moderated_upload(self):
        """
        Test if moderation flag works

        With ADMIN_MODERATE_UPLOADS disabled, an uploaded document is
        published and deleting it allows its orphaned files and thumbnails to
        be cleaned up. With the setting enabled, an uploaded document is
        published but not approved, and its detail page is reachable by the
        second user only once the document belongs to that user's group.
        """
        with self.settings(ADMIN_MODERATE_UPLOADS=False):
            self.client.login(username=self.user, password=self.passwd)
            input_path = self._get_input_path()
            dname = 'document title'
            # NOTE(review): this part uploads tests/data/img.gif, while the
            # storage check further down uses the basename of `input_path`.
            with open(
                    os.path.join(f"{self.project_root}", "tests/data/img.gif"),
                    "rb") as f:
                data = {
                    'title': dname,
                    'doc_file': f,
                    'resource': '',
                    'extension': 'txt',
                    'permissions': '{}',
                }
                resp = self.client.post(self.document_upload_url, data=data)
                self.assertEqual(resp.status_code, 200, resp.content)
            _d = Document.objects.get(title=dname)

            self.assertTrue(_d.is_published)
            # Keep the uuid: it is needed after the document has been deleted.
            uuid = _d.uuid
            _d.delete()

            # Orphaned document files: whatever the clean-up reports as
            # deleted must be exactly what disappeared from the storage folder.
            from geonode.documents.utils import delete_orphaned_document_files
            if storage_manager.exists(os.path.join("documents", "document")):
                _, document_files_before = storage_manager.listdir(
                    os.path.join("documents", "document"))
                deleted = delete_orphaned_document_files()
                _, document_files_after = storage_manager.listdir(
                    os.path.join("documents", "document"))
                self.assertTrue(len(deleted) > 0)
                self.assertEqual(
                    set(deleted),
                    set(document_files_before) - set(document_files_after))

            # Same check for orphaned thumbnails, only when some were deleted.
            from geonode.base.utils import delete_orphaned_thumbs
            thumb_files_before = get_thumbs()
            deleted = delete_orphaned_thumbs()
            thumb_files_after = get_thumbs()
            if deleted:
                self.assertTrue(
                    len(deleted) > 0,
                    f"before: {thumb_files_before} - deleted: {deleted} - after: {thumb_files_after}"
                )
                self.assertEqual(
                    set(deleted),
                    set(thumb_files_before) - set(thumb_files_after),
                    f"deleted: {deleted} vs {set(thumb_files_before) - set(thumb_files_after)}"
                )

            # No file named like the input file may be left in document storage.
            fn = os.path.join(os.path.join("documents", "document"),
                              os.path.basename(input_path))
            self.assertFalse(storage_manager.exists(fn))

            # At most one thumbnail may still carry the deleted document's uuid.
            files = [thumb for thumb in get_thumbs() if uuid in thumb]
            if files and len(files):
                self.assertEqual(len(files), 1)

        with self.settings(ADMIN_MODERATE_UPLOADS=True):
            self.client.login(username=self.user, password=self.passwd)
            norman = get_user_model().objects.get(username="******")
            group = GroupProfile.objects.get(slug="bar")
            input_path = self._get_input_path()
            dname = 'document title'
            with open(input_path, 'rb') as f:
                data = {
                    'title': dname,
                    'doc_file': f,
                    'resource': '',
                    'extension': 'txt',
                    'permissions': '{}',
                }
                resp = self.client.post(self.document_upload_url, data=data)
                self.assertEqual(resp.status_code, 200)
            _d = Document.objects.get(title=dname)
            # With moderation on, the upload is published but not approved.
            self.assertFalse(_d.is_approved)
            self.assertTrue(_d.is_published)

            # Joining the group does not make the user a manager; promote does.
            group.join(norman)
            self.assertFalse(group.user_is_role(norman, "manager"))
            GroupMember.objects.get(group=group, user=norman).promote()
            self.assertTrue(group.user_is_role(norman, "manager"))

            self.client.login(username="******", password="******")
            resp = self.client.get(reverse('document_detail', args=(_d.id, )))
            # Forbidden
            self.assertEqual(resp.status_code, 403)
            # Assigning the document to the group opens it up to the user.
            _d.group = group.group
            _d.save()
            resp = self.client.get(reverse('document_detail', args=(_d.id, )))
            # Allowed - edit permissions
            self.assertEqual(resp.status_code, 200)
            perms_list = get_perms(norman, _d.get_self_resource()) + get_perms(
                norman, _d)
            self.assertTrue('change_resourcebase_metadata' in perms_list)
            GroupMember.objects.get(group=group, user=norman).demote()
            self.assertFalse(group.user_is_role(norman, "manager"))
            resp = self.client.get(reverse('document_detail', args=(_d.id, )))
            # Allowed - no edit
            self.assertEqual(resp.status_code, 200)
            perms_list = get_perms(norman, _d.get_self_resource()) + get_perms(
                norman, _d)
            self.assertFalse('change_resourcebase_metadata' in perms_list)
            group.leave(norman)
Exemple #12
0
def scan_file(file_name, scan_hint=None, charset=None):
    """Build the ``SpatialFiles`` describing an uploaded file.

    A ZIP archive (detected by content or by its ``.zip`` extension) is handed
    to ``_process_zip``; for any other file every entry of its directory is
    taken into account. The resulting paths are renamed with
    ``_rename_files`` and matched against the known file ``types``, optionally
    restricted by ``scan_hint``. XML and SLD files are attached to the
    spatial file only when exactly one spatial file was found.

    Raises ``Exception`` when the file is reachable neither on disk nor in
    storage, or when XML/SLD files come without exactly one matching file.
    """
    if not os.path.exists(file_name):
        try:
            if not storage_manager.exists(file_name):
                raise Exception(_("Could not access to uploaded data."))
        except SuspiciousFileOperation:
            pass

    dirname = os.path.dirname(file_name)
    paths = []
    # `os.path.splitext` always returns a (root, ext) pair, so looking at the
    # extension alone is enough.
    has_zip_extension = os.path.splitext(file_name)[1].lower() == '.zip'
    if zipfile.is_zipfile(file_name) or has_zip_extension:
        try:
            paths, kept_zip = _process_zip(
                file_name,
                dirname,
                scan_hint=scan_hint,
                charset=charset)
            archive = file_name if kept_zip else None
        except Exception as e:
            logger.debug(e)
            archive = file_name
    else:
        archive = None
        for entry in os.listdir(dirname):
            candidate = os.path.join(dirname, entry)
            try:
                fixup_shp_columnnames(candidate, charset)
            except Exception as e:
                logger.debug(e)
            paths.append(candidate)

    safe_paths = _rename_files(paths) if paths is not None else []

    found = []
    for file_type in types:
        for path in safe_paths:
            path_extension = os.path.splitext(path)[-1][1:]
            hint_ok = (scan_hint is None or file_type.code == scan_hint or
                       scan_hint in file_type.aliases)
            if not (file_type.matches(path_extension) and hint_ok):
                continue
            spatial_file = file_type.build_spatial_file(path, safe_paths)
            # Never register the same path twice as a base file.
            known_base_files = [f.base_file for f in found]
            if path not in known_base_files:
                found.append(spatial_file)

    # detect xmls and slds and assign them if a single upload is found
    xml_files = _find_file_type(safe_paths, extension='.xml')
    if xml_files:
        if len(found) != 1:
            raise Exception(_("One or more XML files was provided, but no matching files were found for them."))
        found[0].xml_files = xml_files

    sld_files = _find_file_type(safe_paths, extension='.sld')
    if sld_files:
        if len(found) != 1:
            raise Exception(_("One or more SLD files was provided, but no matching files were found for them."))
        found[0].sld_files = sld_files
    return SpatialFiles(dirname, found, archive=archive)
Exemple #13
0
def download(request, resourceid, sender=Dataset):
    """Return every file of a resource to the client as one ZIP attachment.

    The resource is resolved with the ``base.download_resourcebase``
    permission. Its files are copied from storage into a temporary folder
    under ``settings.STATIC_ROOT``, zipped, and the archive is returned in
    the response body. The temporary folder and the archive are removed
    before returning.

    :param request: the current HTTP request.
    :param resourceid: primary key of the resource to download.
    :param sender: model class used to resolve the resource.
    :return: the ZIP response on success; the rendered ``401.html`` page with
        status 404 when the resource has no files, a file is missing or the
        files cannot be accessed; the same page with status 403 when the
        resolved object is not a ``ResourceBase``.
    """

    _not_authorized = _("You are not authorized to download this resource.")
    _not_permitted = _("You are not permitted to save or edit this resource.")
    _no_files_found = _("No files have been found for this resource. Please, contact a system administrator.")

    def _no_files_response():
        # Single place building the "no files" page (401 template, 404 status).
        return HttpResponse(
            loader.render_to_string(
                '401.html',
                context={
                    'error_title': _("No files found."),
                    'error_message': _no_files_found
                },
                request=request), status=404)

    instance = resolve_object(request,
                              sender,
                              {'pk': resourceid},
                              permission='base.download_resourcebase',
                              permission_msg=_not_permitted)

    if isinstance(instance, ResourceBase):
        # Create Target Folder
        dirpath = tempfile.mkdtemp(dir=settings.STATIC_ROOT)
        dir_time_suffix = get_dir_time_suffix()
        target_folder = os.path.join(dirpath, dir_time_suffix)
        if not os.path.exists(target_folder):
            os.makedirs(target_folder)

        dataset_files = []
        try:
            files = instance.resourcebase_ptr.files
            # Copy all Dataset related files into a temporary folder
            for file_path in files:
                if not storage_manager.exists(file_path):
                    # One missing file invalidates the whole download.
                    return _no_files_response()
                dataset_files.append(file_path)
                filename = os.path.basename(file_path)
                source = storage_manager.open(file_path)
                try:
                    with open(f"{target_folder}/{filename}", 'wb+') as f:
                        f.write(source.read())
                finally:
                    # Do not leave the storage handle open.
                    source.close()

            # Check we can access the original files
            if not dataset_files:
                return _no_files_response()

            # ZIP everything and return
            target_file_name = "".join([instance.name, ".zip"])
            target_file = os.path.join(dirpath, target_file_name)
            zip_dir(target_folder, target_file)
            register_event(request, 'download', instance)
            # Read the archive into the response now, so the file on disk can
            # be removed by the clean-up below.
            with open(target_file, mode='rb') as zip_file:
                response = HttpResponse(
                    content=zip_file.read(),
                    status=200,
                    content_type="application/zip")
            response['Content-Disposition'] = f'attachment; filename="{target_file_name}"'
            return response
        except (NotImplementedError, Upload.DoesNotExist):
            traceback.print_exc()
            tb = traceback.format_exc()
            logger.debug(tb)
            return _no_files_response()
        finally:
            # Remove the whole temporary directory (copied files and archive),
            # not just the inner folder, so nothing piles up in STATIC_ROOT.
            shutil.rmtree(dirpath, ignore_errors=True)
    return HttpResponse(
        loader.render_to_string(
            '401.html',
            context={
                'error_title': _("Not Authorized"),
                'error_message': _not_authorized
            },
            request=request), status=403)