Example 1
0
    async def test_zip_files(self, temp_files):
        """Zip a mix of regular and already-compressed files and verify contents.

        Members whose name ends in ``.zip`` should be stored uncompressed
        (``ZIP_STORED``); everything else should use a real compression type.
        """
        entries = []
        for name in ['file1.ext', 'zip.zip', 'file2.ext']:
            file_path = temp_files.add_file(name)
            payload = os.urandom(2**5)
            with open(file_path, 'wb') as out:
                out.write(payload)
            entries.append({
                'filename': name,
                'path': file_path,
                'contents': payload,
                'handle': open(file_path, 'rb'),
            })

        reader = streams.ZipStreamReader(AsyncIterator(
            (entry['filename'], streams.FileStreamReader(entry['handle']))
            for entry in entries))
        archive_bytes = await reader.read()
        for entry in entries:
            entry['handle'].close()

        archive = zipfile.ZipFile(io.BytesIO(archive_bytes))

        # `.testzip()` returns `None` when every member's CRC checks out
        assert archive.testzip() is None

        for entry in entries:
            assert archive.open(entry['filename']).read() == entry['contents']
            member_compression = archive.open(entry['filename'])._compress_type
            if entry['filename'].endswith('.zip'):
                assert member_compression == zipfile.ZIP_STORED
            else:
                assert member_compression != zipfile.ZIP_STORED
Example 2
0
    def zip(self, path, **kwargs):
        """Streams a Zip archive of the given folder

        :param str path: The folder to compress
        """
        # Archive entry names are made relative to `base_path`: for a single
        # file that is its parent folder, for a folder it is the folder itself.
        if path.is_file:
            base_path = path.parent.path
        else:
            base_path = path.path

        # Iterative depth-first walk: `remaining` holds folders still to list,
        # `names`/`coros` collect archive entry names and their deferred downloads.
        names, coros, remaining = [], [], [path]

        while remaining:
            path = remaining.pop()
            metadata = yield from self.metadata(path)

            for item in metadata:
                current_path = yield from self.revalidate_path(
                    path,
                    item.name,
                    folder=item.is_folder
                )
                if current_path.is_file:
                    # Entry name = full path with the archive root stripped once
                    names.append(current_path.path.replace(base_path, '', 1))
                    # Defer the download until the zip stream is actually consumed
                    coros.append(self.__zip_defered_download(current_path))
                else:
                    remaining.append(current_path)

        # `zip(...)` here is the builtin, pairing each name with its coroutine
        return streams.ZipStreamReader(*zip(names, coros))
Example 3
0
    async def test_multiple_files(self):
        """Zip several small in-memory streams and verify each member's content."""
        sources = AsyncIterator([
            ('file1.txt', streams.StringStream('[File One]')),
            ('file2.txt', streams.StringStream('[File Two]')),
            ('file3.txt', streams.StringStream('[File Three]')),
        ])

        archive_bytes = await streams.ZipStreamReader(sources).read()

        archive = zipfile.ZipFile(io.BytesIO(archive_bytes))

        # `.testzip()` returns `None` when all member CRCs are intact
        assert archive.testzip() is None

        # Each member must round-trip to its original content, as bytes
        assert archive.open('file1.txt').read() == b'[File One]'
        assert archive.open('file2.txt').read() == b'[File Two]'
        assert archive.open('file3.txt').read() == b'[File Three]'
Example 4
0
    def test_multiple_large_files(self, temp_files):
        """Zip five 256 KiB random files from disk and verify they round-trip."""
        records = []
        for index in range(5):
            name = 'file{}.ext'.format(index)
            file_path = temp_files.add_file(name)
            payload = os.urandom(2**18)

            with open(file_path, 'wb') as out:
                out.write(payload)

            records.append({
                'filename': name,
                'path': file_path,
                'contents': payload
            })

        for record in records:
            record['handle'] = open(record['path'], 'rb')

        reader = streams.ZipStreamReader(
            *((record['filename'], streams.FileStreamReader(record['handle']))
              for record in records))

        archive_bytes = yield from reader.read()

        for record in records:
            record['handle'].close()

        archive = zipfile.ZipFile(io.BytesIO(archive_bytes))

        # `.testzip()` returns `None` when every member's CRC is valid
        assert archive.testzip() is None

        for record in records:
            assert archive.open(record['filename']).read() == record['contents']
Example 5
0
    async def upload(self, stream, path, **kwargs):
        """Zips the given stream then uploads to Dataverse.
        This will delete existing draft files with the same name.

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
        :param str path: The filename prepended with '/'

        :rtype: dict, bool
        """

        # Hash the raw (pre-zip) bytes as they are read, so the upload can be
        # verified against the MD5 Dataverse reports afterwards.
        stream.add_writer('md5', streams.HashStreamWriter(hashlib.md5))

        # Dataverse only accepts zip packages, so wrap the single file in one.
        zip_stream = streams.ZipStreamReader(
            AsyncIterator([(path.name, stream)]))

        # Write stream to disk (Necessary to find zip file size)
        f = tempfile.TemporaryFile()
        chunk = await zip_stream.read()
        while chunk:
            f.write(chunk)
            chunk = await zip_stream.read()
        file_stream = streams.FileStreamReader(f)

        # SWORD "SimpleZip" packaging headers required by the deposit endpoint
        dv_headers = {
            "Content-Disposition": "filename=temp.zip",
            "Content-Type": "application/zip",
            "Packaging": "http://purl.org/net/sword/package/SimpleZip",
            "Content-Length": str(file_stream.size),
        }

        # Delete old file if it exists
        if path.identifier:
            await self.delete(path)

        resp = await self.make_request('POST',
                                       self.build_url(
                                           settings.EDIT_MEDIA_BASE_URL,
                                           'study', self.doi),
                                       headers=dv_headers,
                                       auth=(self.token, ),
                                       data=file_stream,
                                       expects=(201, ),
                                       throws=exceptions.UploadError)
        await resp.release()

        # Find appropriate version of file
        metadata = await self._get_data('latest')
        files = metadata if isinstance(metadata, list) else []
        # NOTE(review): `next(...)` raises StopIteration if the uploaded name is
        # missing from the draft listing — confirm this is the intended failure mode.
        file_metadata = next(file for file in files if file.name == path.name)

        # Compare the locally computed MD5 against the hash Dataverse recorded
        if stream.writers['md5'].hexdigest != file_metadata.extra['hashes'][
                'md5']:
            raise exceptions.UploadChecksumMismatchError()

        # Second element is True when this upload created a brand-new file
        return file_metadata, path.identifier is None
Example 6
0
    async def zip(self, path, **kwargs):
        """Streams a Zip archive of the given folder

        :param str path: The folder to compress
        """
        metadata = await self.metadata(path)

        # A single file becomes a one-item listing rooted at its parent folder
        if path.is_file:
            path = path.parent
            metadata = [metadata]

        return streams.ZipStreamReader(ZipStreamGenerator(self, path, *metadata))
Example 7
0
    async def zip(self, path: wb_path.WaterButlerPath, **kwargs) -> asyncio.StreamReader:
        """Streams a Zip archive of the given folder

        :param  path: ( :class:`.WaterButlerPath` ) The folder to compress
        """
        meta_data = await self.metadata(path)  # type: ignore

        # A single file becomes a one-item listing rooted at its parent folder
        if path.is_file:
            path = path.parent
            meta_data = [meta_data]  # type: ignore

        return streams.ZipStreamReader(ZipStreamGenerator(self, path, *meta_data))  # type: ignore
Example 8
0
    def test_single_file(self):
        """Zip one in-memory stream and check the member's CRC and content."""
        entry = ('filename.extension', streams.StringStream('[File Content]'))

        archive_bytes = yield from streams.ZipStreamReader(entry).read()

        archive = zipfile.ZipFile(io.BytesIO(archive_bytes))

        # `.testzip()` returns `None` when all member CRCs are valid
        assert archive.testzip() is None

        member = archive.open('filename.extension')

        # The single member round-trips to its original bytes
        assert member.read() == b'[File Content]'
Example 9
0
    def upload(self, stream, path, **kwargs):
        """Zips the given stream then uploads to Dataverse.
        This will delete existing draft files with the same name.

        :param waterbutler.core.streams.RequestWrapper stream: The stream to put to Dataverse
        :param str path: The filename prepended with '/'

        :rtype: dict, bool
        """

        # Dataverse only accepts zip packages, so wrap the single file in one.
        stream = streams.ZipStreamReader((path.name, stream))

        # Write stream to disk (Necessary to find zip file size)
        f = tempfile.TemporaryFile()
        chunk = yield from stream.read()
        while chunk:
            f.write(chunk)
            chunk = yield from stream.read()
        stream = streams.FileStreamReader(f)

        # SWORD "SimpleZip" packaging headers required by the deposit endpoint
        dv_headers = {
            "Content-Disposition": "filename=temp.zip",
            "Content-Type": "application/zip",
            "Packaging": "http://purl.org/net/sword/package/SimpleZip",
            "Content-Length": str(stream.size),
        }

        # Delete old file if it exists
        if path.identifier:
            yield from self.delete(path)

        yield from self.make_request(
            'POST',
            self.build_url(settings.EDIT_MEDIA_BASE_URL, 'study', self.doi),
            headers=dv_headers,
            auth=(self.token, ),
            data=stream,
            expects=(201, ),
            throws=exceptions.UploadError
        )

        # Find appropriate version of file
        metadata = yield from self._get_data('latest')
        files = metadata if isinstance(metadata, list) else []
        # NOTE(review): `next(...)` raises StopIteration if the uploaded name is
        # missing from the listing — confirm this is the intended failure mode.
        file_metadata = next(file for file in files if file['name'] == path.name)

        # Second element is True when this upload created a brand-new file
        return file_metadata, path.identifier is None
Example 10
0
    def test_download_stream(self):
        """Serve a zipped stream through the /zip endpoint and verify the body."""
        payload = b'freddie brian john roger'
        source = streams.StringStream(payload)
        source.content_type = 'application/octet-stream'

        zipstream = streams.ZipStreamReader(('file.txt', source))

        self.mock_provider.zip = utils.MockCoroutine(return_value=zipstream)

        resp = yield self.http_client.fetch(
            self.get_url('/zip?provider=queenhub&path=/freddie.png'), )

        archive = zipfile.ZipFile(io.BytesIO(resp.body))

        # All member CRCs must be intact
        assert archive.testzip() is None

        # The response body must round-trip the original payload
        assert archive.open('file.txt').read() == payload
Example 11
0
    async def test_single_large_file(self, temp_files):
        """Zip a single 256 KiB random file from disk and verify it round-trips."""
        filename = 'foo.txt'
        file_path = temp_files.add_file(filename)
        payload = os.urandom(2**18)
        with open(file_path, 'wb') as out:
            out.write(payload)

        with open(file_path, 'rb') as source:
            reader = streams.ZipStreamReader(
                AsyncIterator([(filename, streams.FileStreamReader(source))]))
            archive_bytes = await reader.read()

        archive = zipfile.ZipFile(io.BytesIO(archive_bytes))

        # `.testzip()` returns `None` when all member CRCs are valid
        assert archive.testzip() is None

        member = archive.open('foo.txt')

        # The member must round-trip to the original random bytes
        assert member.read() == payload