Esempio n. 1
0
 def test_read_sequential(self):
     """Successive block-sized reads walk the whole object, then return b""."""
     _put(b"123456")
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
     # Four read(2) calls: three data blocks, then the EOF sentinel.
     for expected in (b"12", b"34", b"56", b""):
         self.assertEqual(raw.read(2), expected)
Esempio n. 2
0
 def test_read_sequential(self):
     """Each read(2) yields the next 2-byte block; reads at EOF give b''."""
     _put(b'123456')
     reader = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
     chunks = [reader.read(2) for _ in range(4)]
     self.assertEqual(chunks, [b'12', b'34', b'56', b''])
Esempio n. 3
0
 def test_raise_file_not_found_between_blocks(self):
     """Deleting the object mid-read raises FileNotFoundError on the next fetch."""
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
     _clear()
     # The first block was already loaded at open time, so this read succeeds
     # even though the backing object is gone.
     raw.read(3)
     with self.assertRaises(FileNotFoundError):
         raw.read(3)  # the second block must be fetched now -- and it's gone
Esempio n. 4
0
 def test_recover_after_read_protocolerror(self, read_mock):
     """A ProtocolError on the first streamed read is retried and logged.

     DownloadChunkIterator is patched (decorator above) so that the first
     attempt to stream bytes raises ProtocolError and the retry succeeds.
     """
     read_mock.side_effect = [ProtocolError, b'123456']
     _put(b'123456')
     with self.assertLogs(minio.__name__, 'INFO') as logs:
         raw = minio.RandomReadMinioFile(Bucket, Key)
         self.assertEqual(raw.read(), b'123456')
         self.assertRegex(logs.output[0], 'Retrying exception')
Esempio n. 5
0
 def test_skip_block(self):
     """seek()-ing past a block must not download the skipped block eagerly."""
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
     raw.read(2)  # fetch block #1
     raw.seek(4)  # jump straight over block #2
     raw.read(2)  # fetch block #3
     # Block #2 should never have been downloaded. Prove it by deleting the
     # object before reading the skipped range: the data cannot come through.
     _clear()
     raw.seek(2)
     with self.assertRaises(Exception):
         raw.read(2)  # the backing object is gone, so this must fail
Esempio n. 6
0
 def test_read_stops_at_block_boundary(self):
     """read(size) may return fewer than `size` bytes -- one block per call.

     Per https://docs.python.org/3/library/io.html#io.RawIOBase: read() may
     return fewer than `size` bytes because only one underlying call is ever
     made. With 2-byte blocks, read(4) therefore yields 2 bytes at a time
     until EOF, where it returns b''.
     """
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
     for expected in (b'12', b'34', b'56', b''):
         self.assertEqual(raw.read(4), expected)
Esempio n. 7
0
 def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
     """Deleting a wf_module removes orphaned S3 uploads that have no DB row."""
     workflow = Workflow.create_and_init()
     step = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
     file_uuid = str(uuidgen.uuid4())
     key = step.uploaded_file_prefix + file_uuid
     minio.put_bytes(minio.UserFilesBucket, key, b"A\n1")
     # Deliberately skip creating the UploadedFile database row. This
     # simulates a race during upload/delete that leaves a file on S3 with
     # no corresponding record in our database.
     step.delete()  # must not crash
     with self.assertRaises(FileNotFoundError):
         with minio.RandomReadMinioFile(minio.UserFilesBucket, key) as f:
             f.read()
Esempio n. 8
0
def _minio_open_random(bucket, key):
    if key.endswith("/_metadata"):
        # fastparquet insists upon trying for the 'hive' storage schema before
        # settling on the 'simple' storage schema. At no time have we ever
        # saved a file in 'hive' format; therefore there are no '_metadata'
        # files; therefore we can skip hitting minio here.
        raise FileNotFoundError

    # TODO store column metadata in the database, so we don't need to read it
    # from S3. Then consider minio.FullReadMinioFile, which could be faster.
    # (We'll want to benchmark.) Another option is to use the 'hive' format and
    # FullReadMinioFile; but that choice would be hard to un-choose, so let's
    # not rush into it.
    raw = minio.RandomReadMinioFile(bucket, key)

    # fastparquet actually expects a _buffered_ reader -- it expects `read()`
    # to always return a buffer of the same length it requests.
    buffered = io.BufferedReader(raw)

    return buffered
Esempio n. 9
0
 def test_read_starting_mid_block(self):
     """A read that begins mid-block returns only the rest of that block."""
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
     raw.seek(2)
     # Position 2 is the last byte of block #1, so read(2) stops there.
     self.assertEqual(raw.read(2), b'3')
Esempio n. 10
0
 def test_raise_file_not_found(self):
     """Opening a missing object raises FileNotFoundError at construction."""
     with self.assertRaises(FileNotFoundError):
         minio.RandomReadMinioFile(Bucket, Key)
Esempio n. 11
0
 def test_read_entire_file(self):
     """read() with no size returns everything from the cursor to EOF."""
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
     raw.seek(1)
     self.assertEqual(raw.read(), b'23456')
Esempio n. 12
0
 def test_seek_to_end(self):
     """Seeking relative to the end (io.SEEK_END) positions the cursor right."""
     _put(b'123456')
     raw = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
     raw.seek(-2, io.SEEK_END)
     self.assertEqual(raw.read(), b'56')