def open_download_stream(self, file_id, session=None): """Opens a Stream from which the application can read the contents of the stored file specified by file_id. For example:: my_db = MongoClient().test fs = GridFSBucket(my_db) # get _id of file to read. file_id = fs.upload_from_stream("test_file", "data I want to store!") grid_out = fs.open_download_stream(file_id) contents = grid_out.read() Returns an instance of :class:`~gridfs.grid_file.GridOut`. Raises :exc:`~gridfs.errors.NoFile` if no file with file_id exists. :Parameters: - `file_id`: The _id of the file to be downloaded. - `session` (optional): a :class:`~pymongo.client_session.ClientSession` .. versionchanged:: 3.6 Added ``session`` parameter. """ gout = GridOut(self._collection, file_id, session=session) # Raise NoFile now, instead of on first attribute access. gout._ensure_file() return gout
def test_multi_chunk_file(self): random_string = b("a") * (DEFAULT_CHUNK_SIZE + 1000) f = GridIn(self.db.fs) f.write(random_string) f.close() self.assertEqual(1, self.db.fs.files.find().count()) self.assertEqual(2, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(random_string, g.read())
def get(self, file_id, callback): """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get .. versionadded:: 1.6 """ GridOut.instance_for(self.__collection, callback, file_id)
def test_grid_out_file_document(self): a = GridIn(self.db.fs) a.write("foo bar") a.close() b = GridOut(self.db.fs, file_document=self.db.fs.files.find_one()) self.assertEqual("foo bar", b.read()) c = GridOut(self.db.fs, 5, file_document=self.db.fs.files.find_one()) self.assertEqual("foo bar", c.read()) self.assertRaises(NoFile, GridOut, self.db.fs, file_document={})
def test_multi_chunk_file(self): random_string = qcheck.gen_string(qcheck.lift(300000))() f = GridIn(self.db.fs) f.write(random_string) f.close() self.assertEqual(1, self.db.fs.files.find().count()) self.assertEqual(2, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(random_string, g.read())
def write_me(s, chunk_size): buf = StringIO(s) infile = GridIn(self.db.fs) while True: to_write = buf.read(chunk_size) if to_write == b(''): break infile.write(to_write) infile.close() buf.close() outfile = GridOut(self.db.fs, infile._id) data = outfile.read() self.assertEqual(s, data)
def get(self, file_id): """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get """ gout = GridOut(self.__collection, file_id) # Raise NoFile now, instead of on first attribute access. gout._ensure_file() return gout
def test_read_chunks_unaligned_buffer_size(self): in_data = "This is a text that doesn't quite fit in a single 16-byte chunk." f = GridIn(self.db.fs, chunkSize=16) f.write(in_data) f.close() g = GridOut(self.db.fs, f._id) out_data = '' while 1: s = g.read(13) if not s: break out_data += s self.assertEqual(in_data, out_data)
def test_set_after_close(self): f = GridIn(self.db.fs, _id="foo", bar="baz") self.assertEqual("foo", f._id) self.assertEqual("baz", f.bar) self.assertRaises(AttributeError, getattr, f, "baz") self.assertRaises(AttributeError, getattr, f, "uploadDate") self.assertRaises(AttributeError, setattr, f, "_id", 5) f.bar = "foo" f.baz = 5 self.assertEqual("foo", f._id) self.assertEqual("foo", f.bar) self.assertEqual(5, f.baz) self.assertRaises(AttributeError, getattr, f, "uploadDate") f.close() self.assertEqual("foo", f._id) self.assertEqual("foo", f.bar) self.assertEqual(5, f.baz) self.assert_(f.uploadDate) self.assertRaises(AttributeError, setattr, f, "_id", 5) f.bar = "a" f.baz = "b" self.assertRaises(AttributeError, setattr, f, "upload_date", 5) g = GridOut(self.db.fs, f._id) self.assertEqual("a", f.bar) self.assertEqual("b", f.baz)
def test_alternate_collection(self): self.db.alt.files.remove({}) self.db.alt.chunks.remove({}) f = GridIn(self.db.alt) f.write("hello world") f.close() self.assertEqual(1, self.db.alt.files.find().count()) self.assertEqual(1, self.db.alt.chunks.find().count()) g = GridOut(self.db.alt, f._id) self.assertEqual("hello world", g.read()) # test that md5 still works... self.assertEqual("5eb63bbbe01eeed093cb22bb8f5acdc3", g.md5)
def test_grid_out_default_opts(self): self.assertRaises(TypeError, GridOut, "foo") self.assertRaises(NoFile, GridOut, self.db.fs, 5) a = GridIn(self.db.fs) a.close() b = GridOut(self.db.fs, a._id) self.assertEqual(a._id, b._id) self.assertEqual(0, b.length) self.assertEqual(None, b.content_type) self.assertEqual(None, b.name) self.assertEqual(None, b.filename) self.assertEqual(256 * 1024, b.chunk_size) self.assertTrue(isinstance(b.upload_date, datetime.datetime)) self.assertEqual(None, b.aliases) self.assertEqual(None, b.metadata) self.assertEqual("d41d8cd98f00b204e9800998ecf8427e", b.md5) for attr in [ "_id", "name", "content_type", "length", "chunk_size", "upload_date", "aliases", "metadata", "md5" ]: self.assertRaises(AttributeError, setattr, b, attr, 5)
def test_alternate_collection(self): self.db.alt.files.remove({}) self.db.alt.chunks.remove({}) f = GridIn(self.db.alt) f.write(b("hello world")) f.close() self.assertEqual(1, self.db.alt.files.find().count()) self.assertEqual(1, self.db.alt.chunks.find().count()) g = GridOut(self.db.alt, f._id) self.assertEqual(b("hello world"), g.read()) # test that md5 still works... self.assertEqual("5eb63bbbe01eeed093cb22bb8f5acdc3", g.md5)
def test_grid_out_custom_opts(self): one = GridIn(self.db.fs, _id=5, filename="my_file", contentType="text/html", chunkSize=1000, aliases=["foo"], metadata={ "foo": 1, "bar": 2 }, bar=3, baz="hello") one.write(b("hello world")) one.close() two = GridOut(self.db.fs, 5) self.assertEqual(5, two._id) self.assertEqual(11, two.length) self.assertEqual("text/html", two.content_type) self.assertEqual(1000, two.chunk_size) self.assertTrue(isinstance(two.upload_date, datetime.datetime)) self.assertEqual(["foo"], two.aliases) self.assertEqual({"foo": 1, "bar": 2}, two.metadata) self.assertEqual(3, two.bar) self.assertEqual("5eb63bbbe01eeed093cb22bb8f5acdc3", two.md5) for attr in [ "_id", "name", "content_type", "length", "chunk_size", "upload_date", "aliases", "metadata", "md5" ]: self.assertRaises(AttributeError, setattr, two, attr, 5)
def test_read_chunks_unaligned_buffer_size(self): in_data = b("This is a text that doesn't " "quite fit in a single 16-byte chunk.") f = GridIn(self.db.fs, chunkSize=16) f.write(in_data) f.close() g = GridOut(self.db.fs, f._id) out_data = b('') while 1: s = g.read(13) if not s: break out_data += s self.assertEqual(in_data, out_data)
def test_survive_cursor_not_found(self): # By default the find command returns 101 documents in the first batch. # Use 102 batches to cause a single getMore. chunk_size = 1024 data = b'd' * (102 * chunk_size) listener = EventListener() client = rs_or_single_client(event_listeners=[listener]) db = client.pymongo_test with GridIn(db.fs, chunk_size=chunk_size) as infile: infile.write(data) with GridOut(db.fs, infile._id) as outfile: self.assertEqual(len(outfile.readchunk()), chunk_size) # Kill the cursor to simulate the cursor timing out on the server # when an application spends a long time between two calls to # readchunk(). client._close_cursor_now( outfile._GridOut__chunk_iter._cursor.cursor_id, _CursorAddress(client.address, db.fs.chunks.full_name)) # Read the rest of the file without error. self.assertEqual(len(outfile.read()), len(data) - chunk_size) # Paranoid, ensure that a getMore was actually sent. self.assertIn("getMore", listener.started_command_names())
def test_set_after_close(self): f = GridIn(self.db.fs, _id="foo", bar="baz") self.assertEqual("foo", f._id) self.assertEqual("baz", f.bar) self.assertRaises(AttributeError, getattr, f, "baz") self.assertRaises(AttributeError, getattr, f, "uploadDate") self.assertRaises(AttributeError, setattr, f, "_id", 5) f.bar = "foo" f.baz = 5 self.assertEqual("foo", f._id) self.assertEqual("foo", f.bar) self.assertEqual(5, f.baz) self.assertRaises(AttributeError, getattr, f, "uploadDate") f.close() self.assertEqual("foo", f._id) self.assertEqual("foo", f.bar) self.assertEqual(5, f.baz) self.assertTrue(f.uploadDate) self.assertRaises(AttributeError, setattr, f, "_id", 5) f.bar = "a" f.baz = "b" self.assertRaises(AttributeError, setattr, f, "upload_date", 5) g = GridOut(self.db.fs, f._id) self.assertEqual("a", g.bar) self.assertEqual("b", g.baz) # Versions 2.0.1 and older saved a _closed field for some reason. self.assertRaises(AttributeError, getattr, g, "_closed")
def test_alternate_collection(self): self.db.alt.files.delete_many({}) self.db.alt.chunks.delete_many({}) f = GridIn(self.db.alt) f.write(b"hello world") f.close() self.assertEqual(1, self.db.alt.files.count_documents({})) self.assertEqual(1, self.db.alt.chunks.count_documents({})) g = GridOut(self.db.alt, f._id) self.assertEqual(b"hello world", g.read()) # test that md5 still works... self.assertEqual("5eb63bbbe01eeed093cb22bb8f5acdc3", g.md5)
def get_last_version(self, filename): """Get a file from GridFS by ``"filename"``. Returns the most recently uploaded file in GridFS with the name `filename` as an instance of :class:`~gridfs.grid_file.GridOut`. Raises :class:`~gridfs.errors.NoFile` if no such file exists. An index on ``{filename: 1, uploadDate: -1}`` will automatically be created when this method is called the first time. :Parameters: - `filename`: ``"filename"`` of the file to get .. versionadded:: 1.6 """ self.__files.ensure_index([("filename", ASCENDING), ("uploadDate", DESCENDING)]) cursor = self.__files.find({"filename": filename}) cursor.limit(-1).sort("uploadDate", DESCENDING) try: grid_file = cursor.next() return GridOut(self.__collection, grid_file["_id"]) except StopIteration: raise NoFile("no file in gridfs with filename %r" % filename)
def get_version( self, filename: Optional[str] = None, version: Optional[int] = -1, session: Optional[ClientSession] = None, **kwargs: Any ) -> GridOut: """Get a file from GridFS by ``"filename"`` or metadata fields. Returns a version of the file in GridFS whose filename matches `filename` and whose metadata fields match the supplied keyword arguments, as an instance of :class:`~gridfs.grid_file.GridOut`. Version numbering is a convenience atop the GridFS API provided by MongoDB. If more than one file matches the query (either by `filename` alone, by metadata fields, or by a combination of both), then version ``-1`` will be the most recently uploaded matching file, ``-2`` the second most recently uploaded, etc. Version ``0`` will be the first version uploaded, ``1`` the second version, etc. So if three versions have been uploaded, then version ``0`` is the same as version ``-3``, version ``1`` is the same as version ``-2``, and version ``2`` is the same as version ``-1``. Raises :class:`~gridfs.errors.NoFile` if no such version of that file exists. :Parameters: - `filename`: ``"filename"`` of the file to get, or `None` - `version` (optional): version of the file to get (defaults to -1, the most recent version uploaded) - `session` (optional): a :class:`~pymongo.client_session.ClientSession` - `**kwargs` (optional): find files by custom metadata. .. versionchanged:: 3.6 Added ``session`` parameter. .. versionchanged:: 3.1 ``get_version`` no longer ensures indexes. """ query = kwargs if filename is not None: query["filename"] = filename _disallow_transactions(session) cursor = self.__files.find(query, session=session) if version is None: version = -1 if version < 0: skip = abs(version) - 1 cursor.limit(-1).skip(skip).sort("uploadDate", DESCENDING) else: cursor.limit(-1).skip(version).sort("uploadDate", ASCENDING) try: doc = next(cursor) return GridOut(self.__collection, file_document=doc, session=session) except StopIteration: raise NoFile("no version %d for filename %r" % (version, filename))
def open_download_stream_by_name(self, filename, revision=-1, session=None): """Opens a Stream from which the application can read the contents of `filename` and optional `revision`. For example:: my_db = MongoClient().test fs = GridFSBucket(my_db) grid_out = fs.open_download_stream_by_name("test_file") contents = grid_out.read() Returns an instance of :class:`~gridfs.grid_file.GridOut`. Raises :exc:`~gridfs.errors.NoFile` if no such version of that file exists. Raises :exc:`~ValueError` filename is not a string. :Parameters: - `filename`: The name of the file to read from. - `revision` (optional): Which revision (documents with the same filename and different uploadDate) of the file to retrieve. Defaults to -1 (the most recent revision). - `session` (optional): a :class:`~pymongo.client_session.ClientSession` :Note: Revision numbers are defined as follows: - 0 = the original stored file - 1 = the first revision - 2 = the second revision - etc... - -2 = the second most recent revision - -1 = the most recent revision .. versionchanged:: 3.6 Added ``session`` parameter. """ validate_string("filename", filename) query = {"filename": filename} cursor = self._files.find(query, session=session) if revision < 0: skip = abs(revision) - 1 cursor.limit(-1).skip(skip).sort("uploadDate", DESCENDING) else: cursor.limit(-1).skip(revision).sort("uploadDate", ASCENDING) try: grid_file = next(cursor) return GridOut(self._collection, file_document=grid_file, session=session) except StopIteration: raise NoFile("no version %d for filename %r" % (revision, filename))
def test_context_manager(self): contents = b"Imagine this is some important data..." with GridIn(self.db.fs, filename="important") as infile: infile.write(contents) with GridOut(self.db.fs, infile._id) as outfile: self.assertEqual(contents, outfile.read())
def test_write_unicode(self): f = GridIn(self.db.fs) self.assertRaises(TypeError, f.write, u"foo") f = GridIn(self.db.fs, encoding="utf-8") f.write(u"foo") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual("foo", g.read()) f = GridIn(self.db.fs, encoding="iso-8859-1") f.write(u"aé") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(u"aé".encode("iso-8859-1"), g.read())
def helper(data): f = GridIn(self.db.fs, chunkSize=1) f.write(data) f.close() self.files += 1 self.chunks += len(data) self.assertEqual(self.files, self.db.fs.files.find().count()) self.assertEqual(self.chunks, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(data, g.read()) g = GridOut(self.db.fs, f._id) self.assertEqual(data, g.read(10) + g.read(10)) return True
def test_readline(self): f = GridIn(self.db.fs, chunkSize=5) f.write(b("""Hello world, How are you? Hope all is well. Bye""")) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(b("H"), g.read(1)) self.assertEqual(b("ello world,\n"), g.readline()) self.assertEqual(b("How a"), g.readline(5)) self.assertEqual(b(""), g.readline(0)) self.assertEqual(b("re you?\n"), g.readline()) self.assertEqual(b("Hope all is well.\n"), g.readline(1000)) self.assertEqual(b("Bye"), g.readline()) self.assertEqual(b(""), g.readline())
def get_version(self, filename=None, version=-1, **kwargs): """Get a file from GridFS by ``"filename"`` or metadata fields. Returns a version of the file in GridFS whose filename matches `filename` and whose metadata fields match the supplied keyword arguments, as an instance of :class:`~gridfs.grid_file.GridOut`. Version numbering is a convenience atop the GridFS API provided by MongoDB. If more than one file matches the query (either by `filename` alone, by metadata fields, or by a combination of both), then version ``-1`` will be the most recently uploaded matching file, ``-2`` the second most recently uploaded, etc. Version ``0`` will be the first version uploaded, ``1`` the second version, etc. So if three versions have been uploaded, then version ``0`` is the same as version ``-3``, version ``1`` is the same as version ``-2``, and version ``2`` is the same as version ``-1``. Raises :class:`~gridfs.errors.NoFile` if no such version of that file exists. An index on ``{filename: 1, uploadDate: -1}`` will automatically be created when this method is called the first time. :Parameters: - `filename`: ``"filename"`` of the file to get, or `None` - `version` (optional): version of the file to get (defualts to -1, the most recent version uploaded) - `**kwargs` (optional): find files by custom metadata. .. versionchanged:: 1.11 `filename` defaults to None; .. versionadded:: 1.11 Accept keyword arguments to find files by custom metadata. .. versionadded:: 1.9 """ database = self.__database if not database.slave_okay and not database.read_preference: self.__files.ensure_index([("filename", ASCENDING), ("uploadDate", DESCENDING)]) query = kwargs if filename is not None: query["filename"] = filename cursor = self.__files.find(query) if version < 0: skip = abs(version) - 1 cursor.limit(-1).skip(skip).sort("uploadDate", DESCENDING) else: cursor.limit(-1).skip(version).sort("uploadDate", ASCENDING) try: grid_file = cursor.next() return GridOut(self.__collection, file_document=grid_file) except StopIteration: raise NoFile("no version %d for filename %r" % (version, filename))
def get(self, file_id, session=None): """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get - `session` (optional): a :class:`~pymongo.client_session.ClientSession` .. versionchanged:: 3.6 Added ``session`` parameter. """ gout = GridOut(self.__collection, file_id, session=session) # Raise NoFile now, instead of on first attribute access. gout._ensure_file() return gout
def get(self, file_id: Any, session: Optional[ClientSession] = None) -> GridOut: """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get - `session` (optional): a :class:`~pymongo.client_session.ClientSession` .. versionchanged:: 3.6 Added ``session`` parameter. """ gout = GridOut(self.__collection, file_id, session=session) # Raise NoFile now, instead of on first attribute access. gout._ensure_file() return gout
def test_closed(self): f = GridIn(self.db.fs, chunkSize=5) f.write(b"Hello world.\nHow are you?") f.close() g = GridOut(self.db.fs, f._id) self.assertFalse(g.closed) g.read(1) self.assertFalse(g.closed) g.read(100) self.assertFalse(g.closed) g.close() self.assertTrue(g.closed)
def test_iterator(self): f = GridIn(self.db.fs) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual([], list(g)) f = GridIn(self.db.fs) f.write("hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(["hello world"], list(g)) self.assertEqual("hello", g.read(5)) self.assertEqual(["hello world"], list(g)) self.assertEqual(" worl", g.read(5)) f = GridIn(self.db.fs, chunk_size=2) f.write("hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(["he", "ll", "o ", "wo", "rl", "d"], list(g))
def test_iterator(self): f = GridIn(self.db.fs) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual([], list(g)) f = GridIn(self.db.fs) f.write(b("hello world")) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual([b("hello world")], list(g)) self.assertEqual(b("hello"), g.read(5)) self.assertEqual([b("hello world")], list(g)) self.assertEqual(b(" worl"), g.read(5)) f = GridIn(self.db.fs, chunk_size=2) f.write(b("hello world")) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual([b("he"), b("ll"), b("o "), b("wo"), b("rl"), b("d")], list(g))
def test_readline(self): f = GridIn(self.db.fs, chunkSize=5) f.write("""Hello world, How are you? Hope all is well. Bye""") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual("H", g.read(1)) self.assertEqual("ello world,\n", g.readline()) self.assertEqual("How a", g.readline(5)) self.assertEqual("", g.readline(0)) self.assertEqual("re you?\n", g.readline()) self.assertEqual("Hope all is well.\n", g.readline(1000)) self.assertEqual("Bye", g.readline()) self.assertEqual("", g.readline())
def test_write_file_like(self): one = GridIn(self.db.fs) one.write(b("hello world")) one.close() two = GridOut(self.db.fs, one._id) three = GridIn(self.db.fs) three.write(two) three.close() four = GridOut(self.db.fs, three._id) self.assertEqual(b("hello world"), four.read()) five = GridIn(self.db.fs, chunk_size=2) five.write(b("hello")) buffer = StringIO(b(" world")) five.write(buffer) five.write(b(" and mongodb")) five.close() self.assertEqual(b("hello world and mongodb"), GridOut(self.db.fs, five._id).read())
def test_write_file_like(self): a = GridIn(self.db.fs) a.write(b"hello world") a.close() b = GridOut(self.db.fs, a._id) c = GridIn(self.db.fs) c.write(b) c.close() d = GridOut(self.db.fs, c._id) self.assertEqual(b"hello world", d.read()) e = GridIn(self.db.fs, chunk_size=2) e.write(b"hello") buffer = BytesIO(b" world") e.write(buffer) e.write(b" and mongodb") e.close() self.assertEqual(b"hello world and mongodb", GridOut(self.db.fs, e._id).read())
def get(self, file_id): """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get .. versionadded:: 1.6 """ return GridOut(self.__collection, file_id)
def test_write_file_like(self): a = GridIn(self.db.fs) a.write("hello world") a.close() b = GridOut(self.db.fs, a._id) c = GridIn(self.db.fs) c.write(b) c.close() d = GridOut(self.db.fs, c._id) self.assertEqual("hello world", d.read()) e = GridIn(self.db.fs, chunk_size=2) e.write("hello") buffer = StringIO(" world") e.write(buffer) e.write(" and mongodb") e.close() self.assertEqual("hello world and mongodb", GridOut(self.db.fs, e._id).read())
def test_write_file_like(self): one = GridIn(self.db.fs) one.write(b"hello world") one.close() two = GridOut(self.db.fs, one._id) three = GridIn(self.db.fs) three.write(two) three.close() four = GridOut(self.db.fs, three._id) self.assertEqual(b"hello world", four.read()) five = GridIn(self.db.fs, chunk_size=2) five.write(b"hello") buffer = StringIO(b" world") five.write(buffer) five.write(b" and mongodb") five.close() self.assertEqual(b"hello world and mongodb", GridOut(self.db.fs, five._id).read())
def test_multiple_reads(self): f = GridIn(self.db.fs, chunkSize=3) f.write(b"hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(b"he", g.read(2)) self.assertEqual(b"ll", g.read(2)) self.assertEqual(b"o ", g.read(2)) self.assertEqual(b"wo", g.read(2)) self.assertEqual(b"rl", g.read(2)) self.assertEqual(b"d", g.read(2)) self.assertEqual(b"", g.read(2))
def get(self, file_id, session=None): """Get a file from GridFS by ``"_id"``. Returns an instance of :class:`~gridfs.grid_file.GridOut`, which provides a file-like interface for reading. :Parameters: - `file_id`: ``"_id"`` of the file to get - `session` (optional): a :class:`~pymongo.client_session.ClientSession` The id is type sensitive ensure the same type is used for putting and getting data from GridFS. If the id was generated by put it should (still) be an instance of :class: `~bson.objectid.ObjectId` .. versionchanged:: 3.6 Added ``session`` parameter. """ gout = GridOut(self.__collection, file_id, session=session) # Raise NoFile now, instead of on first attribute access. gout._ensure_file() return gout
def test_basic(self): f = GridIn(self.db.fs, filename="test") f.write(b"hello world") f.close() self.assertEqual(1, self.db.fs.files.count_documents({})) self.assertEqual(1, self.db.fs.chunks.count_documents({})) g = GridOut(self.db.fs, f._id) self.assertEqual(b"hello world", g.read()) # make sure it's still there... g = GridOut(self.db.fs, f._id) self.assertEqual(b"hello world", g.read()) f = GridIn(self.db.fs, filename="test") f.close() self.assertEqual(2, self.db.fs.files.count_documents({})) self.assertEqual(1, self.db.fs.chunks.count_documents({})) g = GridOut(self.db.fs, f._id) self.assertEqual(b"", g.read()) # test that reading 0 returns proper type self.assertEqual(b"", g.read(0))
def test_readchunk(self): in_data = b'a' * 10 f = GridIn(self.db.fs, chunkSize=3) f.write(in_data) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(3, len(g.readchunk())) self.assertEqual(2, len(g.read(2))) self.assertEqual(1, len(g.readchunk())) self.assertEqual(3, len(g.read(3))) self.assertEqual(1, len(g.readchunk())) self.assertEqual(0, len(g.readchunk()))
def test_basic(self): f = GridIn(self.db.fs, filename="test") f.write(b("hello world")) f.close() self.assertEqual(1, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(b("hello world"), g.read()) # make sure it's still there... g = GridOut(self.db.fs, f._id) self.assertEqual(b("hello world"), g.read()) f = GridIn(self.db.fs, filename="test") f.close() self.assertEqual(2, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(b(""), g.read())
def test_grid_out_file_document(self): one = GridIn(self.db.fs) one.write(b"foo bar") one.close() two = GridOut(self.db.fs, file_document=self.db.fs.files.find_one()) self.assertEqual(b"foo bar", two.read()) three = GridOut(self.db.fs, 5, file_document=self.db.fs.files.find_one()) self.assertEqual(b"foo bar", three.read()) four = GridOut(self.db.fs, file_document={}) with self.assertRaises(NoFile): four.name
def read_bytes_io(obj: GridOut) -> np.array: """reads a binary file stored in mongodb and returns a numpy array Parameters ---------- obj : GridOut output from a mongodb girdfs file Returns ------- np.array numpy array containing the information loaded from gridfs file """ return np.load(io.BytesIO(obj.read()), allow_pickle=True)
def test_zip(self): zf = BytesIO() z = zipfile.ZipFile(zf, "w") z.writestr("test.txt", b"hello world") z.close() zf.seek(0) f = GridIn(self.db.fs, filename="test.zip") f.write(zf) f.close() self.assertEqual(1, self.db.fs.files.count_documents({})) self.assertEqual(1, self.db.fs.chunks.count_documents({})) g = GridOut(self.db.fs, f._id) z = zipfile.ZipFile(g) self.assertSequenceEqual(z.namelist(), ["test.txt"]) self.assertEqual(z.read("test.txt"), b"hello world")
def test_grid_out_file_document(self): one = GridIn(self.db.fs) one.write(b("foo bar")) one.close() two = GridOut(self.db.fs, file_document=self.db.fs.files.find_one()) self.assertEqual(b("foo bar"), two.read()) three = GridOut(self.db.fs, 5, file_document=self.db.fs.files.find_one()) self.assertEqual(b("foo bar"), three.read()) self.assertRaises(NoFile, GridOut, self.db.fs, file_document={})
def test_grid_out_lazy_connect(self): fs = self.db.fs outfile = GridOut(fs, file_id=-1, _connect=False) self.assertRaises(NoFile, outfile.read) self.assertRaises(NoFile, getattr, outfile, 'filename') infile = GridIn(fs, filename=1) infile.close() outfile = GridOut(fs, infile._id, _connect=False) outfile.read() outfile.filename
def test_multiple_reads(self): f = GridIn(self.db.fs, chunkSize=3) f.write("hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual("he", g.read(2)) self.assertEqual("ll", g.read(2)) self.assertEqual("o ", g.read(2)) self.assertEqual("wo", g.read(2)) self.assertEqual("rl", g.read(2)) self.assertEqual("d", g.read(2)) self.assertEqual("", g.read(2))
def test_readchunk(self): in_data = b("a") * 10 f = GridIn(self.db.fs, chunkSize=3) f.write(in_data) f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(3, len(g.readchunk())) self.assertEqual(2, len(g.read(2))) self.assertEqual(1, len(g.readchunk())) self.assertEqual(3, len(g.read(3))) self.assertEqual(1, len(g.readchunk())) self.assertEqual(0, len(g.readchunk()))
def test_basic(self): f = GridIn(self.db.fs, filename="test") f.write(b("hello world")) f.close() self.assertEqual(1, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(b("hello world"), g.read()) # make sure it's still there... g = GridOut(self.db.fs, f._id) self.assertEqual(b("hello world"), g.read()) f = GridIn(self.db.fs, filename="test") f.close() self.assertEqual(2, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual(b(""), g.read()) # test that reading 0 returns proper type self.assertEqual(b(""), g.read(0))
def test_basic(self): f = GridIn(self.db.fs, filename="test") f.write("hello world") f.close() self.assertEqual(1, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual("hello world", g.read()) # make sure it's still there... g = GridOut(self.db.fs, f._id) self.assertEqual("hello world", g.read()) f = GridIn(self.db.fs, filename="test") f.close() self.assertEqual(2, self.db.fs.files.find().count()) self.assertEqual(1, self.db.fs.chunks.find().count()) g = GridOut(self.db.fs, f._id) self.assertEqual("", g.read())
def test_tell(self): f = GridIn(self.db.fs, chunkSize=3) f.write("hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual(0, g.tell()) g.read(0) self.assertEqual(0, g.tell()) g.read(1) self.assertEqual(1, g.tell()) g.read(2) self.assertEqual(3, g.tell()) g.read() self.assertEqual(g.length, g.tell())
def test_seek(self): f = GridIn(self.db.fs, chunkSize=3) f.write("hello world") f.close() g = GridOut(self.db.fs, f._id) self.assertEqual("hello world", g.read()) g.seek(0) self.assertEqual("hello world", g.read()) g.seek(1) self.assertEqual("ello world", g.read()) self.assertRaises(IOError, g.seek, -1) g.seek(-3, _SEEK_END) self.assertEqual("rld", g.read()) g.seek(0, _SEEK_END) self.assertEqual("", g.read()) self.assertRaises(IOError, g.seek, -100, _SEEK_END) g.seek(3) g.seek(3, _SEEK_CUR) self.assertEqual("world", g.read()) self.assertRaises(IOError, g.seek, -100, _SEEK_CUR)
def mod_callback(response): if len(response) > 0: GridOut.instance_for(self.__collection, callback, file_document=response[0]) else: callback(None)
def test_readline(self): f = GridIn(self.db.fs, chunkSize=5) f.write( b( """Hello world, How are you? Hope all is well. Bye""" ) ) f.close() # Try read(), then readline(). g = GridOut(self.db.fs, f._id) self.assertEqual(b("H"), g.read(1)) self.assertEqual(b("ello world,\n"), g.readline()) self.assertEqual(b("How a"), g.readline(5)) self.assertEqual(b(""), g.readline(0)) self.assertEqual(b("re you?\n"), g.readline()) self.assertEqual(b("Hope all is well.\n"), g.readline(1000)) self.assertEqual(b("Bye"), g.readline()) self.assertEqual(b(""), g.readline()) # Try readline() first, then read(). g = GridOut(self.db.fs, f._id) self.assertEqual(b("He"), g.readline(2)) self.assertEqual(b("l"), g.read(1)) self.assertEqual(b("lo"), g.readline(2)) self.assertEqual(b(" world,\n"), g.readline()) # Only readline(). g = GridOut(self.db.fs, f._id) self.assertEqual(b("H"), g.readline(1)) self.assertEqual(b("e"), g.readline(1)) self.assertEqual(b("llo world,\n"), g.readline())