def single_gridfs_data_stream(self, graphical=None):
    """Stream GridFS files out of the database into local ``.jpeg`` files.

    Connects to MongoDB using ``self.args``, opens a cursor over the
    ``<db_collection_name>.files`` collection and walks it batch by batch.
    Each file is written to the current working directory as
    ``<datetime>_<_id>.jpeg``, or as ``<_id>.jpeg`` when the document
    has no ``metadata``/``datetime`` key. Files that already exist on disk
    are skipped and reported with a trailing ``*``.

    :param graphical: Currently unused by this method.
    """
    db = Mongo(self.args)
    db.connect()
    cursor = db.getCursor(
        db_collection_name=self.args["db_collection_name"] + ".files")
    for batch in tqdm(db.getFiles(db_data_cursor=cursor)):
        for grid in batch:
            print(grid["_id"])
            try:
                filename = "{}_{}.jpeg".format(
                    grid["metadata"]["datetime"].strftime(
                        "%Y-%m-%dT%H:%M:%S"),
                    grid["_id"])
            except KeyError:
                # No usable timestamp in the metadata: name by id only.
                filename = "{}.jpeg".format(grid["_id"])

            if os.path.isfile(filename):
                # Already on disk; skip before fetching the payload so no
                # file contents are pulled from the database needlessly.
                print(filename, "*")
                continue

            print(filename)
            # Read fully before opening the target so a failed read does
            # not leave an empty file that a later run would then skip.
            payload = grid["gridout"].read()
            with open(filename, "wb") as f:
                f.write(payload)
def test_gridfs(self):
    """Exercise a GridFS round trip: dump one file, then read it back.

    Doubles as a usage example. A ``({"success": 1}, b'success')`` pair is
    dumped into the ``test`` collection (presumably metadata plus file
    bytes -- confirm against ``Mongo.dump``), after which every file found
    through ``test.files`` must carry matching ids and the stored bytes.
    """
    from ezdb.mongo import Mongo

    expected_payload = b'success'

    mongo = Mongo({"pylog": null_printer})
    self.assertIsInstance(mongo, Mongo)
    mongo.connect()
    mongo.dump(db_collection_name="test",
               data=({"success": 1}, expected_payload))

    files_cursor = mongo.getCursor(db_collection_name="test.files")
    stored_files = (
        entry
        for chunk in mongo.getFiles(db_data_cursor=files_cursor)
        for entry in chunk
    )
    for entry in stored_files:
        # the top-level id must agree with the one kept in the metadata
        self.assertEqual(entry["_id"], entry["metadata"]["_id"])
        # the stored content must be exactly what was dumped
        self.assertEqual(entry["gridout"].read(), expected_payload)
# Collect the string form of every document id the database yields.
# NOTE(review): every_id and file_names are only printed here; nothing
# below uses them to filter the stream -- confirm whether that is intended.
every_id = []
for batch in db.getBatches():
    every_id.extend(str(doc["_id"]) for doc in batch)
print(every_id)

# JPEG files already present in the current working directory.
file_names = glob.glob("*.jpeg")
print(file_names)

# get data that does not already exist from database
print(db_connect_args["db_pipeline"])
db.getCursor(db_collection_name=db_connect_args["db_collection_name"],
             db_pipeline=db_connect_args["db_pipeline"])
print("begin stream... (* files that exist in filesystem will be skipped)")
for batch in db.getFiles(db_collection_name=top_level_collection_name):
    for grid in batch:
        # Build the target name once and reuse it for both the existence
        # check and the write.
        try:
            filename = "{}_{}.jpeg".format(
                grid["metadata"]["datetime"].strftime("%Y-%m-%dT%H:%M:%S"),
                grid["_id"])
        except KeyError:
            # A document without metadata/datetime should not abort the
            # whole stream; fall back to naming the file by id only.
            filename = "{}.jpeg".format(grid["_id"])

        if os.path.isfile(filename):
            # Skip before reading so existing files are not re-downloaded.
            print(filename, "*")
            continue

        print(filename)
        # Read fully before opening the target so a failed read does not
        # leave an empty file that a later run would then skip.
        image = grid["gridout"].read()
        with open(filename, "wb") as f:
            f.write(image)