Example #1
    def single_gridfs_data_stream(self, graphical=None):
        """Stream GridFS files from the database to disk sequentially."""
        # assumes module-level imports: io, os, and tqdm's tqdm
        db = Mongo(self.args)
        db.connect()
        cursor = db.getCursor(
            db_collection_name=self.args["db_collection_name"] + ".files")
        for batch in tqdm(db.getFiles(db_data_cursor=cursor)):
            for grid in batch:
                print(grid["_id"])
                # read the whole file out of GridFS into memory
                data = io.BytesIO(grid["gridout"].read())

                # prefer a timestamped filename; fall back to the id
                # alone when the metadata lacks a datetime
                try:
                    filename = "{}_{}.jpeg".format(
                        grid["metadata"]["datetime"].strftime(
                            "%Y-%m-%dT%H:%M:%S"),
                        grid["_id"])
                except KeyError:
                    filename = "{}.jpeg".format(grid["_id"])

                # skip files that already exist on disk
                if not os.path.isfile(filename):
                    print(filename)
                    with open(filename, "wb") as f:
                        f.write(data.getbuffer())
                else:
                    print(filename, "*")
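For the stream above to find anything, files must first be dumped into GridFS. Below is a minimal sketch of such a dump using the ezdb calls shown in Example #2; the constructor argument keys and the "datetime" metadata layout are assumptions inferred from the filename logic above, not documented ezdb API.

import datetime

from ezdb.mongo import Mongo

# dump one image into GridFS so the stream above has something to read
# (the collection name and metadata keys are illustrative assumptions)
db = Mongo({"db_collection_name": "images"})
db.connect()
with open("frame.jpeg", "rb") as f:
    db.dump(db_collection_name="images",
            data=({"datetime": datetime.datetime.now()}, f.read()))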
Example #2
    def test_gridfs(self):
        """Test/ example of gridfs dump and retrieve from MongoDB."""
        from ezdb.mongo import Mongo

        db = Mongo({"pylog": null_printer})
        self.assertIsInstance(db, Mongo)
        db.connect()
        db.dump(db_collection_name="test", data=({"success": 1}, b'success'))
        cursor = db.getCursor(db_collection_name="test.files")
        for batch in db.getFiles(db_data_cursor=cursor):
            for grid in batch:
                # check ids match
                self.assertEqual(grid["_id"], grid["metadata"]["_id"])
                # read the file back and check it equals what was put in
                self.assertEqual(grid["gridout"].read(), b'success')
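The test assumes a null_printer callable is already in scope to silence ezdb's logging. A minimal sketch of such a helper follows; the print-like signature is an assumption.

# assumed no-op logger passed as "pylog" above; the signature is a guess
def null_printer(*args, **kwargs):
    pass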
Example #3
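The script below assumes db, db_connect_args, and top_level_collection_name were defined earlier. A minimal sketch of such a preamble follows; the collection name and pipeline are illustrative placeholders.

import glob
import io
import os

from ezdb.mongo import Mongo

top_level_collection_name = "images"  # illustrative name
db_connect_args = {
    "db_collection_name": top_level_collection_name,
    "db_pipeline": [{"$match": {}}],  # placeholder: match everything
}
db = Mongo(db_connect_args)
db.connect()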
every_id = []
# collect the id of every document already in the database
for batch in db.getBatches():
    every_id.extend(str(doc["_id"]) for doc in batch)

print(every_id)
file_names = glob.glob("*.jpeg")
print(file_names)

# fetch from the database only the data that does not already exist locally
print(db_connect_args["db_pipeline"])
cursor = db.getCursor(db_collection_name=db_connect_args["db_collection_name"],
                      db_pipeline=db_connect_args["db_pipeline"])
print("begin stream... (* files that exist in filesystem will be skipped)")
for batch in db.getFiles(db_data_cursor=cursor):  # stream via the prepared cursor
    for grid in batch:
        image = io.BytesIO(grid["gridout"].read())
        filename = "{}_{}.jpeg".format(
            grid["metadata"]["datetime"].strftime("%Y-%m-%dT%H:%M:%S"),
            grid["_id"])
        # skip files that already exist on disk
        if not os.path.isfile(filename):
            print(filename)
            with open(filename, "wb") as f:
                f.write(image.getbuffer())
        else:
            print(filename, "*")