def test_gridfs_lazy_connect(self):
        client = MongoClient('badhost', connect=False,
                             serverSelectionTimeoutMS=0)
        cdb = client.db
        gfs = gridfs.GridFSBucket(cdb)
        self.assertRaises(ServerSelectionTimeoutError, gfs.delete, 0)

        gfs = gridfs.GridFSBucket(cdb)
        self.assertRaises(
            ServerSelectionTimeoutError,
            gfs.upload_from_stream, "test", b"")  # Still no connection.
Example #2
    def read_model(self, collection, id):
        """Reads a serialized sklearn model from MongoDB.

        Args:
            collection (str): collection name
            id (ObjectId): the id of the file
        Returns:
            bytes: the serialized model
        """

        # Establish connection
        if self.client is None:
            self.create_client()

        # Connect to db
        db = self.client[self.db]

        fs = gridfs.GridFSBucket(db, collection)

        try:
            with fs.open_download_stream(id) as handler:
                out = handler.read()
            self.logger.info(f'[GRIDFS_READ]: {len(out)} bytes returned from [COLLECTION]: {collection} with [ID]: {id}')

        except Exception as e:
            self.logger.error('MongoDB read failed: {}'.format(e))
            raise

        return out
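A minimal round-trip sketch of the pattern read_model relies on, assuming a local mongod and pickle-serialized models; the database and bucket names below are illustrative only:

import pickle

import gridfs
from pymongo import MongoClient

db = MongoClient()['model_store']  # illustrative database name
fs = gridfs.GridFSBucket(db, bucket_name='models')

# upload a serialized object, then read it back through a download stream
file_id = fs.upload_from_stream('clf.pkl', pickle.dumps({'demo': True}))
with fs.open_download_stream(file_id) as handle:
    restored = pickle.loads(handle.read())
assert restored == {'demo': True}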
Example #3
    def __init__(self,
                 database,
                 collection='fs',
                 logfile=None,
                 debug=os.environ.get('GRIDFS_FUSE_DEBUG'),
                 filename_encoding='utf-8'):
        super().__init__()

        self.logger = logging.getLogger("gridfs_fuse")
        self.logger.setLevel(logging.DEBUG if debug else logging.ERROR)
        if logfile:
            self.handler = logging.FileHandler(logfile)
            self.handler.setLevel(logging.DEBUG)
            self.logger.addHandler(self.handler)

        self._readonly = False
        self._database = database
        self._collection = collection
        self._filename_encoding = filename_encoding

        self.gridfs = gridfs.GridFS(database, collection)
        self.gridfsbucket = gridfs.GridFSBucket(database, collection)
        self.gridfs_files = compat_collection(database, collection + '.files')

        self.inode2id = {pyfuse3.ROOT_INODE: '0'}
        self.id2inode = {'0': pyfuse3.ROOT_INODE}
        self._last_inode = pyfuse3.ROOT_INODE + 1

        self.root_stamp = time_ns()

        self.active_inodes = collections.defaultdict(int)
        self.active_writes = {}
Example #4
 def __init__(self) -> None:
     # Note: Need "connect=False" so that we don't connect until the first
     # time we interact with the database. Required for the celery server
     # running gem5.
     self.db = MongoClient(connect=False).artifact_database
     self.artifacts = self.db.artifacts
     self.fs = gridfs.GridFSBucket(self.db, disable_md5=True)
Example #5
def DownGridFSDicom(path, db):
    fs = gridfs.GridFSBucket(db)
    for data in db.fs.files.find({}, {'filename': True}):
        filename = data['filename']
        with open(path + filename, 'wb') as dicom:
            fs.download_to_stream_by_name(data['filename'], dicom)
    print('Done')
Example #6
def mongodb_gridfs(connector_access, local_result_file, meta_data):
    local_file_path = os.path.join(local_result_file['dir'],
                                   local_result_file['name'])

    client = _mongodb_client(connector_access)
    db = client[connector_access['db']]
    fs = gridfs.GridFSBucket(db)

    md = connector_access.get('metadata')
    if meta_data:
        if not md:
            md = {}
        for key, val in meta_data.items():
            try:
                md[key] = [ObjectId(val)]
            except Exception:  # value is not a valid ObjectId; keep it as-is
                md[key] = val

    with open(local_file_path, 'rb') as f:
        fs.upload_from_stream(connector_access.get('file_name',
                                                   str(uuid.uuid4())),
                              f,
                              chunk_size_bytes=4096,
                              metadata=md)
    client.close()
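The try/except coercion above can be illustrated in isolation: values that parse as 24-character hex ids become [ObjectId(...)], everything else is stored verbatim (the sample values below are made up):

from bson import ObjectId
from bson.errors import InvalidId

def coerce(val):
    # mirror the metadata coercion used in mongodb_gridfs
    try:
        return [ObjectId(val)]
    except (InvalidId, TypeError):
        return val

assert isinstance(coerce('0123456789abcdef01234567')[0], ObjectId)
assert coerce('not-an-id') == 'not-an-id'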
Example #7
    def dumpfiles(self, collection):
        mime = MimeTypes()

        db = MongoClient(host=self.host, port=self.port)[self.db]
        uploadsCollection = db[collection]
        fs = gridfs.GridFSBucket(db, bucket_name=collection)

        uploads = uploadsCollection.find({}, no_cursor_timeout=True)

        for upload in uploads:
            if upload["store"] == "GridFS:Uploads":
                if "complete" in upload and upload["complete"] is True:
                    path = upload["path"]
                    pathSegments = path.split("/")
                    gridfsId = pathSegments[3]
                    for res in fs.find({"_id": gridfsId}):
                        data = res.read()
                        fileext = ""
                        if "extension" in upload:
                            fileext = upload["extension"]
                        else:
                            fileext = mime.guess_extension(res.content_type)
                        if fileext is not None and fileext != "":
                            filename = gridfsId + "." + fileext
                        else:
                            filename = gridfsId
                        with open(self.outDir + "/" + filename, "wb") as file:
                            file.write(data)
                        self.addtolog(gridfsId, filename, collection, res.md5)
                else:
                    print(upload)
        self.writelog()
Example #8
    def process_item(self, item, spider):
        title = item['title'] or ""
        comic = item['comic']
        image = item['image']
        thumbnail = item['thumbnail']
        subtext = item['subtext']
        url = item['url']
        order = item['order']
        datetime = item['createdAt'] if 'createdAt' in item else None

        mongodb_item = self.collection.find_one({'comic': comic, 'url': url})

        if not mongodb_item:
            fs = gridfs.GridFSBucket(self.db)
            with open(image, 'rb') as img:
                file_id = fs.upload_from_stream(image, img)
            with open(thumbnail, 'rb') as thumb:
                thumbnail_file_id = fs.upload_from_stream(thumbnail, thumb)

            self.collection.insert_one({
                'comic': comic,
                'title': title,
                'image': image,
                'file_id': file_id,
                'text': subtext,
                'url': url,
                'order': order,
                'createdAt': datetime,
                'thumbnail': thumbnail_file_id
            })

        return item
Example #9
    def test_gridfs_replica_set(self):
        rsc = rs_client(w=self.w, read_preference=ReadPreference.SECONDARY)

        gfs = gridfs.GridFSBucket(rsc.pymongo_test)
        oid = gfs.upload_from_stream("test_filename", b'foo')
        content = gfs.open_download_stream(oid).read()
        self.assertEqual(b'foo', content)
Example #10
 def setUpClass(cls):
     super(TestAllScenarios, cls).setUpClass()
     cls.fs = gridfs.GridFSBucket(cls.db)
     cls.str_to_cmd = {
         "upload": cls.fs.upload_from_stream,
         "download": cls.fs.open_download_stream,
         "delete": cls.fs.delete,
         "download_by_name": cls.fs.open_download_stream_by_name}
Example #11
def gridfs_file_model(db):
    fs = gridfs.GridFSBucket(db)
    f_to_display = []
    for f in fs.find():
        tempFS = FS.build_from_GridFS(f)
        print("FS converted: {0}".format(tempFS))
        f_to_display.append(tempFS)
    return f_to_display
Example #12
def UpGridFSDicom(path, db):
    fs = gridfs.GridFSBucket(db)
    image_list = [os.path.basename(x) for x in glob(path + '/*.dicom')]
    for f in image_list:
        with open(path + f, 'rb') as dicom:
            f_id = fs.upload_from_stream(f, dicom)
        print(f'{f_id} {f}')
    print('Done')
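The two DICOM helpers pair upload_from_stream with download_to_stream_by_name; a self-contained round trip of that pairing, assuming a local mongod (database and file names are illustrative):

import io

import gridfs
from pymongo import MongoClient

db = MongoClient()['dicom_demo']  # illustrative database name
fs = gridfs.GridFSBucket(db)

# store a blob under a filename, then stream it back by that name
fs.upload_from_stream('scan.dicom', b'\x00\x01')
buf = io.BytesIO()
fs.download_to_stream_by_name('scan.dicom', buf)
assert buf.getvalue() == b'\x00\x01'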
Example #13
    def _save_thread(self, save_filters_permanent, save_filters_tmp, save_rec, step, save_to_gfs):
        if save_filters_permanent or save_filters_tmp:
            save_rec['saved_filters'] = True
            save_path = os.path.join(self.cache_dir, 'checkpoint')
            log.info('Saving model with path prefix %s ... ' % save_path)
            saved_path = self.tf_saver.save(self.sess,
                                            save_path=save_path,
                                            global_step=step,
                                            write_meta_graph=False)
            log.info('... done saving with path prefix %s' % saved_path)
            putfs = self.collfs if save_filters_permanent else self.collfs_recent
            log.info('Putting filters into %s database' % repr(putfs))
            save_rec['_saver_write_version'] = self.tf_saver._write_version
            if self.tf_saver._write_version == saver_pb2.SaverDef.V2:
                file_data = get_saver_pb2_v2_files(saved_path)
                save_rec['_saver_num_data_files'] = file_data['num_data_files']
                tarfilepath = saved_path + '.tar'
                tar = tarfile.open(tarfilepath, 'w')
                for _f in file_data['files']:
                    tar.add(_f, arcname=os.path.split(_f)[1])
                tar.close()
                with open(tarfilepath, 'rb') as _fp:
                    outrec = putfs.put(_fp, filename=tarfilepath, **save_rec)
            else:
                with open(saved_path, 'rb') as _fp:
                    outrec = putfs.put(_fp, filename=saved_path, **save_rec)
            log.info('... done putting filters into database.')

            if not save_filters_permanent:
                recent_gridfs_files = self.collfs_recent._GridFS__files
                recent_query_result = recent_gridfs_files.find({'saved_filters': True}, sort=[('uploadDate', 1)])
                num_cached_filters = recent_query_result.count()
                cache_max_num = self.cache_max_num
                if num_cached_filters > cache_max_num:
                    log.info('Cleaning up cached filters')
                    fsbucket = gridfs.GridFSBucket(recent_gridfs_files._Collection__database, bucket_name=recent_gridfs_files.name.split('.')[0])

                    for del_indx in xrange(0, num_cached_filters - cache_max_num):
                        #log.info(recent_query_result[del_indx]['uploadDate'])
                        fsbucket.delete(recent_query_result[del_indx]['_id'])

        if not save_filters_permanent:
            save_rec['saved_filters'] = False
            log.info('Inserting record into database.')
            outrec = self.collfs._GridFS__files.insert_one(save_rec)

        if not isinstance(outrec, ObjectId):
            outrec = outrec.inserted_id

        if save_to_gfs:
            idval = str(outrec)
            save_to_gfs_path = idval + "_fileitems"
            self.collfs.put(cPickle.dumps(save_to_gfs),
                            filename=save_to_gfs_path, item_for=outrec)

        sys.stdout.flush()  # flush the stdout buffer
        self.outrecs.append(outrec)
Example #14
    def __init__(self, dbname, colname, exp_id, port, cache_dir):
        self.exp_id = exp_id
        self.conn = conn = pm.MongoClient(port=port)

        self.coll = conn[dbname][colname + '.files']
        self.collfs = gridfs.GridFS(conn[dbname], colname)
        self.fs_bucket = gridfs.GridFSBucket(conn[dbname], colname)

        self.load_files_dir = os.path.join(cache_dir, dbname, colname, exp_id)
Example #15
def Gridup():
    db = client.kaggle1
    path = photo_test
    fs = gridfs.GridFSBucket(db)
    file_list = os.listdir(photo_test)
    # file_list = [os.path.basename(x) for x in glob(path + './*.jpg')]  # collect only the file names under the path into a list
    for file in file_list:
        with open(path + '\\' + file, 'rb') as jpg:
            fs.upload_from_stream(file, jpg)  # upload via GridFS
Example #16
def save_file_from_db(file_id, destination, db_uri, db_name):
    """
    Given a file_id (e.g. through get_file_id()) and a db_uri (a db connection string),
    save the corresponding file to `destination` (filename as string).
    """
    client = pymongo.MongoClient(db_uri, ssl=True)
    fs = gridfs.GridFSBucket(client[db_name])
    with open(destination, 'wb') as out_file:
        fs.download_to_stream(file_id, out_file)
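A hypothetical call to the helper above; the ObjectId, destination path, URI, and database name are all placeholders (ssl=True in the helper implies a TLS-enabled deployment):

from bson import ObjectId

save_file_from_db(ObjectId('0123456789abcdef01234567'),  # placeholder id
                  '/tmp/restored.bin',
                  'mongodb+srv://user:[email protected]/',  # placeholder URI
                  'mydb')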
Example #17
def Griddown():
    db = client.kaggle1
    # destination directory
    path = 'C:\\Users\\w\\jupyter\\kaggle\\input\\ranzcr-clip-catheter-line-classification\\downtest'
    fs = gridfs.GridFSBucket(db)
    for data in db.fs.files.find({}, {'filename':True}):
        filename = data['filename']
        with open(path +'\\' +filename, 'wb') as jpg:
            fs.download_to_stream_by_name(data['filename'], jpg)
    print('Done')
Example #18
 def _erase_gridfs(self):
     """
     Erase gridfs database
     Params:
     - none
     Returns:
     - none
     """
     fs_db = gridfs.GridFSBucket(self._db)
     for gridout in fs_db.find():
         fs_db.delete(gridout._id)
Example #19
 def __init__(self, uri: str) -> None:
     """Initialize the mongodb connection and grab pointers to the databases
        uri is the location of the database in a mongodb compatible form.
        http://dochub.mongodb.org/core/connections.
     """
     # Note: Need "connect=False" so that we don't connect until the first
     # time we interact with the database. Required for the celery server
     # running gem5.
     self.db = MongoClient(host=uri, connect=False).artifact_database
     self.artifacts = self.db.artifacts
     self.fs = gridfs.GridFSBucket(self.db, disable_md5=True)
Example #20
    def removeBlobsEntry(self, entry, i, db):
        dbId = entry['id']
        collectionName = entry['collection']
        fs = gridfs.GridFSBucket(db, bucket_name=collectionName)
        print("%i. Removing blob %s" % (i, dbId))

        try:
            fs.delete(dbId)
            print("%i. Removed blob %s" % (i, dbId))
        except Exception:
            print("%i. Failed removing blob %s" % (i, dbId))
Example #21
def getPhoto(dbFileName):
    client = pymongo.MongoClient(
        "mongodb+srv://erinruby:[email protected]/test?retryWrites=true"
    )  #ERIN's LOGIN
    db = client.prototype  #name of the db
    col = db.people  # name of the collection
    # fs=gridfs.GridFS(db)
    fs = gridfs.GridFSBucket(db)
    if not os.path.isfile(
            '../form_ui/static/tempImage/{0}'.format(dbFileName)):
        with open('../form_ui/static/tempImage/{0}'.format(dbFileName), 'wb') as file:
            fs.download_to_stream_by_name(dbFileName, file)
Example #22
 def removeBlobs(self):
     with open(self.outDir + "/log.csv") as csvfile:
         db = MongoClient(host=self.host, port=self.port)[self.db]
         reader = csv.reader(csvfile, delimiter=',')
         for row in reader:
             dbId = row[0]
             collectionName = row[2]
             fs = gridfs.GridFSBucket(db, bucket_name=collectionName)
             try:
                 fs.delete(dbId)
              except Exception:  # id missing or already deleted
                 continue
Example #23
 def test_multi_chunk_delete(self):
     self.assertEqual(0, self.db.fs.files.count_documents({}))
     self.assertEqual(0, self.db.fs.chunks.count_documents({}))
     gfs = gridfs.GridFSBucket(self.db)
     oid = gfs.upload_from_stream("test_filename",
                                  b"hello",
                                  chunk_size_bytes=1)
     self.assertEqual(1, self.db.fs.files.count_documents({}))
     self.assertEqual(5, self.db.fs.chunks.count_documents({}))
     gfs.delete(oid)
     self.assertEqual(0, self.db.fs.files.count_documents({}))
     self.assertEqual(0, self.db.fs.chunks.count_documents({}))
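The chunk counts asserted above follow from GridFS's chunking rule: an upload is split into ceil(len(data) / chunk_size_bytes) documents in fs.chunks, plus one document in fs.files. A quick check of that arithmetic:

import math

def expected_chunks(data, chunk_size_bytes):
    # one fs.chunks document per chunk_size_bytes slice of the payload
    return math.ceil(len(data) / chunk_size_bytes)

assert expected_chunks(b"hello", 1) == 5
assert expected_chunks(b"hello", 255 * 1024) == 1  # default chunk size fits it in one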
Example #24
def main():
    with open("article_links") as f:
        links = f.readlines()

    db = MongoClient().aaronsw
    bucket = gridfs.GridFSBucket(db)

    for link in links:
        link = link.strip()  # readlines() keeps trailing newlines
        # fetch the page; urllib stands in here for the missing fetch helper
        # (assumes `import urllib.request` alongside the other imports)
        html = urllib.request.urlopen(link).read().decode('utf-8')
        bucket.upload_from_stream(link,
                                  html.encode('utf-8', 'strict'),
                                  metadata={"contentType": "text/html"})
        time.sleep(10)  # pause between requests
Example #25
    def test_gridfs_secondary(self):
        secondary_host, secondary_port = one(self.client.secondaries)
        secondary_connection = single_client(
            secondary_host, secondary_port,
            read_preference=ReadPreference.SECONDARY)

        # Should detect it's connected to secondary and not attempt to
        # create index
        gfs = gridfs.GridFSBucket(
            secondary_connection.gfsbucketreplica, 'gfsbucketsecondarytest')

        # This won't detect secondary, raises error
        self.assertRaises(NotPrimaryError, gfs.upload_from_stream,
                          "test_filename", b'foo')
Example #26
    def dumpfiles(self, collection, store):
        mime = MimeTypes()

        db = self.getdb()
        uploadsCollection = db[collection]
        fs = gridfs.GridFSBucket(db, bucket_name=collection)

        uploads = uploadsCollection.find({}, no_cursor_timeout=True)

        i = 0
        for upload in uploads:
            if upload["store"] == "GridFS:Uploads":
                gridfsId = upload['_id']
                if "complete" in upload and upload["complete"] is True:
                    for res in fs.find({"_id": gridfsId}):
                        data = res.read()
                        filename = gridfsId
                        fileext = ""

                        if "extension" in upload and upload["extension"] != "":
                            fileext = "." + upload["extension"]
                        else:
                            fileext = mime.guess_extension(res.content_type)

                        if fileext is not None and fileext != "":
                            filename = filename + fileext

                        i += 1
                        print("%i. Dumping %s %s" %
                              (i, gridfsId, upload['name']))
                        key = store.put(filename, data, upload)
                        print("%i. Finished dumping %s %s" %
                              (i, gridfsId, upload['name']))
                        logitem = {
                            "id": gridfsId,
                            "file": filename,
                            "collection": collection,
                            "md5": res.md5,
                            "key": key
                        }
                        self.updateDbEntry(logitem, i, db)
                        self.removeBlobsEntry(logitem, i, db)

                        self.addtolog(logitem)
                        self.writelog()
                        self.log.pop()
                else:
                    print("[Warning] Skipping incomplete upload %s" %
                          (gridfsId),
                          file=sys.stderr)
Example #27
def count_gridfs(db):
    """ Summary of procesed files in database
    Params:
    -db
    Returns:
    -  Print stats from the dabase
    """
    print("\n********************")
    print("***COUNT GRIDFS***")
    print("********************")
    fs = gridfs.GridFSBucket(db)
    count = 0
    for gridout in fs.find():
        count = count + 1
    print("\n--->" + str(count) + " items in GriFS\n")
Example #28
 def periodic_vt(self):
     """
     empty
     """
     while True:
         fs = gridfs.GridFSBucket(self._db)
         for gridout in fs.find():
             id_malware = gridout.filename
             malware = self._model_malware.select_one_malware_collection(id_malware)
             if malware["vt_report_malware"] is not True:
                 md5 = self._model_gridfs.get_file_from_gridfs(id_malware).md5
                 if self._get_virustotal(md5) is not None:
                     self._insert_vt(id_malware, md5)
                 else:
                     self._view.display_missing_item_error(id_malware, "vt")
                 time.sleep(25)
Example #29
    def __init__(self, database: pymongo.database.Database):
        self._database = database
        migrate.ensure_up_to_date(database, migrations.LATEST)

        self._data_collection = database[self.DATA_COLLECTION]
        self._history_collection = database[self.HISTORY_COLLECTION]
        self._meta_collection = database[self.META_COLLECTION]
        self._file_bucket = gridfs.GridFSBucket(database)
        self._refman = references.ReferenceManager(database[DEFAULT_REFERENCES_COLLECTION],
                                                   self._data_collection, self._history_collection)

        self._snapshots = MongoRecordCollection(self, self._history_collection,
                                                self._meta_collection.name)
        self._objects = MongoRecordCollection(self, self._data_collection,
                                              self._meta_collection.name)

        self._create_indices()
Example #30
    def test_gridfs_secondary_lazy(self):
        # Should detect it's connected to secondary and not attempt to
        # create index.
        secondary_host, secondary_port = one(self.secondaries)
        client = single_client(secondary_host,
                               secondary_port,
                               read_preference=ReadPreference.SECONDARY,
                               connect=False)

        # Still no connection.
        gfs = gridfs.GridFSBucket(client.test_gridfs_secondary_lazy)

        # Connects, doesn't create index.
        self.assertRaises(NoFile, gfs.open_download_stream_by_name,
                          "test_filename")
        self.assertRaises(ConnectionFailure, gfs.upload_from_stream,
                          "test_filename", b'data')