def test_gridfs_lazy_connect(self):
    client = MongoClient('badhost', connect=False,
                         serverSelectionTimeoutMS=0)
    cdb = client.db
    gfs = gridfs.GridFSBucket(cdb)
    self.assertRaises(ServerSelectionTimeoutError, gfs.delete, 0)

    gfs = gridfs.GridFSBucket(cdb)  # Still no connection.
    self.assertRaises(
        ServerSelectionTimeoutError, gfs.upload_from_stream, "test", b"")
def read_model(self, collection, id):
    """Reads a serialized sklearn model from MongoDB.

    Args:
        collection (str): collection name
        id (ObjectId): the id of the file

    Returns:
        bytes: the serialized model
    """
    # Establish connection
    if self.client is None:
        self.create_client()
    # Connect to db
    db = self.client[self.db]
    fs = gridfs.GridFSBucket(db, collection)
    try:
        with fs.open_download_stream(id) as handler:
            out = handler.read()
        # len(out) counts bytes, not GridFS chunks.
        self.logger.info(
            f'[GRIDFS_READ]: {len(out)} bytes returned from '
            f'[COLLECTION]: {collection} with [ID]: {id}')
    except Exception as e:
        self.logger.error('MongoDB read failed: {}'.format(e))
        raise
    return out
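# A minimal usage sketch for read_model above. Assumptions: the method lives
# on a store class (ModelStore is a hypothetical name) that provides
# `client`, `db`, `create_client`, and `logger`; the collection name and id
# are illustrative.
#
#     import pickle
#     store = ModelStore()
#     raw = store.read_model('models', model_object_id)
#     clf = pickle.loads(raw)  # bytes back into an sklearn estimator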
def __init__(self, database, collection='fs', logfile=None,
             debug=os.environ.get('GRIDFS_FUSE_DEBUG'),
             filename_encoding='utf-8'):
    super().__init__()

    self.logger = logging.getLogger("gridfs_fuse")
    self.logger.setLevel(logging.DEBUG if debug else logging.ERROR)
    try:
        self.handler = logging.FileHandler(logfile)
        self.handler.setLevel(logging.DEBUG)
    except Exception:
        # No usable logfile given; keep the default handler.
        pass

    # self._readonly = True
    self._readonly = False

    self._database = database
    self._collection = collection
    self._filename_encoding = filename_encoding

    self.gridfs = gridfs.GridFS(database, collection)
    self.gridfsbucket = gridfs.GridFSBucket(database, collection)
    self.gridfs_files = compat_collection(database, collection + '.files')

    self.inode2id = {pyfuse3.ROOT_INODE: '0'}
    self.id2inode = {'0': pyfuse3.ROOT_INODE}
    self._last_inode = pyfuse3.ROOT_INODE + 1

    self.root_stamp = time_ns()

    self.active_inodes = collections.defaultdict(int)
    self.active_writes = {}
def __init__(self) -> None:
    # Note: Need "connect=False" so that we don't connect until the first
    # time we interact with the database. Required for the gem5 running
    # celery server.
    self.db = MongoClient(connect=False).artifact_database
    self.artifacts = self.db.artifacts
    self.fs = gridfs.GridFSBucket(self.db, disable_md5=True)
def DownGridFSDicom(path, db):
    fs = gridfs.GridFSBucket(db)
    for data in db.fs.files.find({}, {'filename': True}):
        filename = data['filename']
        with open(path + filename, 'wb') as dicom:
            fs.download_to_stream_by_name(filename, dicom)
    print('Done')
def mongodb_gridfs(connector_access, local_result_file, meta_data):
    local_file_path = os.path.join(local_result_file['dir'],
                                   local_result_file['name'])
    client = _mongodb_client(connector_access)
    db = client[connector_access['db']]
    fs = gridfs.GridFSBucket(db)
    md = connector_access.get('metadata')
    if meta_data:
        if not md:
            md = {}
        for key, val in meta_data.items():
            try:
                # Store values that parse as ObjectIds as such.
                md[key] = [ObjectId(val)]
            except Exception:
                md[key] = val
    with open(local_file_path, 'rb') as f:
        fs.upload_from_stream(
            connector_access.get('file_name', str(uuid.uuid4())),
            f, chunk_size_bytes=4096, metadata=md)
    client.close()
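# A hypothetical connector_access payload for mongodb_gridfs, inferred from
# the keys the function reads; every value below is illustrative, and the
# connection fields depend on what _mongodb_client() expects.
example_connector_access = {
    'db': 'results',                  # database selected via client[...]
    'file_name': 'result.csv',        # optional; falls back to str(uuid.uuid4())
    'metadata': {'pipeline': 'demo'}  # optional base metadata, merged with meta_data
}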
def dumpfiles(self, collection):
    mime = MimeTypes()
    db = MongoClient(host=self.host, port=self.port)[self.db]
    uploadsCollection = db[collection]
    fs = gridfs.GridFSBucket(db, bucket_name=collection)
    uploads = uploadsCollection.find({}, no_cursor_timeout=True)
    for upload in uploads:
        if upload["store"] == "GridFS:Uploads":
            if "complete" in upload and upload["complete"] is True:
                path = upload["path"]
                pathSegments = path.split("/")
                gridfsId = pathSegments[3]
                for res in fs.find({"_id": gridfsId}):
                    data = res.read()
                    if "extension" in upload:
                        fileext = upload["extension"]
                    else:
                        fileext = mime.guess_extension(res.content_type)
                    if fileext is not None and fileext != "":
                        filename = gridfsId + "." + fileext
                    else:
                        filename = gridfsId
                    with open(self.outDir + "/" + filename, "wb") as file:
                        file.write(data)
                    self.addtolog(gridfsId, filename, collection, res.md5)
            else:
                print(upload)
    self.writelog()
def process_item(self, item, spider):
    title = item['title'] or ""
    comic = item['comic']
    image = item['image']
    thumbnail = item['thumbnail']
    subtext = item['subtext']
    url = item['url']
    order = item['order']
    created_at = item['createdAt'] if 'createdAt' in item else None
    mongodb_item = self.collection.find_one({'comic': comic, 'url': url})
    if not mongodb_item:
        fs = gridfs.GridFSBucket(self.db)
        # Open the files in binary mode for GridFS, and store the thumbnail
        # under its own name rather than reusing the image's.
        file_id = fs.upload_from_stream(image, open(image, 'rb'))
        thumbnail_file_id = fs.upload_from_stream(thumbnail,
                                                  open(thumbnail, 'rb'))
        self.collection.insert_one({
            'comic': comic,
            'title': title,
            'image': image,
            'file_id': file_id,
            'text': subtext,
            'url': url,
            'order': order,
            'createdAt': created_at,
            'thumbnail': thumbnail_file_id
        })
    return item
def test_gridfs_replica_set(self):
    rsc = rs_client(w=self.w, read_preference=ReadPreference.SECONDARY)
    gfs = gridfs.GridFSBucket(rsc.pymongo_test)
    oid = gfs.upload_from_stream("test_filename", b'foo')
    content = gfs.open_download_stream(oid).read()
    self.assertEqual(b'foo', content)
def setUpClass(cls):
    super(TestAllScenarios, cls).setUpClass()
    cls.fs = gridfs.GridFSBucket(cls.db)
    cls.str_to_cmd = {
        "upload": cls.fs.upload_from_stream,
        "download": cls.fs.open_download_stream,
        "delete": cls.fs.delete,
        "download_by_name": cls.fs.open_download_stream_by_name}
def gridfs_file_model(db):
    fs = gridfs.GridFSBucket(db)
    f_to_display = []
    for f in fs.find():
        tempFS = FS.build_from_GridFS(f)
        print("FS converted: {0}".format(tempFS))
        f_to_display.append(tempFS)  # collect the converted model, not the raw GridOut
    return f_to_display
def UpGridFSDicom(path, db):
    fs = gridfs.GridFSBucket(db)
    image_list = [os.path.basename(x) for x in glob(path + './*.dicom')]
    for f in image_list:
        with open(path + f, 'rb') as dicom:
            f_id = fs.upload_from_stream(f, dicom)
            print(f'{f_id} {f}')
    print('Done')
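# A round-trip sketch for the two DICOM helpers (UpGridFSDicom above and
# DownGridFSDicom earlier). Assumption carried over from the originals:
# `path` must end with a path separator, because both build file paths by
# plain string concatenation; the database name is illustrative.
#
#     db = MongoClient()['dicom_store']
#     UpGridFSDicom('/data/scans/', db)         # upload every *.dicom file
#     DownGridFSDicom('/data/restore/', db)     # stream each one back by name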
def _save_thread(self, save_filters_permanent, save_filters_tmp, save_rec,
                 step, save_to_gfs):
    if save_filters_permanent or save_filters_tmp:
        save_rec['saved_filters'] = True
        save_path = os.path.join(self.cache_dir, 'checkpoint')
        log.info('Saving model with path prefix %s ... ' % save_path)
        saved_path = self.tf_saver.save(self.sess,
                                        save_path=save_path,
                                        global_step=step,
                                        write_meta_graph=False)
        log.info('... done saving with path prefix %s' % saved_path)
        putfs = self.collfs if save_filters_permanent else self.collfs_recent
        log.info('Putting filters into %s database' % repr(putfs))
        save_rec['_saver_write_version'] = self.tf_saver._write_version
        if self.tf_saver._write_version == saver_pb2.SaverDef.V2:
            file_data = get_saver_pb2_v2_files(saved_path)
            save_rec['_saver_num_data_files'] = file_data['num_data_files']
            tarfilepath = saved_path + '.tar'
            tar = tarfile.open(tarfilepath, 'w')
            for _f in file_data['files']:
                tar.add(_f, arcname=os.path.split(_f)[1])
            tar.close()
            with open(tarfilepath, 'rb') as _fp:
                outrec = putfs.put(_fp, filename=tarfilepath, **save_rec)
        else:
            with open(saved_path, 'rb') as _fp:
                outrec = putfs.put(_fp, filename=saved_path, **save_rec)
        log.info('... done putting filters into database.')

        if not save_filters_permanent:
            recent_gridfs_files = self.collfs_recent._GridFS__files
            recent_query_result = recent_gridfs_files.find(
                {'saved_filters': True}, sort=[('uploadDate', 1)])
            num_cached_filters = recent_query_result.count()
            cache_max_num = self.cache_max_num
            if num_cached_filters > cache_max_num:
                log.info('Cleaning up cached filters')
                fsbucket = gridfs.GridFSBucket(
                    recent_gridfs_files._Collection__database,
                    bucket_name=recent_gridfs_files.name.split('.')[0])
                for del_indx in xrange(0, num_cached_filters - cache_max_num):
                    # log.info(recent_query_result[del_indx]['uploadDate'])
                    fsbucket.delete(recent_query_result[del_indx]['_id'])

    if not save_filters_permanent:
        save_rec['saved_filters'] = False
        log.info('Inserting record into database.')
        outrec = self.collfs._GridFS__files.insert_one(save_rec)

    if not isinstance(outrec, ObjectId):
        outrec = outrec.inserted_id

    if save_to_gfs:
        idval = str(outrec)
        save_to_gfs_path = idval + "_fileitems"
        self.collfs.put(cPickle.dumps(save_to_gfs),
                        filename=save_to_gfs_path, item_for=outrec)

    sys.stdout.flush()  # flush the stdout buffer
    self.outrecs.append(outrec)
def __init__(self, dbname, colname, exp_id, port, cache_dir):
    self.exp_id = exp_id
    self.conn = conn = pm.MongoClient(port=port)
    self.coll = conn[dbname][colname + '.files']
    self.collfs = gridfs.GridFS(conn[dbname], colname)
    self.fs_bucket = gridfs.GridFSBucket(conn[dbname], colname)
    self.load_files_dir = os.path.join(cache_dir, dbname, colname, exp_id)
def Gridup():
    db = client.kaggle1
    path = photo_test
    fs = gridfs.GridFSBucket(db)
    file_list = os.listdir(photo_test)
    # file_list = [os.path.basename(x) for x in glob(path + './*.jpg')]
    # Collect only the file names inside the path into a list.
    for file in file_list:
        with open(path + '\\' + file, 'rb') as jpg:
            fs.upload_from_stream(file, jpg)  # upload via the GridFS bucket
def save_file_from_db(file_id, destination, db_uri, db_name):
    """
    Given a file_id (e.g. through get_file_id()) and a db_uri (a db
    connection string), save the corresponding file to `destination`
    (filename as string).
    """
    client = pymongo.MongoClient(db_uri, ssl=True)
    fs = gridfs.GridFSBucket(client[db_name])
    # Use a context manager so the file is flushed and closed on exit.
    with open(destination, 'wb+') as open_file:
        fs.download_to_stream(file_id, open_file)
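# A minimal call sketch for save_file_from_db; the URI, database name, and
# id value are illustrative assumptions, not from the source:
#
#     file_id = get_file_id(...)               # yields an ObjectId
#     save_file_from_db(file_id, '/tmp/report.pdf',
#                       'mongodb://localhost:27017', 'filestore')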
def Griddown():
    db = client.kaggle1
    # Directory to save the downloads into.
    path = 'C:\\Users\\w\\jupyter\\kaggle\\input\\ranzcr-clip-catheter-line-classification\\downtest'
    fs = gridfs.GridFSBucket(db)
    for data in db.fs.files.find({}, {'filename': True}):
        filename = data['filename']
        with open(path + '\\' + filename, 'wb') as jpg:
            fs.download_to_stream_by_name(filename, jpg)
    print('Done')
def _erase_gridfs(self):
    """
    Erase gridfs database

    Params:
        - none
    Returns:
        - none
    """
    fs_db = gridfs.GridFSBucket(self._db)
    for gridout in fs_db.find():
        fs_db.delete(gridout._id)
def __init__(self, uri: str) -> None:
    """Initialize the mongodb connection and grab pointers to the databases.

    uri is the location of the database in a mongodb compatible form.
    http://dochub.mongodb.org/core/connections.
    """
    # Note: Need "connect=False" so that we don't connect until the first
    # time we interact with the database. Required for the gem5 running
    # celery server.
    self.db = MongoClient(host=uri, connect=False).artifact_database
    self.artifacts = self.db.artifacts
    self.fs = gridfs.GridFSBucket(self.db, disable_md5=True)
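# Construction sketch (the class name ArtifactDB and the URI are assumptions,
# not from the source). Note that disable_md5 is a PyMongo 3.x GridFSBucket
# option that was removed in PyMongo 4.0, so this snippet implies the older
# driver:
#
#     db = ArtifactDB('mongodb://localhost:27017')
#     db.artifacts.insert_one({'name': 'example'})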
def removeBlobsEntry(self, entry, i, db):
    dbId = entry['id']
    collectionName = entry['collection']
    fs = gridfs.GridFSBucket(db, bucket_name=collectionName)
    print("%i. Removing blob %s" % (i, dbId))
    try:
        fs.delete(dbId)
        print("%i. Removed blob %s" % (i, dbId))
    except Exception:
        print("%i. Failed removing blob %s" % (i, dbId))
def getPhoto(dbFileName):
    client = pymongo.MongoClient(
        "mongodb+srv://erinruby:[email protected]/test?retryWrites=true"
    )  # ERIN's LOGIN
    db = client.prototype  # name of the db
    col = db.people  # name of the collection
    # fs = gridfs.GridFS(db)
    fs = gridfs.GridFSBucket(db)
    if not os.path.isfile(
            '../form_ui/static/tempImage/{0}'.format(dbFileName)):
        with open('../form_ui/static/tempImage/{0}'.format(dbFileName),
                  'wb') as file:
            fs.download_to_stream_by_name(dbFileName, file)
def removeBlobs(self):
    with open(self.outDir + "/log.csv") as csvfile:
        db = MongoClient(host=self.host, port=self.port)[self.db]
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            dbId = row[0]
            collectionName = row[2]
            fs = gridfs.GridFSBucket(db, bucket_name=collectionName)
            try:
                fs.delete(dbId)
            except Exception:
                continue
def test_multi_chunk_delete(self):
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
    gfs = gridfs.GridFSBucket(self.db)
    oid = gfs.upload_from_stream("test_filename", b"hello",
                                 chunk_size_bytes=1)
    self.assertEqual(1, self.db.fs.files.count_documents({}))
    self.assertEqual(5, self.db.fs.chunks.count_documents({}))
    gfs.delete(oid)
    self.assertEqual(0, self.db.fs.files.count_documents({}))
    self.assertEqual(0, self.db.fs.chunks.count_documents({}))
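# GridFS stores ceil(len(data) / chunk_size_bytes) chunk documents per file,
# which is why b"hello" at one byte per chunk yields five chunks above. A
# self-contained sanity check of that arithmetic (illustrative helper, not
# part of the original test suite):
import math

def expected_chunk_count(data: bytes, chunk_size_bytes: int) -> int:
    """Number of fs.chunks documents GridFS will create for `data`."""
    return math.ceil(len(data) / chunk_size_bytes)

assert expected_chunk_count(b"hello", 1) == 5
assert expected_chunk_count(b"hello", 255 * 1024) == 1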
def main():
    with open("article_links") as f:
        links = f.readlines()
    db = MongoClient().aaronsw
    bucket = gridfs.GridFSBucket(db)
    for link in links:
        link = link.strip()  # drop the trailing newline from readlines()
        # Keep the page text in its own variable instead of rebinding the
        # html() helper, which would break on the second iteration.
        page = html(link)
        bucket.upload_from_stream(link, page.encode('utf-8', 'strict'),
                                  metadata={"contentType": "text/html"})
        time.sleep(10)  # pause between requests
def test_gridfs_secondary(self):
    secondary_host, secondary_port = one(self.client.secondaries)
    secondary_connection = single_client(
        secondary_host, secondary_port,
        read_preference=ReadPreference.SECONDARY)

    # Should detect it's connected to secondary and not attempt to
    # create index.
    gfs = gridfs.GridFSBucket(
        secondary_connection.gfsbucketreplica, 'gfsbucketsecondarytest')

    # This won't detect secondary, raises error.
    self.assertRaises(NotPrimaryError, gfs.upload_from_stream,
                      "test_filename", b'foo')
def dumpfiles(self, collection, store):
    mime = MimeTypes()
    db = self.getdb()
    uploadsCollection = db[collection]
    fs = gridfs.GridFSBucket(db, bucket_name=collection)
    uploads = uploadsCollection.find({}, no_cursor_timeout=True)
    i = 0
    for upload in uploads:
        if upload["store"] == "GridFS:Uploads":
            gridfsId = upload['_id']
            if "complete" in upload and upload["complete"] is True:
                for res in fs.find({"_id": gridfsId}):
                    data = res.read()
                    filename = gridfsId
                    if "extension" in upload and upload["extension"] != "":
                        fileext = "." + upload["extension"]
                    else:
                        fileext = mime.guess_extension(res.content_type)
                    if fileext is not None and fileext != "":
                        filename = filename + fileext
                    i += 1
                    print("%i. Dumping %s %s" % (i, gridfsId, upload['name']))
                    key = store.put(filename, data, upload)
                    print("%i. Finished dumping %s %s"
                          % (i, gridfsId, upload['name']))
                    logitem = {
                        "id": gridfsId,
                        "file": filename,
                        "collection": collection,
                        "md5": res.md5,
                        "key": key
                    }
                    self.updateDbEntry(logitem, i, db)
                    self.removeBlobsEntry(logitem, i, db)
                    self.addtolog(logitem)
                    self.writelog()
                    self.log.pop()
            else:
                print("[Warning] Skipping incomplete upload %s" % (gridfsId),
                      file=sys.stderr)
def count_gridfs(db):
    """
    Summary of processed files in database

    Params:
        - db
    Returns:
        - Print stats from the database
    """
    print("\n********************")
    print("***COUNT GRIDFS***")
    print("********************")
    fs = gridfs.GridFSBucket(db)
    count = 0
    for gridout in fs.find():
        count = count + 1
    print("\n--->" + str(count) + " items in GridFS\n")
def periodic_vt(self):
    """Periodically poll VirusTotal for every sample stored in GridFS."""
    while True:
        fs = gridfs.GridFSBucket(self._db)
        for gridout in fs.find():
            id_malware = gridout.filename
            malware = self._model_malware.select_one_malware_collection(
                id_malware)
            if malware["vt_report_malware"] is not True:
                md5 = self._model_gridfs.get_file_from_gridfs(id_malware).md5
                if self._get_virustotal(md5) is not None:
                    self._insert_vt(id_malware, md5)
                else:
                    self._view.display_missing_item_error(id_malware, "vt")
        time.sleep(25)
def __init__(self, database: pymongo.database.Database):
    self._database = database
    migrate.ensure_up_to_date(database, migrations.LATEST)

    self._data_collection = database[self.DATA_COLLECTION]
    self._history_collection = database[self.HISTORY_COLLECTION]
    self._meta_collection = database[self.META_COLLECTION]
    self._file_bucket = gridfs.GridFSBucket(database)
    self._refman = references.ReferenceManager(
        database[DEFAULT_REFERENCES_COLLECTION],
        self._data_collection,
        self._history_collection)

    self._snapshots = MongoRecordCollection(
        self, self._history_collection, self._meta_collection.name)
    self._objects = MongoRecordCollection(
        self, self._data_collection, self._meta_collection.name)

    self._create_indices()
def test_gridfs_secondary_lazy(self):
    # Should detect it's connected to secondary and not attempt to
    # create index.
    secondary_host, secondary_port = one(self.secondaries)
    client = single_client(secondary_host, secondary_port,
                           read_preference=ReadPreference.SECONDARY,
                           connect=False)

    # Still no connection.
    gfs = gridfs.GridFSBucket(client.test_gridfs_secondary_lazy)

    # Connects, doesn't create index.
    self.assertRaises(NoFile, gfs.open_download_stream_by_name,
                      "test_filename")
    self.assertRaises(ConnectionFailure, gfs.upload_from_stream,
                      "test_filename", b'data')