def test_post_grid_calendar_returns_success_status(app, coverage, get_app_context):
    """POSTing a grid calendar twice replaces the stored GridFS file.

    Verifies the upload succeeds, the file is registered on the coverage,
    and that a re-upload deletes the previous GridFS file and stores a new
    one under a new id.
    """
    filename = 'export_calendars.zip'
    path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        'fixtures/gridcalendar/', filename)
    # Fix: open the fixture in a context manager so the handle is closed
    # (the original leaked both file objects).
    with open(path, 'rb') as fixture:
        files = {'file': (fixture, 'export_calendars.zip')}
        raw = app.post('/coverages/jdr/grid_calendar', data=files)
    r = to_json(raw)
    assert raw.status_code == 200
    assert r.get('message') == 'OK'
    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    gridfs = GridFS(mongo.db)
    file_id = r['coverages'][0]['grid_calendars_id']
    assert gridfs.exists(ObjectId(file_id))
    # we update the file (it's the same, but that's not the point)
    with open(path, 'rb') as fixture:
        files = {'file': (fixture, 'export_calendars.zip')}
        raw = app.post('/coverages/jdr/grid_calendar', data=files)
    assert raw.status_code == 200
    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    # it should be another file
    assert file_id != r['coverages'][0]['grid_calendars_id']
    # the previous file has been deleted
    assert not gridfs.exists(ObjectId(file_id))
    # and the new one exists
    assert gridfs.exists(ObjectId(r['coverages'][0]['grid_calendars_id']))
def main_config():
    """Render and handle the main configuration page.

    GET: show current configs, uploaded branding images and the public key.
    POST: either update a config document (action 'configurations') or
    regenerate the RSA key pair (action 'genkeys'). Requires a logged-in
    session; POST additionally requires read-write access.
    """
    if (not session.get('logged_in')):
        return redirect(url_for('login'))
    access = check_access('main_config')
    if (request.method == 'POST' and access != 'rw'):
        abort(403)
    # Fixed filenames under which the key pair lives in the 'certs' GridFS.
    filenames = {'pubkey': 'rsa_1024_pub.pem', 'privkey': 'rsa_1024_priv.pem'}
    certs = None
    if (request.method == 'POST'):
        changes = to_dict(request.form)
        if (changes['action'] == 'configurations'):
            # 'action' is form plumbing, not config data -- drop it before $set.
            del (changes['action'])
            mongodb.db.configs.update_one({'name': changes['name']},
                                          {'$set': changes})
        elif (changes['action'] == 'genkeys'):
            certs = genkeypair()
            gridfsdb = database.Database(
                MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
            fs = GridFS(gridfsdb)
            # Replace any previously stored key files with the new pair.
            for key in ['privkey', 'pubkey']:
                oldfile = fs.find_one({'filename': filenames[key]})
                if (oldfile is not None):
                    fs.delete(oldfile._id)
                fs.put(certs[key].copy(), content_type="text/plain",
                       filename=filenames[key])
    result = mongodb.db.configs.find({}, {'_id': 0})
    # Check which branding images have been uploaded.
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'images')
    fs = GridFS(gridfsdb)
    avatar = fs.exists(filename='avatar.png')
    background = fs.exists(filename='background.png')
    logo = fs.exists(filename='logo.png')
    imgresult = {'avatar': avatar, 'background': background, 'logo': logo}
    if (certs is None):
        # No key pair generated during this request: load the stored
        # public key (if any) so it can still be displayed.
        gridfsdb = database.Database(
            MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
        fs = GridFS(gridfsdb)
        if (fs.exists(filename=filenames['pubkey'])):
            file = fs.get_last_version(filenames['pubkey'])
            pubkey = file.read()
            certs = {'pubkey': pubkey}
    languages = copy_cursor(
        mongodb.db.languages.find({}, sort=([('name', 1), ('variant', 1)])))
    return render_template('main_config.html', access=access, images=imgresult,
                           configs=result, certs=certs, languages=languages)
def test_delete_file_from_GridFS(self):
    """The GridFSFileDeleter task must remove the stored file."""
    content = "File content"
    gridfs = GridFS(self.db,
                    collection=config.MONGODB_CONFIG['gridfs_collection'])
    new_file_id = gridfs.put(content)
    expected_file_data = gridfs.get(new_file_id)
    self.document['file_id'] = str(new_file_id)
    # Sanity check before triggering the deleter.
    self.assertTrue(gridfs.exists(new_file_id))
    GridFSFileDeleter().delay(self.fake_id)
    self.assertFalse(gridfs.exists(new_file_id))
def upLoadFile(self, file_id, file_name, class_name, host, author,
               is_published, file_type, file_data, remark, url):
    """Store `file_data` in the 'file_info' GridFS collection with metadata.

    Refuses to store a file when one with the same file_name/author pair
    already exists. Returns a dict with a result code and message.
    """
    client = pymongo.MongoClient(self.db_url)
    db = client[self.db]
    filter_condition = {
        "file_id": file_id,
        "file_name": file_name,
        "class_name": class_name,
        "url": url,
        "host": host,
        "author": author,
        "file_type": file_type,
        "is_published": is_published,
        "download": 0,
        "remark": remark
    }
    gridfs_col = GridFS(db, collection="file_info")
    query = {"file_name": file_name, "author": author}
    if gridfs_col.exists(query):
        result = {"code": "20000699", "message": "文件已经存在"}
    else:
        try:
            gridfs_col.put(data=file_data, **filter_condition)
            result = {"code": "20000600", "message": "文件上传成功"}
        except Exception as e:
            # Fix: str + Exception raised TypeError in the original,
            # masking the real failure; convert explicitly.
            result = {"code": "20000699",
                      "message": "文件上传失败,原因:" + str(e)}
    return result
class GridFSStorage(Storage):
    """Django storage backend that keeps files in MongoDB GridFS.

    Connection settings come from Django settings (GRIDFS_HOST,
    GRIDFS_PORT, GRIDFS_COLLECTION); explicit constructor arguments
    override them.
    """

    def __init__(self, host='localhost', port=27017, collection='fs'):
        # Settings provide defaults; truthy constructor args win.
        for s in ('host', 'port', 'collection'):
            name = 'GRIDFS_' + s.upper()
            if hasattr(settings, name):
                setattr(self, s, getattr(settings, name))
        for s, v in zip(('host', 'port', 'collection'),
                        (host, port, collection)):
            if v:
                setattr(self, s, v)
        self.db = Connection(host=self.host, port=self.port)[self.collection]
        self.fs = GridFS(self.db)

    def _save(self, name, content):
        self.fs.put(content, filename=name)
        return name

    def _open(self, name, *args, **kwars):
        return self.fs.get_last_version(filename=name)

    def delete(self, name):
        # Fix: the original referenced the undefined global `fs`
        # (NameError at runtime); use the instance's GridFS handle.
        oid = self.fs.get_last_version(filename=name)._id
        self.fs.delete(oid)

    def exists(self, name):
        return self.fs.exists({'filename': name})

    def size(self, name):
        return self.fs.get_last_version(filename=name).length
def isExists(self, file_coll, filename):
    """Print a notice when `filename` exists in the given GridFS collection."""
    bucket = GridFS(self.db, collection=file_coll)
    if bucket.exists({"filename": filename}):
        print(f'(unknown)存在')
def test_it_saves_files(self):
    """save_file should create a GridFS entry under the given filename."""
    payload = BytesIO(b"these are the bytes")
    self.mongo.save_file("my-file", payload)
    assert GridFS(self.mongo.db).exists({"filename": "my-file"})
def consulta(year, month, terminal, arquivo, id):
    """Command-line query for stored XML files.

    Resolves either an explicit ObjectId or, failing that, filters
    fs.files by scan date, terminal and filename regex (first 10 matches),
    then decodes and prints each XML plus its dict form.
    """
    fs = GridFS(db)
    _ids = []
    if id:
        _ids.append(ObjectId(id))
    else:
        filtro = {'metadata.contentType': 'text/xml'}
        if year and month:
            data_inicio = datetime(year, month, 1)
            filtro['metadata.dataescaneamento'] = {'$gt': data_inicio}
        if terminal:
            filtro['metadata.recinto'] = terminal
        if arquivo:
            filtro['filename'] = {'$regex': arquivo}
        cursor = db['fs.files'].find(filtro).limit(10)
        _ids = [row['_id'] for row in cursor]
    for _id in _ids:
        if fs.exists(_id):
            grid_out = fs.get(_id)
            raw = grid_out.read()
            # Encoding is not stored with the file; detect it heuristically.
            encode = chardet.detect(raw)
            try:
                xml = raw.decode(encode['encoding'])
                print(xml)
                print(xmli.xml_todict(xml))
            except Exception as err:
                # Best-effort CLI tool: report and continue with next id.
                print(err)
def setupcode():
    """Return an SVG QR code encoding "<server_address>|<public key>".

    The stored RSA public key (PEM armor and newlines stripped) is combined
    with the configured server address, rendered as an SVG QR code and
    served with an explicit Content-Length. Requires a logged-in session.
    NOTE(review): when no key file is stored this falls through to an
    implicit None return -- confirm that is intended.
    """
    if (not session.get('logged_in')):
        return redirect(url_for('login'))
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
    fs = GridFS(gridfsdb)
    filename = 'rsa_1024_pub.pem'
    if (fs.exists(filename=filename)):
        file = fs.get_last_version(filename)
        # Strip PEM armor and newlines so only the raw base64 key remains.
        pubkey = file.read().replace('\n', '').replace(
            '-----BEGIN PUBLIC KEY-----', '').replace('-----END PUBLIC KEY-----', '')
        server_address = mongodb.db.configs.find_one({}, {
            '_id': 0,
            'server_address': 1
        })['server_address']
        qrdata = pyqrcode.create(server_address + '|' + pubkey, mode='binary')
        output = StringIO()
        qrdata.svg(output, scale=6)
        contents = output.getvalue()
        output.close()
        response = virtualrest.response_class(contents, direct_passthrough=True,
                                              mimetype='image/svg+xml')
        response.headers.set('Content-Length', len(contents))
        return response
def test_it_saves_files(self):
    """save_file should create a GridFS entry under the given filename."""
    payload = BytesIO(b"these are the bytes")
    self.mongo.save_file("my-file", payload)
    assert GridFS(self.mongo.db).exists({"filename": "my-file"})
class GridFSStorage(Storage):
    """Django storage backend that keeps files in MongoDB GridFS.

    Connection settings come from Django settings (GRIDFS_HOST,
    GRIDFS_PORT, GRIDFS_COLLECTION); explicit constructor arguments
    override them.
    """

    def __init__(self, host='localhost', port=27017, collection='fs'):
        # Settings provide defaults; truthy constructor args win.
        for s in ('host', 'port', 'collection'):
            name = 'GRIDFS_' + s.upper()
            if hasattr(settings, name):
                setattr(self, s, getattr(settings, name))
        for s, v in zip(('host', 'port', 'collection'),
                        (host, port, collection)):
            if v:
                setattr(self, s, v)
        self.db = Connection(host=self.host, port=self.port)[self.collection]
        self.fs = GridFS(self.db)

    def _save(self, name, content):
        self.fs.put(content, filename=name)
        return name

    def _open(self, name, *args, **kwars):
        return self.fs.get_last_version(filename=name)

    def delete(self, name):
        # Fix: the original referenced the undefined global `fs`
        # (NameError at runtime); use the instance's GridFS handle.
        oid = self.fs.get_last_version(filename=name)._id
        self.fs.delete(oid)

    def exists(self, name):
        return self.fs.exists({'filename': name})

    def size(self, name):
        return self.fs.get_last_version(filename=name).length
class ObjectDB:
    """Key-value store over GridFS for serialized Sage objects.

    Keys are GridFS filenames (or raw ObjectIds for anonymous objects);
    values are serialized with sage's dumps/loads.
    """

    def __init__(self, db):
        from gridfs import GridFS
        self.gridfs = GridFS(db)

    def __setitem__(self, key, obj):
        self.save(obj, key)

    def __getitem__(self, key):
        return self.load(key)

    def __delitem__(self, key):
        from pymongo.objectid import ObjectId
        # Accept either a filename or an ObjectId; resolve names to the
        # id of their latest stored version.
        if not isinstance(key, ObjectId):
            id = self.gridfs.get_last_version(key)._id
        else:
            id = key
        self.gridfs.delete(id)

    def __repr__(self):
        return "Key-value database"

    def keys(self):
        """Return list of filenames of objects in the gridfs store."""
        return self.gridfs.list()

    def object_ids(self):
        """Return list of id's of objects in the gridfs store, which are
        not id's of objects with filenames."""
        # Reaches into GridFS' name-mangled private files collection to
        # query documents stored without a filename.
        v = self.gridfs._GridFS__files.find({'filename': {
            '$exists': False
        }}, ['_id'])
        return [x['_id'] for x in v]

    def has_key(self, key):
        return self.gridfs.exists(filename=key)

    def save(self, obj, key=None, compress=None):
        """Save Python object obj to the grid file system self.gridfs.

        If key is None, the file is stored by MongoDB assigned ObjectID,
        and that id is returned.
        """
        from sage.all import dumps
        data = dumps(obj, compress=compress)
        if key is not None:
            self.gridfs.put(data, filename=key)
            return key
        else:
            # store by MongoDB assigned _id only, and return that id.
            return self.gridfs.put(data)

    def load(self, key, compress=True):
        from pymongo.objectid import ObjectId
        if isinstance(key, ObjectId):
            data = self.gridfs.get(key).read()
        else:
            data = self.gridfs.get_last_version(key).read()
        from sage.all import loads
        return loads(data, compress=compress)
def _get_unique_filename(name, db_alias=DEFAULT_CONNECTION_NAME,
                         collection_name='fs'):
    """Return `name`, suffixed with a counter if needed, so it does not
    collide with an existing GridFS filename (e.g. "a.txt" -> "a_1.txt").
    """
    fs = GridFS(get_db(db_alias), collection_name)
    file_root, file_ext = os.path.splitext(name)
    count = itertools.count(1)
    while fs.exists(filename=name):
        # file_ext includes the dot. Fix: dropped the misleading
        # single-argument os.path.join wrapper, which was a no-op on a
        # string that is a filename, not a path.
        name = "%s_%s%s" % (file_root, next(count), file_ext)
    return name
def _get_unique_filename(name, db_alias=DEFAULT_CONNECTION_NAME,
                         collection_name='fs'):
    """Return `name`, suffixed with a counter if needed, so it does not
    collide with an existing GridFS filename (e.g. "a.txt" -> "a_1.txt").
    """
    fs = GridFS(get_db(db_alias), collection_name)
    file_root, file_ext = os.path.splitext(name)
    count = itertools.count(1)
    while fs.exists(filename=name):
        # file_ext includes the dot. Fix: dropped the misleading
        # single-argument os.path.join wrapper, which was a no-op on a
        # string that is a filename, not a path.
        name = "%s_%s%s" % (file_root, next(count), file_ext)
    return name
def _get_unique_filename(name):
    """Return `name`, suffixed with a counter if needed, so it does not
    collide with an existing GridFS filename (e.g. "a.txt" -> "a_1.txt").
    """
    fs = GridFS(_get_db())
    file_root, file_ext = os.path.splitext(name)
    count = itertools.count(1)
    while fs.exists(filename=name):
        # file_ext includes the dot. Fix: dropped the misleading
        # single-argument os.path.join wrapper, which was a no-op on a
        # string that is a filename, not a path.
        name = "%s_%s%s" % (file_root, next(count), file_ext)
    return name
def mongo_image(db, image_id):
    """Read an image from MongoDB GridFS; return None when the id is absent."""
    fs = GridFS(db)
    oid = ObjectId(image_id)
    if not fs.exists(oid):
        return None
    return fs.get(oid).read()
def delete_files(self, set_name, ids):
    """Delete the given GridFS ids from `set_name` under the global lock."""
    self.globalLock.acquire()
    try:
        bucket = GridFS(self.db, collection=set_name)
        for raw_id in ids:
            oid = ObjectId(raw_id)
            if bucket.exists(document_or_id=oid):
                bucket.delete(oid)
    finally:
        # Always release, even if a delete raises.
        self.globalLock.release()
def insertFile(self, db, filePath, query):
    """Store the file at `filePath` in GridFS unless `query` already matches.

    Returns the new file's ObjectId, or None (implicitly) when a matching
    file already exists.
    """
    fs = GridFS(db, self.setname)
    if fs.exists(query):
        print('已经存在该文件')
    else:
        with open(filePath, 'rb') as fileObj:
            data = fileObj.read()
            # Fix: renamed the local from `ObjectId` (it shadowed
            # bson.ObjectId) and dropped the redundant close() inside
            # the with-block.
            file_id = fs.put(data, filename=filePath.split('\\')[-1])
        return file_id
def mongo_image(db, image_id, bboxes=False):
    """Read an image from GridFS; optionally draw predicted bboxes on it.

    Returns None when the id is not found.
    """
    fs = GridFS(db)
    oid = ObjectId(image_id)
    if not fs.exists(oid):
        return None
    grid_out = fs.get(oid)
    image = grid_out.read()
    if bboxes:
        predictions = grid_out.metadata.get('predictions')
        if predictions:
            boxes = [pred.get('bbox') for pred in predictions]
            image = draw_bboxes(image, boxes)
    return image
def insertFile(self, db, filePath, query, label):
    """Store `filePath` (tagged with `label`) in GridFS unless `query` matches.

    Returns the new file's ObjectId, or None (implicitly) when a matching
    file already exists.
    """
    fs = GridFS(db, self.file_table)
    if fs.exists(query):
        # Already stored; silently skip (matches original behavior).
        pass
    else:
        with open(filePath, 'rb') as fileObj:
            data = fileObj.read()
            # Fix: renamed the local from `ObjectId` (it shadowed
            # bson.ObjectId) and dropped the redundant close() inside
            # the with-block.
            file_id = fs.put(data, filename=filePath.split('/')[-1],
                             label=label)
        return file_id
def downLoadFile(self, file_id):
    """Fetch the latest version of the file stored under `file_id`.

    Returns (file_data, file_name); both are None when no such file exists.
    """
    client = pymongo.MongoClient(self.db_url)
    db = client[self.db]
    gridfs_col = GridFS(db, collection="file_info")
    query = {"file_id": file_id}
    # Fix: file_name was unbound on the not-found path, so the return
    # statement raised NameError; initialize it up front.
    file_name = None
    if not gridfs_col.exists(query):
        file_data = None
    else:
        file_name = self.filter_single_grid_info(
            file_id=file_id)[0]["file_name"]
        print(file_name)
        # version=-1 selects the most recent stored version.
        file_data = gridfs_col.get_version(file_id=file_id, version=-1).read()
    return file_data, file_name
def padma_proxy(image_id):
    """Test helper: forward an image from GridFS to the padma test endpoint
    and relay its textual response.
    """
    db = app.config['mongodb']
    fs = GridFS(db)
    oid = ObjectId(image_id)
    if fs.exists(oid):
        image = fs.get(oid).read()
        data = {'file': image}
        headers = {}
        reply = requests.post(PADMA_URL + '/teste', files=data,
                              headers=headers)
        return reply.text
def process_image_request(file_id, size):
    """
    Resizes images to size and returns a base64 encoded string
    representing the image
    """
    try:
        # Pixel bounds for each named thumbnail size.
        sizes = {
            'small': (140, 100),
            'medium': (400, 300),
            'large': (1200, 1000)
        }
        col = app.data.driver.db['files']
        image = col.find_one({'_id': ObjectId(file_id)})
        grid_fs = GridFS(app.data.driver.db)
        if not grid_fs.exists(_id=image['file']):
            return eve_abort(500, 'No file system found')
        im_stream = grid_fs.get_last_version(_id=image['file'])
        im = Image.open(im_stream)
        if size != 'original':
            im.thumbnail(sizes[size], Image.ANTIALIAS)
        img_io = io.BytesIO()
        im.save(img_io, 'PNG', quality=100)
        img_io.seek(0)
        encoded_img = base64.b64encode(img_io.read())
        dict = {
            'mimetype': 'image/png',
            'encoding': 'base64',
            'src': encoded_img
        }
        # Jsonify the dictionary and return it
        return jsonify(**dict)
        # Sends an image
        # return send_file(img_io, mimetype='image/png')
    except Exception as e:
        # Deliberate catch-all: any failure (missing document, bad image
        # data, unknown size key) falls through to the generic 404 below.
        pass
    return eve_abort(404, 'Image not found or errors processing')
class GridfsStorageBackend(object):
    """Key/value storage backend backed by MongoDB GridFS.

    Keys are GridFS filenames; values are file contents, exposed as
    iterables of chunks.
    """

    def __init__(self, db, collection_name="storage"):
        from gridfs import GridFS
        self.fs = GridFS(db, collection_name)

    def __get_file_object(self, key):
        # Raise KeyError (dict semantics) rather than leaking gridfs' NoFile.
        from gridfs import NoFile
        try:
            return self.fs.get_version(filename=key)
        except NoFile:
            raise KeyError(key)

    def __contains__(self, key):
        return self.fs.exists(filename=key)

    def __getitem__(self, key):
        return iterate_file_object(self.__get_file_object(key))

    def put_file(self, key, tmpfile):
        # ResourceDatabase will check to make sure the file doesn't already
        # exist before calling this, but in the event of a race condition this
        # may be called twice for a given key. Fortunately this will cause no
        # issues, but it seems to result in two "versions" of the file being in
        # gridfs, which wastes some space (but not very much, if race
        # conditions are rare).
        #
        # FIXME: look into whether it is possible to drop old versions
        # automatically in gridfs
        # Fix: the original used the Python-2-only builtin `file(tmpfile)`,
        # which is a NameError on Python 3 (this class already branches on
        # six.PY3); use open() instead.
        with open(tmpfile) as f:
            self.fs.put(f, filename=key)

    def __delitem__(self, key):
        self.fs.delete(self.__get_file_object(key)._id)

    def keys(self):
        if six.PY3:
            return self.iterkeys()
        else:
            return self.fs.list()

    def iterkeys(self):
        return iter(self.fs.list())

    __iter__ = iterkeys

    def __len__(self):
        return len(self.fs.list())
def upLoadFile(self, file_coll, file_name, data_link):
    """Upload local file `file_name` into the given GridFS collection,
    recording its URL. Returns the new ObjectId, or "0" when a matching
    file already exists.
    """
    bucket = GridFS(self.db, collection=file_coll)
    result = "0"
    if bucket.exists({"filename": file_name}):
        print('已经存在该文件')
    else:
        with open(file_name, 'rb') as reader:
            payload = reader.read()
        # store in gridfs together with its metadata
        result = bucket.put(data=payload, filename=file_name, url=data_link)
        print(result)
    return result
class GridFsBackend(BaseBackend):
    '''
    A Mongo GridFS backend

    Expect the following settings:

    - `mongo_url`: The Mongo access URL
    - `mongo_db`: The database to store the file in.
    '''

    def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)
        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        # The backend name doubles as the GridFS bucket/collection name.
        self.fs = GridFS(self.db, self.name)

    def exists(self, filename):
        return self.fs.exists(filename=filename)

    @contextmanager
    def open(self, filename, mode='r', encoding='utf8'):
        """Context manager over a stored file.

        Read mode yields the GridFS file (wrapped in a decoding reader for
        text mode); write mode yields an in-memory buffer whose contents
        are stored as a new version on exit.
        """
        if 'r' in mode:
            f = self.fs.get_last_version(filename)
            yield f if 'b' in mode else codecs.getreader(encoding)(f)
        else:  # mode == 'w'
            # Buffer writes in memory, then persist the whole payload.
            f = io.BytesIO() if 'b' in mode else io.StringIO()
            yield f
            params = {'filename': filename}
            if 'b' not in mode:
                params['encoding'] = encoding
            self.fs.put(f.getvalue(), **params)

    def read(self, filename):
        f = self.fs.get_last_version(filename)
        return f.read()

    def write(self, filename, content):
        return self.fs.put(self.as_binary(content), filename=filename)

    def delete(self, filename):
        # Remove every stored version, not only the latest.
        for version in self.fs.find({'filename': filename}):
            self.fs.delete(version._id)

    def serve(self, filename):
        file = self.fs.get_last_version(filename)
        return send_file(file, mimetype=file.content_type)
def get_image(_id, n, pil=False):
    """Fetch an image from GridFS and crop it to predicted bbox number `n`.

    Returns the cropped image, or None when the id, index or bbox is
    unavailable.
    """
    db = app.config['mongodb']
    fs = GridFS(db)
    oid = ObjectId(_id)
    if not fs.exists(oid):
        return None
    grid_data = fs.get(oid)
    if n is None:
        return None
    idx = int(n)
    preds = grid_data.metadata.get('predictions')
    if not preds:
        return None
    boxes = [pred.get('bbox') for pred in preds]
    if len(boxes) >= idx + 1 and boxes[idx]:
        raw = grid_data.read()
        return recorta_imagem(raw, boxes[idx], pil=pil)
    return None
def account(conf, options, args): '''View details or summary of all accounts.''' con = Connection(conf['MONGODB_HOST'], conf['MONGODB_PORT']) db = con[conf['MONGODB_NAME']] fs = GridFS(db) if options.all: query = None elif len(args) == 2: query = { '_id': int(args[1]) } if args[1].isdigit() else { 'email': args[1] } else: log.error('account <email or _id> requires a valid email or _id') sys.exit(1) for acc in db.accounts.find(query): if str(acc['_id']).startswith('_'): continue print '%s [id:%s]' % (acc['email'], acc['id']) for key in acc: if key in ['email', '_id', 'id']: continue if key == 'items': try: size = sum([fs.get(_id).length for _id in acc['items']]) except NoFile: log.warn('Account `%s` has some files missing:', _id) # fail safe counting size = 0 missing = [] for i in acc['items']: if not fs.exists(i): missing.append(i) else: size += fs.get(i).length print ' size: %s' % ppsize(size) print ' %s: %s' % (key, acc[key]) if options.all: print db.accounts.count() - 1, 'accounts total' # -1 for _autoinc con.disconnect()
def insertFile(self, db, filePath, query):
    '''
    Save the file at `filePath` into GridFS unless `query` already matches.

    :param db: MongoDB database handle
    :param filePath: path of the local file to store
    :param query: GridFS existence query
    :return: ObjectId of the stored file, or None when it already exists
    '''
    fs = GridFS(db, self.file_collection)
    if fs.exists(query):
        # Fix: corrected the typo in the original message ('alreay').
        print('already exists!')
    else:
        with open(filePath, 'rb') as fileObj:
            data = fileObj.read()
            # Fix: renamed the local from `ObjectId` (it shadowed
            # bson.ObjectId) and dropped the redundant close() inside
            # the with-block.
            file_id = fs.put(data, filename=filePath.split('/')[-1])
            print(file_id)
        return file_id
class FileRepository():
    """Thin wrapper around GridFS for storing and retrieving raw file bytes."""

    def __init__(self, db: Database):
        self.fs = GridFS(db)

    def get_file(self, id: ObjectId) -> bytes:
        """Return the file's bytes; raise NonExistentError when missing."""
        if not self.fs.exists(id):
            raise NonExistentError("The request file with the id " + str(id) + " does not exist")
        return self.fs.get(id).read()

    def put_file(self, file: bytes) -> ObjectId:
        """Store the bytes and return the new file's id."""
        return self.fs.put(file)

    def delete_file(self, id: ObjectId):
        """Remove the file stored under `id`."""
        self.fs.delete(id)

    def replace_file(self, old_id: ObjectId, new_file: bytes) -> ObjectId:
        """Delete the old file, store the new bytes, return the new id."""
        self.fs.delete(old_id)
        return self.fs.put(new_file)
def upLoadFile(self, file_coll, file_path, file_name, task_id):
    """Upload `file_path` to the CrowdData GridFS collection `file_coll`.

    Returns the new ObjectId, or "0" when an identical record exists.
    """
    client = pymongo.MongoClient(self.dbURL)
    db = client["CrowdData"]
    meta = {
        "fileName": file_name,
        "taskId": task_id,
        "filePath": file_path
    }
    bucket = GridFS(db, collection=file_coll)
    result = "0"
    if bucket.exists(meta):
        print('已经存在该文件')
    else:
        with open(file_path, 'rb') as reader:
            payload = reader.read()
        # upload to gridfs with the metadata as extra fields
        result = bucket.put(data=payload, **meta)
        print(result)
    return result
class GridFsBackendTest(BackendTestCase):
    """Integration tests for GridFsBackend against a local MongoDB."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Fresh client/DB per test; the database is dropped on teardown.
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')
        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        # Seed a file directly through gridfs, bypassing the backend.
        self.gfs.put(content, filename=filename, encoding='utf-8')

    def get_file(self, filename):
        file = self.gfs.get_last_version(filename)
        assert file is not None
        return file.read()

    def file_exists(self, filename):
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        backend = GridFsBackend('test_bucket', self.config)
        # Reaches into the name-mangled attribute to check the bucket name.
        assert backend.fs._GridFS__collection.name == 'test_bucket'

    def test_config(self):
        assert self.backend.client.address == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        # delete() must remove every stored version, not just the latest.
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        assert self.gfs.find({'filename': filename}).count() == 2
        self.backend.delete(filename)
        assert not self.file_exists(filename)
def upLoadFile(self, file_name, collection, data_link, host, author):
    """Upload local file `file_name` with its metadata to a GridFS collection.

    Returns the new ObjectId, a {"result": "file is exist"} dict when a
    matching filename/author record exists, or an error dict when the
    upload fails.
    """
    client = pymongo.MongoClient(self.db_url)
    db = client[self.db]
    filter_condition = {"filename": file_name, "url": data_link,
                        "host": host, "author": author}
    gridfs_col = GridFS(db, collection=collection)
    file_ = "0"
    query = {"filename": file_name, "author": author}
    if gridfs_col.exists(query):
        return {"result": "file is exist"}
    else:
        try:
            with open(file_name, 'rb') as file_r:
                file_data = file_r.read()
            file_ = gridfs_col.put(data=file_data, **filter_condition)  # upload to gridfs
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit
            # and KeyboardInterrupt; catch Exception instead.
            file_ = {"result": "upload file is not exist"}
        return file_
def preenche_bbox(db, engine, limit=2000, start=None):
    """Fill bbox_classe/bbox_score/laplacian on Conformidade rows.

    Selects up to `limit` Conformidade records without bbox_classe
    (optionally restricted to scans on/after `start`), reads each linked
    image from GridFS, crops it to the first predicted bbox and stores the
    predicted class, score and a Laplacian sharpness metric.

    Args:
        db: MongoDB database holding the images in GridFS.
        engine: SQLAlchemy engine for the Conformidade table.
        limit: maximum number of rows to process per call.
        start: optional lower bound for dataescaneamento.
    """
    Session = sessionmaker(bind=engine)
    session = Session()
    if start:
        lista_conformidade = session.query(Conformidade) \
            .filter(Conformidade.dataescaneamento >= start) \
            .filter(Conformidade.bbox_classe.is_(None)).limit(limit).all()
    else:
        lista_conformidade = session.query(Conformidade) \
            .filter(Conformidade.bbox_classe.is_(None)).limit(limit).all()
    tempo = time.time()
    qtde = 0
    try:
        for conformidade in lista_conformidade:
            classe = None
            score = None
            fs = GridFS(db)
            _id = ObjectId(conformidade.id_imagem)
            if fs.exists(_id):
                grid_data = fs.get(_id)
                preds = grid_data.metadata.get('predictions')
                if preds:
                    # Only the first prediction's bbox/class/score are used.
                    bboxes = preds[0].get('bbox')
                    image = grid_data.read()
                    image = recorta_imagem(image, bboxes, pil=True)
                    classe = preds[0].get('class')
                    score = preds[0].get('score')
            if classe:
                conformidade.bbox_classe = classe
                conformidade.bbox_score = score
                conformidade.laplacian = calcula_laplacian(image)
                session.add(conformidade)
                qtde += 1
        session.commit()
    except Exception as err:
        # Any failure rolls back the whole batch.
        logger.error(err, exc_info=True)
        session.rollback()
    tempo = time.time() - tempo
    tempo_registro = 0 if (qtde == 0) else (tempo / qtde)
    logger.info(f'{qtde} bbox preenchidos em {tempo} segundos.' +
                f'{tempo_registro} por registro')
def account(conf, options, args): '''View details or summary of all accounts.''' con = Connection(conf['MONGODB_HOST'], conf['MONGODB_PORT']) db = con[conf['MONGODB_NAME']] fs = GridFS(db) if options.all: query = None elif len(args) == 2: query = {'_id': int(args[1])} if args[1].isdigit() else {'email': args[1]} else: log.error('account <email or _id> requires a valid email or _id') sys.exit(1) for acc in db.accounts.find(query): if str(acc['_id']).startswith('_'): continue print '%s [id:%s]' % (acc['email'], acc['id']) for key in acc: if key in ['email', '_id', 'id']: continue if key == 'items': try: size = sum([fs.get(_id).length for _id in acc['items']]) except NoFile: log.warn('Account `%s` has some files missing:', _id) # fail safe counting size = 0 missing = [] for i in acc['items']: if not fs.exists(i): missing.append(i) else: size += fs.get(i).length print' size: %s' % ppsize(size) print ' %s: %s' % (key, acc[key]) if options.all: print db.accounts.count()-1, 'accounts total' # -1 for _autoinc con.disconnect()
def get_file(rtype, filename):
    """Stream a stored media file of the given resource type from GridFS.

    Requires a logged-in session; 404 for unknown types or filenames.
    Responses carry explicit no-cache headers.
    """
    if not session.get('logged_in'):
        return redirect(url_for('login'))
    if rtype not in ['audios', 'videos', 'flags', 'images', 'thumbs']:
        abort(404)
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), rtype)
    fs = GridFS(gridfsdb)
    if not fs.exists(filename=filename):
        abort(404)
    stored = fs.get_last_version(filename)
    mime = mimetypes.guess_type(filename)[0]
    response = virtualrest.response_class(stored, direct_passthrough=True,
                                          mimetype=mime)
    response.headers.set('Content-Length', stored.length)
    response.headers.set('Cache-Control', 'no-cache, no-store, must-revalidate')
    response.headers.set('Pragma', 'no-cache')
    response.headers.set('Expires', '0')
    return response
def process_image_request(file_id, size):
    """
    Resizes images to size and returns a base64 encoded string
    representing the image
    """
    # Pixel bounds for each named thumbnail size.
    sizes = {'small': (140, 100),
             'medium': (400, 300),
             'large': (1200, 1000)}
    col = app.data.driver.db['files']
    image = col.find_one({'_id': ObjectId(file_id)})
    grid_fs = GridFS(app.data.driver.db)
    if not grid_fs.exists(_id=image['file']):
        # Fix: without `return`, execution fell through to
        # get_last_version after aborting; return the abort result like
        # the sibling handler does.
        return eve_abort(500, 'No file system found')
    im_stream = grid_fs.get_last_version(_id=image['file'])
    im = Image.open(im_stream)
    if size != 'original':
        im.thumbnail(sizes[size], Image.ANTIALIAS)
    img_io = io.BytesIO()
    im.save(img_io, 'PNG', quality=100)
    img_io.seek(0)
    encoded_img = base64.b64encode(img_io.read())
    # Renamed from `dict` to avoid shadowing the builtin.
    payload = {'mimetype': 'image/png',
               'encoding': 'base64',
               'src': encoded_img}
    # Jsonify the dictionary and return it
    return jsonify(**payload)
def process_request(file_id):
    """Serve the raw file referenced by the files document `file_id`.

    Looks up the document, streams its GridFS payload with the stored
    content type, and 404s when the document or file is missing.
    NOTE(review): when has_permission() is falsy this returns None, which
    Flask treats as an error -- confirm whether a 403 was intended.
    """
    if has_permission():
        col = app.data.driver.db['files']
        file = col.find_one({'_id': ObjectId(file_id)})
        if not file:
            eve_abort(404, 'No file found')
        try:
            grid_fs = GridFS(app.data.driver.db)
            if not grid_fs.exists(_id=file['file']):
                eve_abort(404, 'No file found')
            stream = grid_fs.get_last_version(_id=file['file'])
            response = make_response(stream.read())
            # Reuse the content type recorded at upload time.
            response.mimetype = stream.content_type
            return response
        except NoFile:
            eve_abort(404, 'No file found')
class FileStoreMongo(FileStore):
    """
    Filestore database using GridFS (see :mod:`gridfs`)

    :arg pymongo.database.Database connection: MongoDB database object
    """

    def __init__(self, connection):
        self._conn=connection
        self.new_context()
        self._fs=GridFS(self.database)

    def _filename(self, **kwargs):
        # Files are keyed by (session, filename); falls back to cell_id,
        # then to a sentinel string when neither is supplied.
        return {'session': kwargs.get('session', kwargs.get('cell_id', 'SESSION NOT FOUND')), 'filename': kwargs['filename']}

    @Debugger
    def new_file(self, **kwargs):
        """
        See :meth:`FileStore.new_file`

        :rtype: :class:`gridfs.grid_file.GridIn`
        """
        # Drop existing versions first so there is at most one file
        # per (session, filename) key.
        self.delete_files(**kwargs)
        log("FS Creating %s"%self._filename(**kwargs))
        return self._fs.new_file(**self._filename(**kwargs))

    @Debugger
    def delete_files(self, **kwargs):
        """
        See :meth:`FileStore.delete_files`
        """
        # Loop until every stored version is gone.
        while self._fs.exists(self._filename(**kwargs)):
            self._fs.delete(self._fs.get_last_version(**self._filename(**kwargs))._id)

    @Debugger
    def get_file(self, **kwargs):
        """
        See :meth:`FileStore.get_file`

        :rtype: :class:`gridfs.grid_file.GridOut`
        """
        if self._fs.exists(self._filename(**kwargs)):
            return self._fs.get(self._fs.get_last_version(**self._filename(**kwargs))._id)
        else:
            return None

    @Debugger
    def create_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.create_file`
        """
        with self.new_file(**kwargs) as f:
            f.write(file_handle.read())

    @Debugger
    def copy_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.copy_file`
        """
        file_handle.write(self.get_file(**kwargs).read())

    @Debugger
    def new_context(self):
        """
        See :meth:`FileStore.new_context`
        """
        self.database=pymongo.database.Database(self._conn, mongo_config['mongo_db'])
        uri=mongo_config['mongo_uri']
        if '@' in uri:
            # URI carries user:password credentials -- authenticate with them.
            # strip off optional mongodb:// part
            if uri.startswith('mongodb://'):
                uri=uri[len('mongodb://'):]
            result=self.database.authenticate(uri[:uri.index(':')],uri[uri.index(':')+1:uri.index('@')])
            if result==0:
                raise Exception("MongoDB authentication problem")

    @Debugger
    def new_context_copy(self):
        """
        See :meth:`FileStore.new_context_copy`
        """
        return type(self)(self._conn)

    # No methods are exposed to untrusted callers.
    valid_untrusted_methods=()
path = os.path.abspath(os.path.join(root, item)) full_path = '%s:%s' % (server, path) try: # Get the MD5 hash of the file handle = open(path, 'rb') digest = hashlib.md5() for block in iter(lambda: handle.read(1024*1024), ""): digest.update(block) handle.seek(0) md5 = digest.hexdigest() except IOError: logging.error('unable to read file %s', full_path) error_count += 1 continue # Add the file to MongoDB if it isn't already added if fs.exists(md5=md5): db['%s.files' % args.collection].update({'md5': md5}, {'$addToSet': {'aliases': full_path}}) logging.debug('Updated file %s', full_path) else: kwargs = {'filename': item, 'aliases': [full_path]} mimetype = args.mimetype or \ mimetypes.guess_type(item, strict=False)[0] if mimetype: kwargs['contentType'] = mimetype try: _id = fs.put(handle, **kwargs) write_count += 1 logging.debug('Wrote file %s', full_path) except IOError: logging.error('unable to write file %s', full_path)
def climate_itp_weight_thiessen(conn, db_model, subbsn_id, geodata2dbdir):
    """Generate and import weight information using Thiessen polygon method.

    Reads the subbasin mask raster from GridFS, computes per-cell
    interpolation weights for each climate data type (M/P/PET) from the
    station locations, stores them back in GridFS and mirrors them to a
    text file for inspection.

    Args:
        conn: MongoClient connection used to reach the hydro-climate DB.
        db_model: workflow database object
        subbsn_id: subbasin id
        geodata2dbdir: directory to store weight data as txt file
    """
    spatial_gfs = GridFS(db_model, DBTableNames.gridfs_spatial)
    # read mask file from mongodb
    mask_name = str(subbsn_id) + '_MASK'
    if not spatial_gfs.exists(filename=mask_name):
        raise RuntimeError('%s is not existed in MongoDB!' % mask_name)
    mask = db_model[DBTableNames.gridfs_spatial].files.find({'filename': mask_name})[0]
    ysize = int(mask['metadata'][RasterMetadata.nrows])
    xsize = int(mask['metadata'][RasterMetadata.ncols])
    nodata_value = mask['metadata'][RasterMetadata.nodata]
    dx = mask['metadata'][RasterMetadata.cellsize]
    xll = mask['metadata'][RasterMetadata.xll]
    yll = mask['metadata'][RasterMetadata.yll]
    data = spatial_gfs.get(mask['_id'])
    total_len = xsize * ysize
    # The mask payload is a flat array of 32-bit floats.
    fmt = '%df' % (total_len,)
    data = unpack(fmt, data.read())
    # print(data[0], len(data), type(data))
    # count number of valid cells
    num = 0
    for type_i in range(0, total_len):
        if abs(data[type_i] - nodata_value) > UTIL_ZERO:
            num += 1
    # read stations information from database
    metadic = {RasterMetadata.subbasin: subbsn_id, RasterMetadata.cellnum: num}
    site_lists = db_model[DBTableNames.main_sitelist].find({FieldNames.subbasin_id: subbsn_id})
    site_list = next(site_lists)
    clim_db_name = site_list[FieldNames.db]
    p_list = site_list.get(FieldNames.site_p)
    m_list = site_list.get(FieldNames.site_m)
    pet_list = site_list.get(FieldNames.site_pet)
    # print(p_list)
    # print(m_list)
    hydro_clim_db = conn[clim_db_name]
    type_list = [DataType.m, DataType.p, DataType.pet]
    site_lists = [m_list, p_list, pet_list]
    if pet_list is None:
        # PET stations are optional; drop that type when absent.
        del type_list[2]
        del site_lists[2]
    # if storm_mode:  # todo: Do some compatible work for storm and longterm models.
    #     type_list = [DataType.p]
    #     site_lists = [p_list]
    for type_i, type_name in enumerate(type_list):
        fname = '%d_WEIGHT_%s' % (subbsn_id, type_name)
        # print(fname)
        # Replace any previously generated weight file for this type.
        if spatial_gfs.exists(filename=fname):
            x = spatial_gfs.get_version(filename=fname)
            spatial_gfs.delete(x._id)
        site_list = site_lists[type_i]
        if site_list is not None:
            site_list = site_list.split(',')
            # print(site_list)
            site_list = [int(item) for item in site_list]
            metadic[RasterMetadata.site_num] = len(site_list)
            # print(site_list)
            q_dic = {StationFields.id: {'$in': site_list},
                     StationFields.type: type_list[type_i]}
            cursor = hydro_clim_db[DBTableNames.sites].find(q_dic).sort(StationFields.id, 1)
            # meteorology station can also be used as precipitation station
            if cursor.count() == 0 and type_list[type_i] == DataType.p:
                q_dic = {StationFields.id.upper(): {'$in': site_list},
                         StationFields.type.upper(): DataType.m}
                cursor = hydro_clim_db[DBTableNames.sites].find(q_dic).sort(StationFields.id, 1)
            # get site locations
            id_list = list()
            loc_list = list()
            for site in cursor:
                if site[StationFields.id] in site_list:
                    id_list.append(site[StationFields.id])
                    loc_list.append([site[StationFields.x], site[StationFields.y]])
            # print('loclist', locList)
            # interpolate using the locations
            myfile = spatial_gfs.new_file(filename=fname, metadata=metadic)
            with open(r'%s/weight_%d_%s.txt' % (geodata2dbdir, subbsn_id, type_list[type_i]),
                      'w') as f_test:
                for y in range(0, ysize):
                    for x in range(0, xsize):
                        index = int(y * xsize + x)
                        # Only valid (non-nodata) cells get a weight line.
                        if abs(data[index] - nodata_value) > UTIL_ZERO:
                            x_coor = xll + x * dx
                            # Raster rows run north to south; flip the y axis.
                            y_coor = yll + (ysize - y - 1) * dx
                            near_index = 0
                            line, near_index = ImportWeightData.thiessen(x_coor, y_coor, loc_list)
                            myfile.write(line)
                            fmt = '%df' % (len(loc_list))
                            f_test.write('%f %f ' % (x, y) + unpack(fmt, line).__str__() + '\n')
            myfile.close()
class GridFsBackendTest(BackendTestCase):
    """Integration tests for :class:`GridFsBackend` against a local MongoDB.

    Requires a MongoDB server reachable on localhost:27017; every test runs
    in a throwaway database (``TEST_DB``) that is dropped after each test.
    """
    # Checksum algorithm the backend under test is expected to report.
    hasher = 'md5'

    @pytest.fixture
    def pngimage(self, pngfile):
        # Yield an open binary handle so the test can stream the fixture file.
        with open(pngfile, 'rb') as f:
            yield f

    @pytest.fixture
    def jpgimage(self, jpgfile):
        with open(jpgfile, 'rb') as f:
            yield f

    @pytest.fixture(autouse=True)
    def setup(self):
        # Fresh client/GridFS/backend per test; teardown drops the test DB.
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')
        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        # Write directly through GridFS, bypassing the backend under test.
        self.gfs.put(content, filename=filename, encoding='utf-8')

    def get_file(self, filename):
        file = self.gfs.get_last_version(filename)
        assert file is not None
        return file.read()

    def file_exists(self, filename):
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        backend = GridFsBackend('test_bucket', self.config)
        # Reaches into the name-mangled private attribute to verify the
        # GridFS bucket is named after the backend.
        assert backend.fs._GridFS__collection.name == 'test_bucket'

    def test_config(self):
        assert self.backend.client.address == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        # delete() must remove every stored version, not just the latest.
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        assert self.gfs.find({'filename': filename}).count() == 2
        self.backend.delete(filename)
        assert not self.file_exists(filename)

    def test_write_pngimage(self, pngimage, utils):
        # Round-trip: write via the backend, read back, compare bytes and
        # the guessed content type.
        filename = 'test.png'
        content = six.binary_type(pngimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        f = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, f)
        with self.backend.open(filename, 'rb') as f:
            assert f.content_type == content_type
        self.assert_bin_equal(filename, content)

    def test_write_jpgimage(self, jpgimage, utils):
        # Same round-trip as above for a JPEG payload.
        filename = 'test.jpg'
        content = six.binary_type(jpgimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        f = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, f)
        with self.backend.open(filename, 'rb') as f:
            assert f.content_type == content_type
        self.assert_bin_equal(filename, content)
class GridFsBackend(BaseBackend): ''' A Mongo GridFS backend Expect the following settings: - `mongo_url`: The Mongo access URL - `mongo_db`: The database to store the file in. ''' def __init__(self, name, config): super(GridFsBackend, self).__init__(name, config) self.client = MongoClient(config.mongo_url) self.db = self.client[config.mongo_db] self.fs = GridFS(self.db, self.name) def exists(self, filename): return self.fs.exists(filename=filename) @contextmanager def open(self, filename, mode='r', encoding='utf8'): if 'r' in mode: f = self.fs.get_last_version(filename) yield f if 'b' in mode else codecs.getreader(encoding)(f) else: # mode == 'w' f = io.BytesIO() if 'b' in mode else io.StringIO() yield f params = {'filename': filename} if 'b' not in mode: params['encoding'] = encoding self.fs.put(f.getvalue(), **params) def read(self, filename): f = self.fs.get_last_version(filename) return f.read() def write(self, filename, content): kwargs = { 'filename': filename } if hasattr(content, 'content_type') and content.content_type is not None: kwargs['content_type'] = content.content_type return self.fs.put(self.as_binary(content), **kwargs) def delete(self, filename): regex = '^{0}'.format(re.escape(filename)) for version in self.fs.find({'filename': {'$regex': regex}}): self.fs.delete(version._id) def copy(self, filename, target): src = self.fs.get_last_version(filename) self.fs.put(src, filename=target, content_type=src.content_type) def list_files(self): for f in self.fs.list(): yield f def serve(self, filename): file = self.fs.get_last_version(filename) return send_file(file, mimetype=file.content_type) def get_metadata(self, filename): f = self.fs.get_last_version(filename) return { 'checksum': 'md5:{0}'.format(f.md5), 'size': f.length, 'mime': f.content_type, 'modified': f.upload_date, }
def get_file(rtype,filename):
    """Serve a stored GridFS file of the given resource type to a logged-in user.

    Supports HTTP ``Range`` requests: when a ``Range`` header is present the
    requested span is copied into an in-memory buffer and answered with 206
    plus a ``Content-Range`` header; otherwise the whole file is streamed.
    Payloads larger than ``MAX_SIZE`` are streamed via a generator.

    :param rtype: GridFS database holding the file; must be one of
        'audios', 'videos', 'flags', 'images' or 'thumbs' (else 404).
    :param filename: name of the file in GridFS (404 when absent).
    :returns: a Flask/Werkzeug response object.
    """
    MAX_SIZE = 2097152  # 2MB streaming chunk size
    if not session.get('logged_in'):
        abort(403)
    if rtype not in ['audios', 'videos', 'flags', 'images', 'thumbs']:
        abort(404)
    griddb = database.Database(MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), rtype)
    fs = GridFS(griddb)
    if not fs.exists(filename=filename):
        abort(404)
    file = fs.get_last_version(filename)
    file_length = file.length
    chunk_length = file_length
    mime = guess_type(filename)[0]
    range_header = False
    if 'Range' in request.headers.keys():
        range_header = True
        # 'bytes=start-end'; only the first range of a multi-range request
        # is honoured.
        start, end = request.headers['Range'].split('=')[1].split(',')[0].split('-')
        if end == '' or int(end) > file_length:
            end = file_length
        if start == '':
            # Suffix range ('bytes=-N'): serve the last N bytes.
            start = file_length - int(end)
        end = int(end)
        start = int(start)
        # NOTE(review): `end` is treated as exclusive throughout (the
        # Content-Range below is consistent with that), while strict
        # RFC 7233 clients send an inclusive last-byte position — confirm.
        chunk_length = end - start
        chunk_file = StringIO()  # NOTE(review): binary data needs BytesIO on py3 — confirm runtime
        file.seek(start)
        # BUGFIX: read exactly the requested span. This used to read `end`
        # bytes starting at offset `start`, returning more data than the
        # declared Content-Range whenever start > 0.
        chunk_file.write(file.read(chunk_length))
        chunk_file.seek(0)
        file.close()
    else:
        file.seek(0)

    def generate():
        # Stream the selected source (range buffer or whole file) in chunks.
        while True:
            if range_header:
                chunk = chunk_file.read(MAX_SIZE)
            else:
                chunk = file.read(MAX_SIZE)
            if not chunk:
                break
            yield chunk

    if chunk_length > MAX_SIZE:
        if range_header:
            response = Response(stream_with_context(generate()), 206, mimetype=mime)
            response.headers.set('Content-Range', 'bytes %d-%d/%d' %
                                 (start, (start + chunk_length) - 1, file_length))
        else:
            response = Response(stream_with_context(generate()), 200, mimetype=mime)
            response.headers.set('Content-Length', file_length)
    else:
        if range_header:
            response = virtualrest.response_class(chunk_file, 206,
                                                  direct_passthrough=True, mimetype=mime)
            response.headers.set('Content-Range', 'bytes %d-%d/%d' %
                                 (start, (start + chunk_length) - 1, file_length))
        else:
            response = virtualrest.response_class(file, 200,
                                                  direct_passthrough=True, mimetype=mime)
            response.headers.set('Content-Length', file_length)
    # BUGFIX: valid rtype values are plural; the old check for
    # 'audio'/'video' could never match, so media responses were cacheable.
    if rtype in ['audios', 'videos']:
        response.headers.set('Cache-Control', 'no-cache, no-store, must-revalidate')
        response.headers.set('Pragma', 'no-cache')
    response.headers.set('Accept-Ranges', 'bytes')
    return response
def exists(self): f = GridFS(self._database, collection=self._collection) return f.exists(self._value)
def parse_cmd_options(self):
    """Parse command-line options, load configuration and initialize the UI.

    Builds the optparse parser, reads the configuration either from a local
    file or (via a ``mongodb://`` URI) from a GridFS bucket, applies command
    line overrides, selects and instantiates the user interface, and applies
    threading/profiling/debug settings.

    Returns:
        (options, args) tuple as produced by ``OptionParser.parse_args``.
    """
    parser = OptionParser(version=offlineimap.__version__,
                          description="%s.\n\n%s" %
                                      (offlineimap.__copyright__,
                                       offlineimap.__license__))

    parser.add_option("--dry-run",
                      action="store_true", dest="dryrun", default=False,
                      help="Do not actually modify any store but check and print "
                           "what synchronization actions would be taken if a sync would be"
                           " performed. It will not precisely give the exact information w"
                           "hat will happen. If e.g. we need to create a folder, it merely"
                           " outputs 'Would create folder X', but not how many and which m"
                           "ails it would transfer.")

    parser.add_option("--info",
                      action="store_true", dest="diagnostics", default=False,
                      help="Output information on the configured email repositories"
                           ". Useful for debugging and bug reporting. Use in conjunction wit"
                           "h the -a option to limit the output to a single account. This mo"
                           "de will prevent any actual sync to occur and exits after it outp"
                           "ut the debug information.")

    parser.add_option("-1",
                      action="store_true", dest="singlethreading", default=False,
                      help="Disable all multithreading operations and use "
                           "solely a single-thread sync. This effectively sets the "
                           "maxsyncaccounts and all maxconnections configuration file "
                           "variables to 1.")

    parser.add_option("-P", dest="profiledir", metavar="DIR",
                      help="Sets OfflineIMAP into profile mode. The program "
                           "will create DIR (it must not already exist). "
                           "As it runs, Python profiling information about each "
                           "thread is logged into profiledir. Please note: "
                           "This option is present for debugging and optimization "
                           "only, and should NOT be used unless you have a "
                           "specific reason to do so. It will significantly "
                           "decrease program performance, may reduce reliability, "
                           "and can generate huge amounts of data. This option "
                           "implies the -1 option.")

    parser.add_option("-a", dest="accounts", metavar="ACCOUNTS",
                      help="Overrides the accounts section in the config file. "
                           "Lets you specify a particular account or set of "
                           "accounts to sync without having to edit the config "
                           "file. You might use this to exclude certain accounts, "
                           "or to sync some accounts that you normally prefer not to.")

    parser.add_option("-c", dest="configfile", metavar="FILE",
                      default="~/.offlineimaprc",
                      help="Specifies a configuration file to use in lieu of '%default'.\n"
                           "Configuration files stored in MongoDB are supported via the "
                           "following URI syntax:\n"
                           "mongodb://*****:*****@server:port/db/bucket/file.conf")

    parser.add_option("-d", dest="debugtype", metavar="type1,[type2...]",
                      help="Enables debugging for OfflineIMAP. This is useful "
                           "if you are to track down a malfunction or figure out what is "
                           "going on under the hood. This option requires one or more "
                           "debugtypes, separated by commas. These define what exactly "
                           "will be debugged, and so far include two options: imap, thread, "
                           "maildir or ALL. The imap option will enable IMAP protocol "
                           "stream and parsing debugging. Note that the output may contain "
                           "passwords, so take care to remove that from the debugging "
                           "output before sending it to anyone else. The maildir option "
                           "will enable debugging for certain Maildir operations. "
                           "The use of any debug option (unless 'thread' is included), "
                           "implies the single-thread option -1.")

    parser.add_option("-l", dest="logfile", metavar="FILE",
                      help="Log to FILE")

    parser.add_option("-f", dest="folders", metavar="folder1,[folder2...]",
                      help="Only sync the specified folders. The folder names "
                           "are the *untranslated* foldernames of the remote repository. "
                           "This command-line option overrides any 'folderfilter' "
                           "and 'folderincludes' options in the configuration file.")

    parser.add_option("-k", dest="configoverride",
                      action="append",
                      metavar="[section:]option=value",
                      help="""Override configuration file option. If"section" is omitted, it defaults to "general". Any underscores in the section name are replaced with spaces: for instance, to override option "autorefresh" in the "[Account Personal]" section in the config file one would use "-k Account_Personal:autorefresh=30".""")

    parser.add_option("-o",
                      action="store_true", dest="runonce", default=False,
                      help="Run only once, ignoring any autorefresh setting "
                           "in the configuration file.")

    parser.add_option("-q",
                      action="store_true", dest="quick", default=False,
                      help="Run only quick synchronizations. Ignore any "
                           "flag updates on IMAP servers (if a flag on the remote IMAP "
                           "changes, and we have the message locally, it will be left "
                           "untouched in a quick run.")

    parser.add_option("-u", dest="interface",
                      help="Specifies an alternative user interface to "
                           "use. This overrides the default specified in the "
                           "configuration file. The UI specified with -u will "
                           "be forced to be used, even if checks determine that it is "
                           "not usable. Possible interface choices are: %s " %
                           ", ".join(UI_LIST.keys()))

    (options, args) = parser.parse_args()

    # Read in the configuration file.
    #: :type: string
    configfilename = os.path.expanduser(options.configfile)
    url = urlparse(configfilename)
    config = CustomConfigParser()
    # A mongodb:// config path is fetched from a GridFS bucket instead of
    # the local filesystem; pymongo/gridfs are imported lazily so they are
    # only required when that feature is used.
    if url.scheme == 'mongodb':
        try:
            from gridfs import GridFS
            from pymongo import Connection
        except ImportError:
            logging.error(" *** pymongo must be installed to use mongodb as configuration source; aborting!")
            sys.exit(1)
        # URI path layout: /<db>/<bucket>/<file.conf>
        path = url.path
        (path, filename) = os.path.split(path)
        (db, collection) = os.path.split(path)
        db_name = os.path.basename(db)
        # Rebuild the connection URI up to (and including) the database part.
        mongo_uri = ''.join([url[0], '://', url[1], db])
        mongodb = Connection(mongo_uri)
        gfs = GridFS(mongodb[db_name], collection)
        if not gfs.exists(filename=filename):
            logging.error(" *** Config file '%s' does not exist; aborting!"
                          % url.geturl())
            sys.exit(1)
        file = gfs.get_last_version(filename)
        config.readfp(file)
    else:
        if not os.path.exists(configfilename):
            # TODO, initialize and make use of chosen ui for logging
            logging.error(" *** Config file '%s' does not exist; aborting!"
                          % configfilename)
            sys.exit(1)
        config.read(configfilename)

    # Profile mode chosen? Profiling implies single-threading.
    if options.profiledir:
        if not options.singlethreading:
            # TODO, make use of chosen ui for logging
            logging.warn("Profile mode: Forcing to singlethreaded.")
            options.singlethreading = True
        if os.path.exists(options.profiledir):
            # TODO, make use of chosen ui for logging
            logging.warn("Profile mode: Directory '%s' already exists!" %
                         options.profiledir)
        else:
            os.mkdir(options.profiledir)
        threadutil.ExitNotifyThread.set_profiledir(options.profiledir)
        # TODO, make use of chosen ui for logging
        logging.warn("Profile mode: Potentially large data will be "
                     "created in '%s'" % options.profiledir)

    # Override config values given via -k [section:]option=value.
    if options.configoverride:
        for option in options.configoverride:
            (key, value) = option.split('=', 1)
            if ':' in key:
                (secname, key) = key.split(':', 1)
                # Underscores stand in for spaces in section names.
                section = secname.replace("_", " ")
            else:
                section = "general"
            config.set(section, key, value)

    # Which UI to use? The command-line option overrides the config file.
    ui_type = config.getdefault('general', 'ui', 'ttyui')
    if options.interface != None:
        ui_type = options.interface
    if '.' in ui_type:
        # Transform legacy names like Curses.Blinkenlights -> Blinkenlights.
        ui_type = ui_type.split('.')[-1]
        # TODO, make use of chosen ui for logging
        logging.warning('Using old interface name, consider using one '
                        'of %s' % ', '.join(UI_LIST.keys()))
    if options.diagnostics:
        ui_type = 'basic'  # enforce basic UI for --info

    # Dry-run? Set [general]dry-run=True, defaulting to False otherwise.
    if options.dryrun:
        dryrun = config.set('general', 'dry-run', "True")
    config.set_if_not_exists('general', 'dry-run', 'False')

    try:
        # Create the UI class.
        self.ui = UI_LIST[ui_type.lower()](config)
    except KeyError:
        logging.error("UI '%s' does not exist, choose one of: %s" % \
                      (ui_type, ', '.join(UI_LIST.keys())))
        sys.exit(1)
    setglobalui(self.ui)

    # Set up additional log files.
    if options.logfile:
        self.ui.setlogfile(options.logfile)

    # Welcome blurb.
    self.ui.init_banner()

    if options.debugtype:
        self.ui.logger.setLevel(logging.DEBUG)
        if options.debugtype.lower() == 'all':
            options.debugtype = 'imap,maildir,thread'
        # Force single threading unless 'thread' debugging was requested.
        if not ('thread' in options.debugtype.split(',') \
                and not options.singlethreading):
            self.ui._msg("Debug mode: Forcing to singlethreaded.")
            options.singlethreading = True
        debugtypes = options.debugtype.split(',') + ['']
        for type in debugtypes:
            type = type.strip()
            self.ui.add_debug(type)
            if type.lower() == 'imap':
                imaplib.Debug = 5

    if options.runonce:
        # FIXME: maybe need a better way to disable autorefresh.
        for section in accounts.getaccountlist(config):
            config.remove_option('Account ' + section, "autorefresh")

    if options.quick:
        for section in accounts.getaccountlist(config):
            config.set('Account ' + section, "quick", '-1')

    # Custom folder list specified?
    if options.folders:
        foldernames = options.folders.split(",")
        folderfilter = "lambda f: f in %s" % foldernames
        folderincludes = "[]"
        for accountname in accounts.getaccountlist(config):
            account_section = 'Account ' + accountname
            remote_repo_section = 'Repository ' + \
                config.get(account_section, 'remoterepository')
            config.set(remote_repo_section, "folderfilter", folderfilter)
            config.set(remote_repo_section, "folderincludes", folderincludes)

    if options.logfile:
        sys.stderr = self.ui.logfile

    socktimeout = config.getdefaultint("general", "socktimeout", 0)
    if socktimeout > 0:
        socket.setdefaulttimeout(socktimeout)

    threadutil.initInstanceLimit('ACCOUNTLIMIT',
                                 config.getdefaultint('general', 'maxsyncaccounts', 1))

    for reposname in config.getsectionlist('Repository'):
        # Limit simultaneous folder-sync and message-copy threads per repo.
        for instancename in ["FOLDER_" + reposname,
                             "MSGCOPY_" + reposname]:
            if options.singlethreading:
                threadutil.initInstanceLimit(instancename, 1)
            else:
                threadutil.initInstanceLimit(instancename,
                                             config.getdefaultint('Repository ' + reposname,
                                                                  'maxconnections', 2))
    self.config = config
    return (options, args)
def exists(self, remote_path): client = self.connect() fs = GridFS(client[self.settings.database]) ret = fs.exists(filename=remote_path) client.close() return ret
class FuseGridFS(LoggingMixIn, Operations):
    """Read-only(ish) FUSE filesystem exposing a MongoDB GridFS bucket.

    Directories are emulated from the '/'-separated ``filename`` fields in
    the GridFS ``files`` collection via map/reduce queries. Python 2 code:
    note the octal literals (0755) and pymongo's legacy ``Connection``.
    """

    def __init__(self, db_or_uri, collection=None):
        # Two calling conventions: a full mongodb:// URI, or a plain
        # database name plus an explicit collection.
        if collection == None:
            url = urlparse(db_or_uri)
            if url.scheme != 'mongodb':
                show_usage()
            path = url.path
            # URI path layout: /<db>/<collection>
            (db_path, collection) = os.path.split(path)
            db = os.path.basename(db_path)
            # Rebuild the URI up to the database component.
            mongo_uri = ''.join([url[0], '://', url[1], db_path])
        else:
            db = db_or_uri
            mongo_uri = ''  # default/local connection
        cn = Connection(mongo_uri)
        self.db = cn[db]
        self.collection = self.db[collection]
        self.gfs = GridFS(self.db, collection=collection)

    def fix_path(self, path):
        # Normalize a FUSE path into a regex-ready filename prefix:
        # drop the leading '/', escape separators, ensure a trailing '\/'.
        if path == '/':
            return ''
        path = path[1:] if path.startswith('/') else path
        path = path.replace('/', '\\/')
        path = path+'\\/' if not path.endswith('/') else path
        return path

    def find_dirs(self, path):
        """List immediate sub-directory names under *path* via map/reduce."""
        path = self.fix_path(path)
        # Emit the first path component after the prefix; reduce collapses
        # duplicates so each child directory appears once.
        map_function = Code('''
            function () {
                var re = /^%s([\w ]+)\//;
                emit(re(this.filename)[1], 1);
            }
            ''' % path)
        reduce_function = Code('''
            function(k,v) {
                return 1;
            }
            ''')
        res = self.collection.files.map_reduce(
            map_function, reduce_function, out='dirs',
            query={'filename': {'$regex': '^{0}([\w ]+)\/'.format(path)}})
        if res.count() > 0:
            return [a['_id'] for a in res.find()]
        return []

    def find_files(self, path):
        """List plain file names directly under *path* via map/reduce."""
        path = self.fix_path(path)
        # Same trick as find_dirs, but the pattern anchors at end-of-name
        # (optionally with dotted extensions) so only leaves match.
        map_function = Code('''
            function () {
                var re = /^%s([\w ]+(?:\.[\w ]+)*)$/;
                emit(re(this.filename)[1], 1);
            }
            ''' % path)
        reduce_function = Code('''
            function(k,v) {
                return 1;
            }
            ''')
        res = self.collection.files.map_reduce(
            map_function, reduce_function, out='dirs',
            query={'filename': {'$regex': '^{0}([\w ]+(?:\.[\w ]+)*)$'.format(path)}})
        if res.count() > 0:
            return [a['_id'] for a in res.find()]
        return []

    def is_dir(self, path):
        # A "directory" exists if any stored filename starts with its prefix.
        if path == '/':
            return True
        path = self.fix_path(path)
        res = self.collection.files.find_one({'filename': {'$regex': '^{0}'.format(path)}})
        return not res is None

    def readdir(self, path, fh):
        dirs = self.find_dirs(path)
        files = self.find_files(path)
        return ['.', '..'] + dirs + files

    def fuse_to_mongo_path(self, path):
        # GridFS filenames carry no leading slash.
        return path[1:] if path.startswith('/') else path

    def get_mongo_file(self, path):
        """Return the latest GridFS version for *path*, or None if absent."""
        path = self.fuse_to_mongo_path(path)
        return self.gfs.get_last_version(filename=path) if self.gfs.exists(filename=path) else None

    def getattr(self, path, fh=None):
        # Directories are synthesized 0755; files are read-only 0444 with
        # their stored length. Timestamps are faked as "now".
        if path == '/' or self.is_dir(path):
            st = dict(st_mode=(S_IFDIR | 0755), st_nlink=2)
        else:
            file = self.get_mongo_file(path)
            if file:
                st = dict(st_mode=(S_IFREG | 0444), st_size=file.length)
            else:
                raise FuseOSError(ENOENT)
        st['st_ctime'] = st['st_mtime'] = st['st_atime'] = time()
        st['st_uid'], st['st_gid'], pid = fuse_get_context()
        return st

    def read(self, path, size, offset, fh):
        file = self.get_mongo_file(path)
        if file:
            file.seek(offset, os.SEEK_SET)
            return file.read(size)
        else:
            raise FuseOSError(ENOENT)

    # Disable unused operations (fusepy treats None as "not implemented"):
    access = None
    flush = None
    getxattr = None
    listxattr = None
    open = None
    opendir = None
    release = None
    releasedir = None
    statfs = None

    def rename_dir(self, old, new):
        """Rename a directory by rewriting the prefix of every file in it."""
        print('rename_dir: old=%s, new=%s' % (old, new))
        search = self.fix_path(old)
        res = self.collection.files.find(
            {'filename': {'$regex': '^{0}'.format(search)}},
            fields={'filename': 1})
        if not res.count():
            return 0
        new = self.fuse_to_mongo_path(new)
        old = self.fuse_to_mongo_path(old)
        old_len = len(old)
        for a in res:
            # Swap the old prefix for the new one, keeping the tail intact.
            new_name = new + a['filename'][old_len:]
            id = a['_id']
            self.collection.files.update({'_id': id}, {'$set': {'filename': new_name}})
        return 0

    def rename(self, old, new):
        if self.is_dir(old):
            return self.rename_dir(old, new)
        file = self.get_mongo_file(old)
        if not file:
            raise FuseOSError(ENOENT)
        new = self.fuse_to_mongo_path(new)
        # NOTE(review): only the latest version's document is renamed here;
        # older versions keep the old name — confirm that is intended.
        self.collection.files.update({'_id': file._id}, {'$set': {'filename': new}})
        return 0
class Storage(BaseMongoModel):
    """
    Abstraction of the storage.

    Provides a database-independent API for manipulating a versioned
    filesystem. The only requirement on the backing store is support for
    file versioning (natively or via a workaround). The current
    implementation is built on MongoDB/GridFS.

    Usage:
        >>> from pymongo import Connection
        >>> from model import Storage
        >>> store = Storage(Connection(), "myuid", "webarchive")
        >>> file = store.get("http://www.myfancypage.com/index.html")
        >>> c = file.get_last_content()   # latest available version
        >>> c.data
        "<html> ...
        >>> print c.content_type, c.length
        'text/html' 29481

    Design pattern: Factory
    """

    def __init__(self, connection, uid, database="webarchive"):
        """
        Initialize the storage.

        @param connection: database connection
        @type connection: pymongo.Connection
        @param uid: user id (see Monitor.__doc__ for more info)
        @type uid: str
        @param database: name of the database used by this instance when the
            storage is database-backed.
        @type database: str
        """
        if not isinstance(connection, Connection):
            raise TypeError("connection must be instance of pymongo.Connection.")
        self._connection = connection
        self._database = database
        self._uid = uid
        # HTTP header metadata is tracked alongside the stored content.
        self._headermeta = HttpHeaderMeta(connection, uid, database)
        # GridFS bucket named "content" holds the actual payloads.
        self.filesystem = GridFS(self._connection[database], "content")
        # Large objects are rejected unless explicitly enabled.
        self.allow_large = False

    def allow_large_documents(self):
        """Allow large objects to be stored in the storage."""
        self.allow_large = True

    def get(self, filename):
        """
        Get a file object by filename (here: the URL).

        @returns: File object representing the file in all its versions
        @rtype: File
        @raises: DocumentNotAvailable if the document is not in the storage
        """
        if self.filesystem.exists(filename=filename):
            return File(filename, self.filesystem, self._headermeta)
        raise DocumentNotAvailable("File does not exist in the storage.")

    def check_uid(self):
        """Delegate uid validation to the header-metadata model."""
        return self._headermeta.check_uid()
def generate_weight_dependent_parameters(conn, maindb, subbsn_id):
    """Generate some parameters dependent on weight data and only should be calculated once.

    Computes per-cell PHU0 (annual average total potential heat units) and
    TMEAN0 (annual average temperature) as station-weighted sums using the
    subbasin's WEIGHT_M grid, then stores both as packed-float rasters in
    GridFS (replacing any previous versions).

    added by Liangjun, 2016-6-17

    Args:
        conn: MongoClient connection to reach the hydro-climate database.
        maindb: main workflow database (pymongo Database).
        subbsn_id: subbasin id.

    Raises:
        RuntimeError: if the subbasin MASK grid is missing from GridFS.
    """
    spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
    # Read the mask raster from MongoDB; it defines the output grid.
    mask_name = '%d_MASK' % subbsn_id
    # Is MASK existed in Database?
    if not spatial_gfs.exists(filename=mask_name):
        raise RuntimeError('%s is not existed in MongoDB!' % mask_name)
    # Read the WEIGHT_M (meteorology station weights) file metadata.
    weight_m_name = '%d_WEIGHT_M' % subbsn_id
    mask = maindb[DBTableNames.gridfs_spatial].files.find({'filename': mask_name})[0]
    weight_m = maindb[DBTableNames.gridfs_spatial].files.find({'filename': weight_m_name})[0]
    num_cells = int(weight_m['metadata'][RasterMetadata.cellnum])
    num_sites = int(weight_m['metadata'][RasterMetadata.site_num])
    # Read the meteorology station list registered for this subbasin.
    site_lists = maindb[DBTableNames.main_sitelist].find({FieldNames.subbasin_id: subbsn_id})
    site_list = next(site_lists)
    db_name = site_list[FieldNames.db]
    m_list = site_list.get(FieldNames.site_m)
    hydro_clim_db = conn[db_name]
    # Station ids are stored as a comma-separated string.
    site_list = m_list.split(',')
    site_list = [int(item) for item in site_list]
    # Per-station annual statistics: PHU0 and mean annual temperature.
    q_dic = {StationFields.id: {'$in': site_list},
             StationFields.type: DataType.phu0}
    cursor = hydro_clim_db[DBTableNames.annual_stats].find(q_dic).sort(StationFields.id, 1)
    q_dic2 = {StationFields.id: {'$in': site_list},
              StationFields.type: DataType.mean_tmp0}
    cursor2 = hydro_clim_db[DBTableNames.annual_stats].find(q_dic2).sort(StationFields.id, 1)
    id_list = list()
    phu_list = list()
    for site in cursor:
        id_list.append(site[StationFields.id])
        phu_list.append(site[DataValueFields.value])
    id_list2 = list()
    tmean_list = list()
    for site in cursor2:
        id_list2.append(site[StationFields.id])
        tmean_list.append(site[DataValueFields.value])
    # WEIGHT_M payload: num_cells x num_sites 32-bit floats, row per cell.
    weight_m_data = spatial_gfs.get(weight_m['_id'])
    total_len = num_cells * num_sites
    fmt = '%df' % (total_len,)
    weight_m_data = unpack(fmt, weight_m_data.read())
    # Calculate PHU0 as the weighted sum of station PHU values.
    phu0_data = np_zeros(num_cells)
    # Calculate TMEAN0 the same way from station mean temperatures.
    tmean0_data = np_zeros(num_cells)
    for i in range(num_cells):
        for j in range(num_sites):
            phu0_data[i] += phu_list[j] * weight_m_data[i * num_sites + j]
            tmean0_data[i] += tmean_list[j] * weight_m_data[i * num_sites + j]
    # Re-expand the per-valid-cell values onto the full mask grid.
    ysize = int(mask['metadata'][RasterMetadata.nrows])
    xsize = int(mask['metadata'][RasterMetadata.ncols])
    nodata_value = mask['metadata'][RasterMetadata.nodata]
    mask_data = spatial_gfs.get(mask['_id'])
    total_len = xsize * ysize
    fmt = '%df' % (total_len,)
    mask_data = unpack(fmt, mask_data.read())
    fname = '%d_%s' % (subbsn_id, DataType.phu0)
    fname2 = '%d_%s' % (subbsn_id, DataType.mean_tmp0)
    # Replace any previously generated rasters of the same names.
    if spatial_gfs.exists(filename=fname):
        x = spatial_gfs.get_version(filename=fname)
        spatial_gfs.delete(x._id)
    if spatial_gfs.exists(filename=fname2):
        x = spatial_gfs.get_version(filename=fname2)
        spatial_gfs.delete(x._id)
    # Copy the mask metadata and re-label it for each output raster.
    meta_dic = copy.deepcopy(mask['metadata'])
    meta_dic['TYPE'] = DataType.phu0
    meta_dic['ID'] = fname
    meta_dic['DESCRIPTION'] = DataType.phu0
    meta_dic2 = copy.deepcopy(mask['metadata'])
    meta_dic2['TYPE'] = DataType.mean_tmp0
    meta_dic2['ID'] = fname2
    meta_dic2['DESCRIPTION'] = DataType.mean_tmp0
    myfile = spatial_gfs.new_file(filename=fname, metadata=meta_dic)
    myfile2 = spatial_gfs.new_file(filename=fname2, metadata=meta_dic2)
    # NOTE: 'vaild_count' is a long-standing typo for 'valid_count'; kept as-is.
    vaild_count = 0
    for i in range(0, ysize):
        cur_row = list()
        cur_row2 = list()
        for j in range(0, xsize):
            index = i * xsize + j
            if abs(mask_data[index] - nodata_value) > UTIL_ZERO:
                # Valid cell: consume the next computed value.
                cur_row.append(phu0_data[vaild_count])
                cur_row2.append(tmean0_data[vaild_count])
                vaild_count += 1
            else:
                cur_row.append(nodata_value)
                cur_row2.append(nodata_value)
        # Write one packed row of 32-bit floats per raster line.
        fmt = '%df' % xsize
        myfile.write(pack(fmt, *cur_row))
        myfile2.write(pack(fmt, *cur_row2))
    myfile.close()
    myfile2.close()
    print('Valid Cell Number of subbasin %d is: %d' % (subbsn_id, vaild_count))
    return True
class DocumentStoringPipeline(MediaPipeline):
    """Scrapy media pipeline that stores downloaded documents in GridFS.

    Each downloaded body goes into GridFS; the link that led to it (file id,
    referer, link number/text/fragment/nofollow) is recorded in a ``links``
    collection, deduplicated on (file, referer, number).
    """

    def __init__(self, settings):
        if settings.get('MONGO_DATABASE') is None:
            raise NotConfigured()
        # NOTE(review): MONGO_DATABASE appears to hold a live Database object
        # (it is passed to GridFS and attribute-accessed below) — confirm.
        self.database = settings.get('MONGO_DATABASE')
        self.gridfs = GridFS(self.database)
        self.links = self.database.links
        # Compound index backing the dedup query in store_link().
        self.links.ensure_index([('file', pymongo.ASCENDING),
                                 ('referer', pymongo.ASCENDING),
                                 ('number', pymongo.ASCENDING)])
        super(DocumentStoringPipeline, self).__init__()

    @classmethod
    def from_settings(cls, settings):
        return cls(settings)

    def media_failed(self, failure, request, info):
        # IgnoreRequest failures are deliberate drops; everything else is
        # logged as a download error.
        if not isinstance(failure.value, IgnoreRequest):
            referer = request.headers.get('Referer')
            log.msg(format='File (unknown-error): Error downloading '
                           '%(request)s referred in '
                           '<%(referer)s>: %(exception)s',
                    level=log.WARNING, spider=info.spider,
                    exception=failure.value, request=request, referer=referer)

    def media_downloaded(self, response, request, info):
        """Persist a successful download to GridFS and record its link."""
        referer = request.headers.get('Referer')
        if response.status != 200:
            log.msg(format='File (code: %(status)s): Error downloading document from %(request)s referred in <%(referer)s>',
                    level=log.WARNING, spider=info.spider,
                    status=response.status, request=request, referer=referer)
            return
        if not response.body:
            log.msg(format='File (empty-content): Empty document from %(request)s referred in <%(referer)s>: no-content',
                    level=log.WARNING, spider=info.spider,
                    request=request, referer=referer)
            return
        log.msg(format='File: Downloaded document from %(request)s referred in <%(referer)s>',
                level=log.DEBUG, spider=info.spider,
                request=request, referer=referer)
        # Store the body; content type comes from the scraped item.
        f = self.gridfs.new_file(content_type=request.meta['item']['mime_type'],
                                 url=request.url)
        try:
            f.write(response.body)
        finally:
            f.close()
        info.spider.crawler.stats.inc_value('downloaded_documents', spider=info.spider)
        info.spider.crawler.stats.inc_value('downloaded_documents/bytes_downloaded',
                                            len(response.body), spider=info.spider)
        self.store_link(info, request.meta['item'], f._id)

    def get_media_requests(self, item, info):
        """Schedule a download for new URLs; reuse stored files for known ones."""
        if not isinstance(item, DocumentItem):
            return
        if self.gridfs.exists({'url': item['url']}):
            # Already downloaded: just record the new inbound link.
            f = self.gridfs.get_last_version(url=item['url'])
            self.store_link(info, item, f._id)
        else:
            request = Request(item['url'])
            request.meta['item'] = item
            return [request]

    def store_link(self, info, item, file_id):
        """Record the link that referenced *file_id*, skipping duplicates."""
        if self.links.find({'file': file_id, 'referer': item['referer'],
                            'number': item['link_number']}).count() > 0:
            return
        self.links.insert({'file': file_id,
                           'referer': item['referer'],
                           'text': item['link_text'],
                           'number': item['link_number'],
                           'fragment': item['fragment'],
                           'nofollow': item['nofollow']})
        info.spider.crawler.stats.inc_value('links_collected', spider=info.spider)
class ObjectDB: def __init__(self, db): from gridfs import GridFS self.gridfs = GridFS(db) def __setitem__(self, key, obj): self.save(obj, key) def __getitem__(self, key): return self.load(key) def __delitem__(self, key): from pymongo.objectid import ObjectId if not isinstance(key, ObjectId): id = self.gridfs.get_last_version(key)._id else: id = key self.gridfs.delete(id) def __repr__(self): return "Key-value database" def keys(self): """Return list of filenames of objects in the gridfs store.""" return self.gridfs.list() def object_ids(self): """Return list of id's of objects in the gridfs store, which are not id's of objects with filenames.""" v = self.gridfs._GridFS__files.find({"filename": {"$exists": False}}, ["_id"]) return [x["_id"] for x in v] def has_key(self, key): return self.gridfs.exists(filename=key) def save(self, obj, key=None, compress=None): """Save Python object obj to the grid file system self.gridfs. If key is None, the file is stored by MongoDB assigned ObjectID, and that id is returned. """ from sage.all import dumps data = dumps(obj, compress=compress) if key is not None: self.gridfs.put(data, filename=key) return key else: # store by MongoDB assigned _id only, and return that id. return self.gridfs.put(data) def load(self, key, compress=True): from pymongo.objectid import ObjectId if isinstance(key, ObjectId): data = self.gridfs.get(key).read() else: data = self.gridfs.get_last_version(key).read() from sage.all import loads return loads(data, compress=compress)
class GridFUSE(Operations):
    """FUSE filesystem backed by MongoDB GridFS.

    Each path is stored as a GridFS filename; per-entry ``stat`` metadata
    lives in the file document.  Deletion is a soft delete: documents are
    flagged ``visible: False`` rather than removed.
    """

    # Default node URI: db ``gridfs``, root collection ``fs`` on localhost.
    DEFAULT = ('mongodb://127.0.0.1/gridfs/fs',)
    # Default file mode: rwx for owner minus read, plus group/other read.
    FMODE = (stat.S_IRWXU|stat.S_IROTH|stat.S_IRGRP)^stat.S_IRUSR
    # Default directory mode: FMODE plus execute (search) bits.
    DMODE = FMODE|stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH
    # Template stat dict for new entries; times filled in by _ent().
    ST = ({
        'st_mode': stat.S_IRWXU|stat.S_IRWXG|stat.S_IRWXO,
        'st_ino': 0,
        'st_dev': 0,
        'st_nlink': 1,
        'st_uid': os.geteuid(),
        'st_gid': os.getegid(),
        'st_size': 0,
        'st_atime': 0,
        'st_mtime': 0,
        'st_ctime': 0,
        })

    def __repr__(self):
        return '<%s.%s: %s>' % (
                __name__,
                self.__class__.__name__,
                ' '.join([
                    ('%s=%r' % x) for x in [
                        ('fs', self.fs),
                        ]]))

    def __init__(self, nodes=None, db=None, coll=None, *args, **kwds):
        """Parse node URIs, connect, and ensure the root entry exists.

        ``nodes`` may be a single URI string or a list; ``db``/``coll``
        default to the database/collection named in the first URI.
        """
        super(GridFUSE, self).__init__()
        nodes = nodes or GridFUSE.DEFAULT
        if isinstance(nodes, basestring):
            nodes = [nodes]
        cluster = list()
        for node in nodes:
            uri = urlsplit(node)
            # Bare host:port entries are passed through untouched.
            if not uri.scheme:
                cluster.append(node)
                continue
            if uri.scheme != 'mongodb':
                raise TypeError('invalid uri.scheme: %r' % uri.scheme)
            node_db, _, node_coll = uri.path.strip('/').partition('/')
            # First URI supplying a db/collection wins unless overridden
            # by explicit arguments.
            if db is None and node_db:
                db = node_db
            if coll is None and node_coll:
                coll = node_coll.replace('/', '.')
            # Without credentials the db component is dropped from the
            # rebuilt URI (no auth database needed).
            if node_db and uri.username is None:
                node_db = str()
            cluster.append(urlunsplit((
                uri.scheme,
                uri.netloc,
                node_db,
                uri.query,
                uri.fragment,
                )))
        if not db or not coll:
            raise TypeError('undefined db and/or root collection')
        conn = self.conn = MongoClient(cluster)
        self.debug = bool(kwds.pop('debug', False))
        # gfs: high-level GridFS API; fs: raw collection (files/chunks).
        self.gfs = GridFS(conn[db], collection=coll)
        self.fs = conn[db][coll]
        self._ctx = Context(self)
        # The root directory is the entry with the empty filename.
        if not self.gfs.exists(filename=''):
            self.mkdir()

    def __call__(self, op, path, *args):
        """Dispatch a FUSE operation by name, stripping leading slashes."""
        if not hasattr(self, op):
            raise FuseOSError(EFAULT)
        ret = getattr(self, op)(path.strip('/'), *args)
        if self.debug:
            self._debug(op, path, args, ret)
        return ret

    def _debug(self, op, path, args, ret):
        # Trace one operation to stderr: name, whether it is overridden
        # on this class, and the calling uid/gid/pid.
        own = op in self.__class__.__dict__
        sys.stderr.write('%s:%s:%i/%i/%i\n' % (
            (op.upper(), own) + fuse_get_context()
            ))
        sys.stderr.write(':: %s\n' % path)
        # Bulk data ops would flood the log, so arg/result dumps are
        # skipped for read/write.
        if op not in ('read', 'write'):
            sys.stderr.write(':: %s\n' % pf(args))
            sys.stderr.write(':: %s\n' % pf(ret))
        sys.stderr.write('\n')
        sys.stderr.flush()

    def getattr(self, path, fh):
        """Return the stored stat dict, with st_size taken from GridFS."""
        spec = None
        if fh is not None:
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)
        if spec is None:
            raise FuseOSError(ENOENT)
        st = spec.stat.copy()
        st['st_size'] = spec.length
        return st

    def rename(self, path, new):
        new = new.strip('/')
        dirname = basename = None
        if new:
            dirname, basename = pth.split(new)
        # Rename is a metadata-only update on the files collection.
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'filename': new, 'dirname': dirname}},
                upsert=False,
                multi=False,
                )

    def chmod(self, path, mode):
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'stat.st_mode': mode}},
                upsert=False,
                multi=False,
                )

    def chown(self, path, uid, gid):
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'stat.st_uid': uid, 'stat.st_gid': gid}},
                upsert=False,
                multi=False,
                )

    def _ent(self, path):
        """Create a fresh GridFS entry for ``path`` with default stat.

        Raises EEXIST if a visible entry already occupies the path.
        """
        if self.gfs.exists(filename=path, visible=True):
            raise FuseOSError(EEXIST)
        dirname = basename = None
        if path:
            dirname, basename = pth.split(path)
        now = time.time()
        st = self.ST.copy()
        st.update(st_ctime=now, st_mtime=now, st_atime=now)
        return self.gfs.new_file(
                filename=path,
                stat=st,
                dirname=dirname,
                visible=True,
                )

    def create(self, path, mode=FMODE, fi=None):
        """Create a regular file and return a handle (or 0 via fi.fh)."""
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=mode|S_IFREG)
        # Re-open the committed document as a writable GridIn, reusing
        # its metadata but letting Mongo assign a fresh _id.
        file = spec._file
        file.pop('_id')
        fh, spec = self._ctx.acquire(GridIn(self.fs, **file))
        if fi is not None:
            fi.fh = fh
            return 0
        return fh

    def mkdir(self, path='', mode=DMODE):
        """Create a directory entry (empty path creates the root)."""
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=mode|S_IFDIR)
        return 0

    #TODO: impl?
    def link(self, path, source):
        # Hard links are not supported by this backend.
        raise FuseOSError(ENOTSUP)

    def symlink(self, path, source):
        # The link target is stored as the entry's file content.
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=0o0777|S_IFLNK)
            spec.write(str(source))
        return 0

    def readlink(self, path):
        spec = None
        if self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)
        if spec is None:
            raise FuseOSError(ENOENT)
        elif not spec.stat['st_mode'] & S_IFLNK > 0:
            raise FuseOSError(EINVAL)
        return spec.read()

    def readdir(self, path, fh):
        """Yield '.', '..', then the basenames of visible children."""
        spec = None
        if fh is not None:
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)
        if spec is None:
            raise FuseOSError(ENOENT)
        elif not spec.stat['st_mode'] & S_IFDIR > 0:
            raise FuseOSError(ENOTDIR)
        for rel in ('.', '..'):
            yield rel
        # Children are the entries whose stored dirname is this path.
        for sub in self.fs.files.find({
            'dirname': path,
            'visible': True,
            }).distinct('filename'):
            yield pth.basename(sub)

    def open(self, path, flags=None):
        #TODO: handle os.O_* flags?
        fh, spec = self._ctx.get(path)
        if hasattr(flags, 'fh'):
            flags.fh = fh
            return 0
        return fh
    opendir = open

    def release(self, path, fh):
        return self._ctx.release(fh)
    releasedir = release

    def read(self, path, size, offset, fh):
        # NOTE(review): reads always go to the last stored version by
        # path, ignoring fh — confirm this is intended.
        spec = self.gfs.get_last_version(path)
        spec.seek(offset, os.SEEK_SET)
        return spec.read(size)

    def write(self, path, data, offset, fh):
        if fh is not None:
            fh = getattr(fh, 'fh', fh)
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            fh, spec = self._ctx.acquire(path)
        # A read-only GridOut cannot accept writes; truncate swaps the
        # handle for a writable one.
        if not hasattr(spec, 'write'):
            self.truncate(path, 0, fh=fh)
            spec = self._ctx._fd[fh]
        spec.write(data)
        return len(data)

    def unlink(self, path):
        if not path: #...cannot remove mountpoint
            raise FuseOSError(EBUSY)
        spec = self.gfs.get_last_version(path)
        if spec is None or not spec.visible:
            raise FuseOSError(ENOENT)
        # Soft delete: hide every visible version of the path.
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'visible': False}},
                upsert=False,
                multi=True,
                )
        return 0
    rmdir = unlink

    def truncate(self, path, length, fh=None):
        # Only truncation to zero is supported.
        if length != 0:
            raise FuseOSError(ENOTSUP)
        spec = None
        if fh is not None:
            fh = getattr(fh, 'fh', fh)
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)
        if spec is None:
            raise FuseOSError(EBADF)
        # A GridIn that has not flushed a chunk yet can simply reset its
        # in-memory buffer.
        if hasattr(spec, 'write') and spec._chunk_number==0:
            spec._buffer.truncate(0)
            spec._buffer.seek(0)
            spec._position = 0
        else:
            #FIXME: this is terrible... whole class needs refactor
            # Otherwise: write a zero-length replacement entry, hide the
            # old versions, and rebind the open handle to a new writer.
            fi = spec._file
            fi.pop('_id')
            with self.gfs.new_file(**fi) as zero:
                self.unlink(path)
            if fh:
                self._ctx.release(fh)
                self._ctx._fd[fh] = self.gfs.new_file(**fi)
        return 0
class FileStoreMongo(FileStore):
    """
    Filestore database using GridFS (see :mod:`gridfs`)

    :arg pymongo.database.Database connection: MongoDB database object
    """

    def __init__(self, connection):
        self._conn=connection
        self.new_context()
        self._fs=GridFS(self.database)

    def new_file(self, **kwargs):
        """
        See :meth:`FileStore.new_file`

        :rtype: :class:`gridfs.grid_file.GridIn`
        """
        # Remove any previous versions first so each key set maps to at
        # most one stored file.
        self.delete_files(**kwargs)
        return self._fs.new_file(**kwargs)

    def delete_files(self, **kwargs):
        """
        See :meth:`FileStore.delete_files`
        """
        # Loop until no version matching the spec remains; exists() takes
        # the kwargs dict as a positional query document.
        while self._fs.exists(kwargs):
            self._fs.delete(self._fs.get_last_version(**kwargs)._id)

    def get_file(self, **kwargs):
        """
        See :meth:`FileStore.get_file`

        :rtype: :class:`gridfs.grid_file.GridOut` or ``None`` if no
            matching file exists
        """
        if self._fs.exists(kwargs):
            return self._fs.get(self._fs.get_last_version(**kwargs)._id)
        else:
            return None

    def create_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.create_file`
        """
        with self.new_file(**kwargs) as f:
            f.write(file_handle.read())

    def copy_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.copy_file`

        NOTE(review): raises AttributeError if no matching file exists,
        since get_file() then returns ``None`` — confirm callers guard
        against this.
        """
        file_handle.write(self.get_file(**kwargs).read())

    def new_context(self):
        """
        Reconnect to the filestore. This function should be called
        before the first filestore access in each new process.
        """
        self.database=pymongo.database.Database(self._conn, mongo_config['mongo_db'])
        uri=mongo_config['mongo_uri']
        if '@' in uri:
            # Credentials present: parse "user:password@host" out of the
            # URI and authenticate against the database.
            # strip off optional mongodb:// part
            if uri.startswith('mongodb://'):
                uri=uri[len('mongodb://'):]
            result=self.database.authenticate(uri[:uri.index(':')],uri[uri.index(':')+1:uri.index('@')])
            if result==0:
                raise Exception("MongoDB authentication problem")

    # No methods of this store are exposed to untrusted callers.
    valid_untrusted_methods=()
class Manager(object):
    """ Holds data regarding database management. """

    def __init__(self, host=None, port=None, database=None, username=None, prefix=None):
        """ Initializes a connection and database.

            Any argument left as ``None`` falls back to the corresponding
            package-level default.
        """
        from pymongo import Connection
        from gridfs import GridFS
        from .. import pymongo_host, pymongo_port, vasp_database_name, OUTCARS_prefix, pymongo_username
        super(Manager, self).__init__()
        # Host where the database is hosted.
        self._host = host if host is not None else pymongo_host
        # Port of the host where the database is hosted.
        self._port = port if port is not None else pymongo_port
        # Name of the vasp database.
        self._vaspbase_name = database if database is not None else vasp_database_name
        # Prefix of the OUTCAR GridFS collections.
        # (The original comments for prefix and username were swapped.)
        self._outcars_prefix = prefix if prefix is not None else OUTCARS_prefix
        # Username for the database.
        self._username = username if username is not None else pymongo_username
        # Holds connection to pymongo.
        self.connection = Connection(host=self._host, port=self._port)
        # Database within pymongo.
        self.database = getattr(self.connection, self._vaspbase_name)
        # GridFS object for OUTCARs.
        self.outcars = GridFS(self.database, self.outcars_prefix)
        # Collection of comments attached when adding OUTCAR's to a file.
        self.comments = self.database["{0}.comments".format(self.outcars_prefix)]
        # OUTCAR files collection.
        self.files = self.database["{0}.files".format(self.outcars_prefix)]
        # Collection with pre-extracted values from the outcar.
        self.extracted = self.database["extracted"]
        # Collection with FERE ground-state analysis.
        self.fere = self.database["fere_summary"]

    @property
    def host(self):
        """ Host where the database is hosted. """
        return self._host

    @property
    def port(self):
        """ Port of the host where the database is hosted. """
        return self._port

    @property
    def vasp_database_name(self):
        """ Name of the vasp database. """
        return self._vaspbase_name

    @property
    def outcars_prefix(self):
        """ Name of the OUTCAR GridFS collection. """
        return self._outcars_prefix

    def push(self, filename, outcar, comment, **kwargs):
        """ Pushes OUTCAR to database.

            :raise ValueError: if the comment is empty or the compression
                format is invalid.
        """
        from hashlib import sha512
        from os import uname
        from .misc import get_username
        # Explicit raise instead of ``assert ..., ValueError(...)``: the
        # assert raised AssertionError (and disappears under ``-O``).
        if len(comment.replace(" ", "").replace("\n", "")) == 0:
            raise ValueError("Cannot push file if comment is empty.")
        try:
            # Reuse an existing comment document with identical text.
            kwargs["comment"] = next(self.comments.find({"text": comment}))
        except StopIteration:
            # add comment to database
            kwargs["comment"] = self.comments.insert({"text": comment})
        print(kwargs["comment"])
        # Content-address the OUTCAR by its sha512 digest.
        digest = sha512(outcar).hexdigest()
        if self.outcars.exists(sha512=digest):
            print("{0} already in database. Please use 'ladabase.update'.".format(filename))
            return
        # Fill in provenance metadata unless explicitly supplied.
        if "filename" not in kwargs:
            kwargs["filename"] = filename
        if "uploader" not in kwargs:
            kwargs["uploader"] = get_username()
        if "host" not in kwargs:
            kwargs["host"] = uname()[1]
        compression = kwargs.get("compression", None)
        kwargs["compression"] = compression
        if compression == "bz2":
            from bz2 import compress
            return self.outcars.put(compress(outcar), sha512=digest, **kwargs)
        elif compression is None:
            return self.outcars.put(outcar, sha512=digest, **kwargs)
        else:
            raise ValueError("Invalid compression format {0}.".format(compression))

    def find_fromfile(self, path):
        """ Returns the database object corresponding to this file.

            :raise ValueError: if the corresponding object is not found.
            :raise IOError: if the path does not exist or is not a file.

            Finds the corresponding file using sha512 hash.
        """
        from os.path import exists, isfile
        from hashlib import sha512
        from ..opt import RelativeDirectory

        ipath = RelativeDirectory(path).path
        # Explicit raises replace ``assert ..., IOError(...)`` so the
        # documented exception types are actually raised.
        if not exists(ipath):
            raise IOError("{0} does not exist.".format(path))
        if not isfile(ipath):
            raise IOError("{0} is not a file.".format(path))
        with open(ipath, "r") as infile:
            string = infile.read()
        digest = sha512(string).hexdigest()
        if not self.outcars.exists(sha512=digest):
            raise ValueError("{0} could not be found in database.".format(path))
        return self.files.find_one({"sha512": digest})

    def __contains__(self, path):
        """ True if path already in database.

            :raise ValueError: if the path does not exist on disk.
        """
        from os.path import exists
        from hashlib import sha512
        from ..opt import RelativeDirectory
        path = RelativeDirectory(path).path
        if not exists(path):
            # Bug fix: the original built this ValueError but never
            # raised it, silently falling through to open() instead.
            raise ValueError("File {0} does not exist.".format(path))
        with open(path, "r") as infile:
            string = infile.read()
        return self.outcars.exists(sha512=sha512(string).hexdigest())
class TestJob(object):
    """pytest suite for Job: caching, rerun, result storage, and cleanup.

    Requires a local MongoDB; results are stored in the ``joblog_test``
    database, collection ``test`` (dropped before every test).
    """

    def setup_class(self):
        # One shared connection and GridFS handle for the whole suite.
        conn = pymongo.MongoClient('localhost')
        self.db = conn.joblog_test
        self.fs = GridFS(self.db)

    def setup_method(self, method):
        # Fresh random fixture and an empty collection per test.
        self.clf = LogisticRegression
        self.x = np.random.normal(0, 1, (10, 3))
        self.y = np.random.randint(0, 2, (10))
        self.params = dict(penalty='l2', C=2)
        self.db.drop_collection('test')
        self.collection = self.db['test']

    def test_run(self):
        # run() should fit and return a classifier equivalent to fitting
        # directly with the same params.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf = j.run()
        assert isinstance(clf, LogisticRegression)
        clf2 = self.clf(**self.params).fit(self.x, self.y)
        np.testing.assert_array_equal(clf.predict(self.x),
                                      clf2.predict(self.x))

    def test_result_property(self):
        # result is populated only after run().
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        assert j.result is not None

    def test_nonunique_cached(self):
        # Two jobs with equal (copied) inputs share one cache entry:
        # running the second populates the first's result.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        x = self.x.copy()
        y = self.y.copy()
        j2 = Job(self.clf, x, y, self.params, self.collection)
        assert j.result is None
        assert j2.result is None
        clf1 = j2.run()
        assert j.result is not None

    def test_detect_unique(self):
        """Each unique input gets a new entry"""
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        # Changing x invalidates the cache key.
        self.x[0] += 5
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        # Changing y invalidates the cache key.
        self.y[0] += 5
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        # Changing params invalidates the cache key.
        self.params = {}
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        # Changing the estimator class invalidates the cache key.
        self.clf = LinearRegression
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

    def test_rerun_overrides_cache(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run()
        #hack: modify x in place after running job.
        # rerunning will change classification rule
        self.x[:] *= 100
        clf1b = j.run()  # should not rerun
        clf2 = j.rerun()  # should rerun
        assert (clf1.coef_ == clf1b.coef_).all()
        assert not (clf1.coef_ == clf2.coef_).all()

    def test_nosave(self):
        # store=None suppresses persisting the result.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store=None)
        assert j.result is None

    def test_save_score(self):
        # store='score' persists the classifier's score as the result.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store='score')
        assert j.result == clf1.score(self.x, self.y)

    def test_save_predict(self):
        # store='prediction' persists the predictions as the result.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store='prediction')
        np.testing.assert_array_equal(j.result, clf1.predict(self.x))

    def test_duplicate(self):
        # Only the second identical job is flagged as a duplicate.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert not j.duplicate
        j2 = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert not j.duplicate
        assert j2.duplicate

    def test_label(self):
        # A label is stored in the job document alongside the params.
        j = Job(self.clf, self.x, self.y, self.params, self.collection,
                label='test_label')
        e = self.collection.find_one({'label':'test_label'})
        assert e['params'] == self.params

    def test_file_cleanup(self):
        #should delete old files if changing result
        j = Job(self.clf, self.x, self.y, self.params, self.collection,
                label='test_label')
        clf = j.run()
        e = self.collection.find_one({'label': 'test_label'})
        fid = e['result']
        # Overwriting the result should remove the old GridFS file.
        j.result = 5
        assert not self.fs.exists(fid)

    def test_get_set(self):
        # Item assignment round-trips through the database, so a second
        # Job object for the same inputs sees the value.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        j2 = Job(self.clf, self.x, self.y, self.params, self.collection)
        j['test_extra'] = 123
        assert j['test_extra'] == 123
        #confirm it's actually in the database
        assert j2['test_extra'] == 123

    def test_invalid_get(self):
        # Missing attributes raise KeyError with an explanatory message.
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        with pytest.raises(KeyError) as e:
            j['test']
        assert e.value[0] == 'No attribute test associated with this job'
class MongoFileStorage(MongoStorage, FileStorage):
    """File storage backed by MongoDB GridFS.

    Combines MongoStorage connection handling with the FileStorage
    interface; all file operations go through ``self.gridfs``.
    """

    # GridFS query field holding the file name.
    FILENAME = 'filename'

    def __init__(self, *args, **kwargs):
        super(MongoFileStorage, self).__init__(*args, **kwargs)
        # Bound lazily by _connect().
        self.gridfs = None

    def _connect(self, **kwargs):
        """Connect via the parent class, then bind a GridFS handle."""
        result = super(MongoFileStorage, self)._connect(**kwargs)
        if result:
            self.gridfs = GridFS(
                database=self._database,
                collection=self.get_table()
            )
        return result

    def put(self, name, data, meta=None):
        """Store ``data`` as a new version of file ``name``.

        Bug fix: the file object is created *before* the try block.
        Previously ``fs`` was assigned inside ``try``, so a failure in
        new_file() raised NameError in ``finally``, masking the real
        exception.
        """
        fs = self.new_file(name=name, meta=meta)
        try:
            fs.write(data=data)
        finally:
            fs.close()

    def put_meta(self, name, meta):
        """Merge ``meta`` into the metadata of ``name`` by copying the
        current content into a new version with the merged metadata.

        Same fix as put(): the new file is created before the try block
        so ``finally`` can never hit an unassigned ``fs``.
        """
        oldf, _meta = self.get(name, with_meta=True)
        _meta.update(meta)
        fs = self.new_file(name=name, meta=_meta)
        try:
            # Stream the old content across in small chunks.
            while True:
                data = oldf.read(512)
                if not data:
                    break
                fs.write(data=data)
        finally:
            fs.close()

    def get(self, name, version=-1, with_meta=False):
        """Return a stream for ``name`` (newest version by default).

        Returns ``None`` when the file does not exist; with
        ``with_meta=True`` returns ``(stream, metadata)``.
        """
        result = None
        try:
            gridout = self.gridfs.get_version(filename=name, version=version)
        except NoFile:
            # Missing file is signalled by the None return value.
            pass
        else:
            if with_meta:
                result = MongoFileStream(gridout), gridout.metadata
            else:
                result = MongoFileStream(gridout)
        return result

    def get_meta(self, name):
        """Return only the stored metadata of ``name`` (or ``None``)."""
        result = self.get(name, with_meta=True)
        if result is not None:
            result = result[1]
        return result

    def exists(self, name):
        """True if a file named ``name`` exists in GridFS."""
        result = self.gridfs.exists(filename=name)
        return result

    def find(
        self,
        names=None, meta=None, sort=None, limit=-1, skip=0, with_meta=False
    ):
        """Iterate files matching name(s) and/or metadata fields.

        :param names: a single name, an iterable of names, or ``None``.
        :param meta: dict matched against ``metadata.<field>`` values.
        :return: generator of streams, or (stream, metadata) pairs when
            ``with_meta`` is True.
        """
        request = {}
        if names is not None:
            if isinstance(names, basestring):
                request[MongoFileStorage.FILENAME] = names
            else:
                request[MongoFileStorage.FILENAME] = {'$in': names}
        if meta is not None:
            for metafield in meta:
                field = 'metadata.{0}'.format(metafield)
                request[field] = meta[metafield]
        cursor = self.gridfs.find(request)
        if sort is not None:
            cursor.sort(sort)
        if limit > 0:
            cursor.limit(limit)
        if skip > 0:
            cursor.skip(skip)
        if with_meta:
            result = (
                (MongoFileStream(gridout), gridout.metadata)
                for gridout in cursor
            )
        else:
            result = (MongoFileStream(gridout) for gridout in cursor)
        return result

    def list(self):
        """Return the list of stored file names."""
        return self.gridfs.list()

    def new_file(self, name=None, meta=None, data=None):
        """Open a new writable file stream (random name if ``name`` is
        None), optionally writing ``data`` immediately.
        """
        kwargs = {}
        if name is None:
            name = str(uuid())
        kwargs['filename'] = name
        if meta is not None:
            kwargs['metadata'] = meta
        gridout = self.gridfs.new_file(**kwargs)
        result = MongoFileStream(gridout)
        if data is not None:
            result.write(data)
        return result

    def delete(self, names=None):
        """Delete all versions of the given name(s); all files if None."""
        if names is None:
            names = self.gridfs.list()
        names = ensure_iterable(names)
        for name in names:
            # Keep deleting until no version of the name remains.
            while True:
                fs = self.get(name)
                if fs is None:
                    break
                self.gridfs.delete(file_id=fs.get_inner_object()._id)
def lookup_tables_as_collection_and_gridfs(cfg, maindb):
    """Import lookup tables (from txt file) as Collection and GridFS

    Each table is written twice: every row as a document in a collection
    named after the table, and the numeric columns as a packed binary
    GridFS file (little header: row count; then each row prefixed with
    its column count, all as 32-bit floats).

    Args:
        cfg: SEIMS config object
        maindb: workflow model database
    """
    for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
        # import each lookup table as a collection and GridFS file.
        c_list = maindb.collection_names()
        # Recreate the target collection from scratch.
        if not StringClass.string_in_list(tablename.upper(), c_list):
            maindb.create_collection(tablename.upper())
        else:
            maindb.drop_collection(tablename.upper())
        # initial bulk operator
        bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
        # delete if the tablename gridfs file existed
        spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
        if spatial.exists(filename=tablename.upper()):
            x = spatial.get_version(filename=tablename.upper())
            spatial.delete(x._id)
        # read data items
        data_items = read_data_items_from_txt(txt_file)
        # First row holds the field names; remaining rows are data.
        field_names = data_items[0][0:]
        item_values = list()  # import as gridfs file
        for i, cur_data_item in enumerate(data_items):
            if i == 0:
                continue
            data_import = dict()  # import as Collection
            item_value = list()  # import as gridfs file
            for idx, fld in enumerate(field_names):
                if MathClass.isnumerical(cur_data_item[idx]):
                    # Numeric values go to both the document and the
                    # binary row; strings only to the document.
                    tmp_value = float(cur_data_item[idx])
                    data_import[fld] = tmp_value
                    item_value.append(tmp_value)
                else:
                    data_import[fld] = cur_data_item[idx]
            bulk.insert(data_import)
            if len(item_value) > 0:
                item_values.append(item_value)
        MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)
        # begin import gridfs file
        n_row = len(item_values)
        # print(item_values)
        if n_row >= 1:
            n_col = len(item_values[0])
            for i in range(n_row):
                # All rows must have the same number of numeric columns.
                if n_col != len(item_values[i]):
                    raise ValueError('Please check %s to make sure each item has '
                                     'the same numeric dimension. The size of first '
                                     'row is: %d, and the current data item is: %d' %
                                     (tablename, n_col, len(item_values[i])))
                else:
                    # Prefix each row with its column count.
                    item_values[i].insert(0, n_col)
            metadic = {ModelParamDataUtils.item_count: n_row,
                       ModelParamDataUtils.field_count: n_col}
            cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(),
                                                 metadata=metadic)
            # Header: the row count packed as a single float.
            header = [n_row]
            fmt = '%df' % 1
            s = pack(fmt, *header)
            cur_lookup_gridfs.write(s)
            # Body: each row is (n_col, v1, ..., vn) packed as floats.
            fmt = '%df' % (n_col + 1)
            for i in range(n_row):
                s = pack(fmt, *item_values[i])
                cur_lookup_gridfs.write(s)
            cur_lookup_gridfs.close()
def test_post_grid_calendar_returns_success_status(app, coverage, get_app_context):
    """POSTing a grid_calendar zip stores it in GridFS; re-POSTing
    replaces the stored file (old id deleted, new id created).

    Bug fix: the fixture file handles were opened and never closed; both
    uploads now use ``with`` so the handles are released.
    """
    filename = 'export_calendars.zip'
    path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
                        'fixtures/gridcalendar/', filename)
    with open(path, 'rb') as zip_file:
        files = {'file': (zip_file, 'export_calendars.zip')}
        raw = app.post('/coverages/jdr/grid_calendar', data=files)
    r = to_json(raw)
    assert raw.status_code == 200
    assert r.get('message') == 'OK'
    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    gridfs = GridFS(mongo.db)
    file_id = r['coverages'][0]['grid_calendars_id']
    assert gridfs.exists(ObjectId(file_id))
    # we update the file (it's the same, but that's not the point)
    with open(path, 'rb') as zip_file:
        files = {'file': (zip_file, 'export_calendars.zip')}
        raw = app.post('/coverages/jdr/grid_calendar', data=files)
    assert raw.status_code == 200
    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    # it should be another file
    assert file_id != r['coverages'][0]['grid_calendars_id']
    # the previous file has been deleted
    assert not gridfs.exists(ObjectId(file_id))
    # and the new one exist
    assert gridfs.exists(ObjectId(r['coverages'][0]['grid_calendars_id']))