def save_file(self, filename, fileobj, base='fs', content_type=None):
    """Save the file-like object to GridFS using the given filename.
    Returns ``None``.

    .. code-block:: python

        @app.route('/uploads/<path:filename>', methods=['POST'])
        def save_upload(filename):
            mongo.save_file(filename, request.files['file'])
            return redirect(url_for('get_upload', filename=filename))

    :param str filename: the filename of the file to return
    :param file fileobj: the file-like object to save
    :param str base: the base name of the GridFS collections to use
    :param str content_type: the MIME content-type of the file. If
       ``None``, the content-type is guessed from the filename using
       :func:`~mimetypes.guess_type`
    """
    if not isinstance(base, text_type):
        raise TypeError('"base" must be string or unicode')
    if not (hasattr(fileobj, 'read') and callable(fileobj.read)):
        raise TypeError('"fileobj" must have read() method')

    if content_type is None:
        content_type, _ = guess_type(filename)

    storage = GridFS(self.db, base)
    storage.put(fileobj, filename=filename, content_type=content_type)
def test_it_saves_files(self): fileobj = BytesIO(b"these are the bytes") self.mongo.save_file("my-file", fileobj) gridfs = GridFS(self.mongo.db) assert gridfs.exists({"filename": "my-file"})
class mongostore(storage): def __init__(self, mongo_host="127.0.0.1", mongo_port=27017, mongo_db='canopsis', mongo_collection='perfdata', mongo_safe=False): storage.__init__(self) self.logger.debug(" + Init MongoDB Store") self.mongo_host = mongo_host self.mongo_port = mongo_port self.mongo_db = mongo_db self.mongo_collection = mongo_collection self.mongo_safe = mongo_safe self.logger.debug(" + Connect to MongoDB (%s/%s@%s:%s)" % (mongo_db, mongo_collection, mongo_host, mongo_port)) self.conn=Connection(self.mongo_host, self.mongo_port) self.db=self.conn[self.mongo_db] self.collection = self.db[self.mongo_collection] self.grid = GridFS(self.db, self.mongo_collection+".fs") def drop(self): self.db.drop_collection(self.mongo_collection) self.db.drop_collection(self.mongo_collection+".fs.chunks") self.db.drop_collection(self.mongo_collection+".fs.files") def set_raw(self, key, value): try: self.collection.update({'_id': key}, {"$set": { 'd': value } }, upsert=True, safe=self.mongo_safe) except InvalidStringData: self.rm(key) self.grid.put(value, _id=key) except Exception, err: self.logger.error(err) self.logger.error(self.db.error())
def start(mode=2):
    # mode = 1 - registration mode
    # mode = 2 - identification mode
    #mode = 2
    dev = detect_printreader()  # try to detect the fingerprint reader
    if dev != None:
        client = pymongo.MongoClient("localhost", 27017)  # connect to mongodb
        db = client.fdb  # use the fdb database
        fs = GridFS(db)
        if mode == 1:
            print '-------------------------------------------------------------registration mode'
            current_finger = scan_finger(dev)
            current_finger_file_id = fs.put(current_finger, filename="employee")
            print 'Fingerprint saved with id: ' + str(current_finger_file_id)
        elif mode == 2:
            # Python can run some code when the program is killed by a signal;
            # see http://crunchtools.com/unixlinux-signals-101/
            # signal.signal(signal.SIGABRT, cleanup)
            # signal.signal(signal.SIGINT, cleanup)
            # signal.signal(signal.SIGKILL, cleanup)
            # signal.signal(signal.SIGTERM, cleanup)
            print '------------------------------------------------------------identification mode'
            while True:
                cursor_for_files = db.fs.files.find()
                all_fingerprint_files_ids = []
                for current_file in cursor_for_files:
                    all_fingerprint_files_ids.append(current_file["_id"])
                all_fingerprint_files_data = []
                for file_id in all_fingerprint_files_ids:
                    all_fingerprint_files_data.append(fs.get(file_id).read())
                if verify_finger(dev, all_fingerprint_files_data) != None:
                    print "Yeah, you are in the system"
                else:
                    print "You are not in the system yet"
def WriteTempFile(self, data, hash_name=None):
    if self.use_cache == True:
        if hash_name is None:
            hash = md5(self.url)
            hash_name = hash.hexdigest()
        self.last_hash_name = hash_name
        self.log.debug('write file to cache: %s' % hash_name)
        self.log.debug('use mongo: %s' % self.use_mongo)
        # open(self.download_temp+hash_name, 'wb').write(data)
        if self.use_mongo == False:
            f_name = self.download_temp + hash_name + '.gz'
            f = gzip.open(f_name, 'wb')
            f.write(data)
            f.close()
        if self.use_mongo == True:
            connection = Connection("localhost", 27017)
            db = connection['parser']
            s = StringIO.StringIO()
            f = gzip.GzipFile(fileobj=s, mode='wb')
            f.write(data)
            f.close()
            val = s.getvalue()
            s.close()
            del (s)
            del (f)
            fs = GridFS(db)
            fp = fs.open(hash_name, 'w', self.download_temp.replace('/', ''))
            fp.write(val)
            fp.close()
            connection.disconnect()
def test_gridfs(self): client = self.client fs = GridFS(client.pymongo_test) def new_file(session=None): grid_file = fs.new_file(_id=1, filename='f', session=session) # 1 MB, 5 chunks, to test that each chunk is fetched with same lsid. grid_file.write(b'a' * 1048576) grid_file.close() def find(session=None): files = list(fs.find({'_id': 1}, session=session)) for f in files: f.read() self._test_ops( client, (new_file, [], {}), (fs.put, [b'data'], {}), (lambda session=None: fs.get(1, session=session).read(), [], {}), (lambda session=None: fs.get_version('f', session=session).read(), [], {}), (lambda session=None: fs.get_last_version('f', session=session).read(), [], {}), (fs.list, [], {}), (fs.find_one, [1], {}), (lambda session=None: list(fs.find(session=session)), [], {}), (fs.exists, [1], {}), (find, [], {}), (fs.delete, [1], {}))
def fetch_hdu_from_mongo(filename): if MONGODB["enabled"]: with closing( Connection(host=MONGODB["host"], port=MONGODB["port"]) ) as mongo_connection: gfs = GridFS(mongo_connection[MONGODB["database"]]) return pyfits.open(gfs.get_version(filename), mode="readonly")
def raw_content(request, content_id=None): # TODO: migrate this view to the APIs? (would need a GridFSResource) if not content_id: raise Http404("Content not found") if not isinstance(content_id, ObjectId): try: content_id = ObjectId(content_id) except: raise Http404("Content not found") dbfs = MongoClient().thugfs fs = GridFS(dbfs) fo = fs.get(content_id) try: content = base64.b64decode(fo.read()) except: raise Http404("Content not found") mime = magic.from_buffer(content, mime=True) response = HttpResponse(content, content_type=mime) response['Content-Disposition'] = 'attachment; filename="{}"'.format(fo.filename or fo.md5) return response
def save_file(self, filename, fileobj, base="fs", content_type=None, **kwargs):
    """Save a file-like object to GridFS using the given filename.
    Returns the ``_id`` of the created file.

    .. code-block:: python

        @app.route("/uploads/<path:filename>", methods=["POST"])
        def save_upload(filename):
            mongo.save_file(filename, request.files["file"])
            return redirect(url_for("get_upload", filename=filename))

    :param str filename: the filename of the file to return
    :param file fileobj: the file-like object to save
    :param str base: the base name of the GridFS collections to use
    :param str content_type: the MIME content-type of the file. If
        ``None``, the content-type is guessed from the filename using
        :func:`~mimetypes.guess_type`
    :param kwargs: extra attributes to be stored in the file's document,
        passed directly to :meth:`gridfs.GridFS.put`
    """
    if not isinstance(base, text_type):
        raise TypeError("'base' must be string or unicode")
    if not (hasattr(fileobj, "read") and callable(fileobj.read)):
        raise TypeError("'fileobj' must have read() method")

    if content_type is None:
        content_type, _ = guess_type(filename)

    storage = GridFS(self.db, base)
    id = storage.put(fileobj, filename=filename, content_type=content_type, **kwargs)
    return id
def get_asset(file_id): _fs = GridFS(app.data.driver.db) _file = _fs.get(ObjectId(file_id)) content = _file.read() return Response(content, mimetype=str(_file.content_type))
def img(image_id): """Retrieves an image from GridFS""" oid = ObjectId(image_id) fs = GridFS(db) nid = fs.get(oid) return Response(nid, mimetype="image/jpeg", direct_passthrough=True)
def table_copy_doc(request):
    """
    1) Create an empty document
    """
    data = get_post(request)
    old_id = data['doc_id']
    from libs.files.files import get_nf, get_file_meta, add_file_raw
    # 1) copy the document itself
    new_id_owner = simply_copy_doc(request, old_id)
    # 2) duplicate the images
    from gridfs import GridFS
    fs = GridFS(request.db)
    for fn in request.db.fs.files.find({'doc_id': old_id, 'file_name': re.compile('^orig_', re.I | re.U)}):
        # TODO: actually downscale the image on insert; for now only the name changes
        if not fn:
            return None, None, None
        f = fs.get(fn['_id']).read()
        fs.put(f, file_name='thumb_1' + fn['file_name'], doc_id=new_id_owner, proc_id=fn['proc_id'], mime=fn['mime'])
        fs.put(f, file_name='orig_1' + fn['file_name'], doc_id=new_id_owner, proc_id=fn['proc_id'], mime=fn['mime'])
        # add_file_raw(fn['proc_id'], old_id, f, fn['mime'], '1'+fn['file_name'])
    # 3) duplicate the documents it owns
    for res in request.db.doc.find({'owner': old_id}):
        doc_id = simply_copy_doc(request, res['_id'])
        request.db.doc.update({'_id': doc_id}, {'$set': {'owner': new_id_owner}})
    return {"result": "ok"}
def download(id, filename): file = GridFS(getDBConnection().upload).get(ObjectId(id)) response = flask.Response(file.__iter__()) response.headers['content-type'] = file.metadata['content_type'] response.content_length = file.length return response
def execute_inspections(inspection_ids, gridfs_id, frame_meta): # Works like execute_inspection below, but takes multiple inspection ID's # If no inspection_ids, assume all inspections if not inspection_ids: inspection_ids = [ i.id for i in M.Inspection.objects ] db = M.Inspection._get_db() fs = GridFS(db) image = Image(PIL.open(fs.get(gridfs_id))) frame = M.Frame() frame.image = image frame.metadata = frame_meta features = [] for insp_id in inspection_ids: insp = M.Inspection.objects.get(id=insp_id) try: features += insp.execute(frame) print 'Finished inspection %s on image %s' % (insp_id, gridfs_id) except: print 'Inspection Failed' print 'Finished inspections on image %s' % gridfs_id return [ f.feature for f in features ]
class FilesWrapper(object):

    def __init__(self, db, *args, **kwargs):
        self.storage = GridFS(db)
        self.comments_wrapper = CommentsWrapper(db)

    def list(self):
        """
        Return a list of files.
        """
        uuids = {f.uuid: {'name': f.filename,
                          'uuid': f.uuid,
                          'comments': 0,
                          'version': f.version}
                 for f in self.storage.find()}
        comments = self.comments_wrapper.get_grouped_counts(uuids.keys())

        def _up(uuid, count):
            uuids[uuid]['comments'] = count
            return uuids[uuid]

        return [_up(*c) for c in comments]

    def get(self, uuid, version=0):
        """
        Return the selected file, or None if it does not exist.
        """
        try:
            return self.storage.get_version(uuid=uuid, version=version)
        except NoFile:
            return None

    def add(self, file):
        """
        Save the file into storage and assign it a uuid.
        """
        uuid = unicode(uuid4())
        self.storage.put(file, filename=file.filename, uuid=uuid, version=0)
        return uuid
def pre_save(self, model_instance, add): oid = getattr(model_instance, "_%s_oid" % self.attname, None) value = getattr(model_instance, "_%s_val" % self.attname, None) if not getattr(model_instance, "id"): return u'' if value == getattr(model_instance, "_%s_cache" % self.attname, None): return oid from django.db import connections gdfs = GridFS(connections[self.model.objects.db].db_connection.db) if not self._versioning and not oid is None: gdfs.delete(oid) if not self._as_string: value.seek(0) value = value.read() oid = gdfs.put(value) setattr(self, "_%s_oid" % self.attname, oid) setattr(self, "_%s_cache" % self.attname, value) return oid
def init(self):
    print "Initialisation of the mongodb module"
    self.con = Connection(self.uri)
    # Open a gridfs connection
    self.db = getattr(self.con, self.database)
    self.hosts_fs = GridFS(self.db, collection='retention_hosts')
    self.services_fs = GridFS(self.db, collection='retention_services')
def tearDown(self): gridfs = GridFS(self.mongo.db) files = list(gridfs.find()) for gridfile in files: gridfs.delete(gridfile._id) super(GridFSCleanupMixin, self).tearDown()
class DB(pymongo.database.Database):

    conn = pymongo.Connection()
    RINGTONE_TYPE = 1

    def __init__(self, dbtype="ordinary"):
        d_ = {
            "ordinary": "grat_compaign",
            "test": "grat_compaign_test",
            "file": "grat_files"
        }
        dbname = d_.get(dbtype, "grat_compaign_test")
        pymongo.database.Database.__init__(self, DB.conn, dbname)
        self.gridfs = GridFS(self)

    def save_tone_to_mongo(self, thing2read, filename):
        print self.gridfs.put(thing2read, filename=filename, type=DB.RINGTONE_TYPE)
        print "%s saved to db" % filename

    def get_files(self, criteria):
        obj = self.fs.files.find(criteria)
        return obj

    def get_all_tone_ids(self):
        tone_ids = list()
        for t in self.get_files({"type": DB.RINGTONE_TYPE}):
            tone_ids.append((t["_id"], t["filename"]))
        print "%d tones" % (len(tone_ids))
        return tone_ids

    def get_tone(self, oid):
        fo = self.gridfs.get(oid)
        fp = "/home/xiaohan/Desktop/tmp.tone.mp3"
        with open(fp, "w") as f:
            f.write(fo.read())
        return fp
def process(self, document): database = pymongo.MongoClient(host=config.MONGODB_CONFIG['host'], port=config.MONGODB_CONFIG['port'] )[config.MONGODB_CONFIG['database']] gridfs = GridFS(database, config.MONGODB_CONFIG['gridfs_collection']) gridfs.delete(ObjectId(document['file_id'])) return {}
def remove_file(self, filename, base='fs'): storage = GridFS(self.db, base) try: grid_file = storage.get_last_version(filename) storage.delete(grid_file._id) return True except NoFile: return False
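# remove_file above deletes only the most recent revision of a filename;
# GridFS keeps one fs.files document per revision. A minimal companion
# sketch (not part of the original helper) that would live on the same
# wrapper class and drops every stored revision:
def remove_all_versions(self, filename, base='fs'):
    # iterate over every fs.files document matching the name and delete by _id
    storage = GridFS(self.db, base)
    deleted = False
    for grid_file in storage.find({'filename': filename}):
        storage.delete(grid_file._id)
        deleted = True
    return deleted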
def _get_unique_filename(name): fs = GridFS(_get_db()) file_root, file_ext = os.path.splitext(name) count = itertools.count(1) while fs.exists(filename=name): # file_ext includes the dot. name = os.path.join("%s_%s%s" % (file_root, next(count), file_ext)) return name
def test_remove_file(self): """Tests removing a gridfs file """ fs = GridFS(self.conn['test'], 'test') id = fs.put("test file", filename="test.txt", encoding='utf8') assert_soon(lambda: sum(1 for _ in self.solr_conn.search("*:*")) == 1) fs.delete(id) assert_soon(lambda: sum(1 for _ in self.solr_conn.search("*:*")) == 0)
def test_it_guesses_type_from_filename(self): fileobj = BytesIO(b"these are the bytes") self.mongo.save_file("my-file.txt", fileobj) gridfs = GridFS(self.mongo.db) gridfile = gridfs.find_one({"filename": "my-file.txt"}) assert gridfile.content_type == "text/plain"
def _get_unique_filename(name, db_alias=DEFAULT_CONNECTION_NAME, collection_name='fs'): fs = GridFS(get_db(db_alias), collection_name) file_root, file_ext = os.path.splitext(name) count = itertools.count(1) while fs.exists(filename=name): # file_ext includes the dot. name = os.path.join("%s_%s%s" % (file_root, next(count), file_ext)) return name
def test_it_saves_files_with_props(self): fileobj = BytesIO(b"these are the bytes") self.mongo.save_file("my-file", fileobj, foo="bar") gridfs = GridFS(self.mongo.db) gridfile = gridfs.find_one({"filename": "my-file"}) assert gridfile.foo == "bar"
def upload(): stream = None fn = None content_type = None if request.form['url'] != "": stream = urlopen(request.form['url']) fn = path.basename(urlparse(request.form['url']).path) content_type = stream.info().gettype() else: stream = request.files['file'] fn = request.files['file'].filename content_type = request.files['file'].content_type metadata = { "name": request.form['name'], "full_description": request.form['full_description'], "related_to": request.form['related_to'], "data_format": request.form['data_format'], "creator": request.form['creator'], "reference": request.form['reference'], "bibtex": request.form['bibtex'], "comments": request.form['comments'], "uploader": current_user.name, "uploader_id": current_user.id, "time": datetime.datetime.utcnow(), "original_file_name": fn, "status": "unmoderated", "version": "1", "file_url": request.form['url'], "content_type": content_type } flask.flash("Received file '%s' and awaiting moderation from an administrator" % fn) upload_db = getDBConnection().upload upload_fs = GridFS(upload_db) db_id = upload_fs.put(stream.read(), metadata=metadata, filename=fn) logging.info("file '%s' receieved and data with id '%s' stored" % (fn, db_id)) if fn[-4:] == ".tgz" or fn[-4:] == ".tar" or fn[-7:] == ".tar.gz": child_index = [] tar = tarfile.open(fileobj=upload_fs.get(ObjectId(db_id))) for tarinfo in tar: if tarinfo.isfile(): metadata2 = copy.copy(metadata) metadata2['parent_archive_id'] = db_id metadata2['parent_archive_filename'] = fn metadata2['status'] = "unmoderatedchild" metadata2['original_file_name'] = fn + "/" + tarinfo.name metadata2['related_to'] = "" metadata2['content_type'] = "" id = upload_fs.put( tar.extractfile(tarinfo).read(), metadata=metadata2, filename=fn + "/" + tarinfo.name) child_index.append([id, tarinfo.name]) upload_db.fs.files.update({"_id": db_id}, {"$set": {"metadata.child_index": child_index}}) return flask.redirect("/upload/view/" + str(db_id))
def clean(self, val, doc=None): if not self._database: raise FieldException("database is required") if isinstance(val, ObjectId): return val try: f = GridFS(self._database, collection=self._collection) self._args["filename"] = self._args.get("filename", self._name) return f.put(val, **self._args) except Exception as e: raise FieldException(e.message)
def viewAll(): db = getDBConnection().upload fs = GridFS(db) approved = [ fs.get(x['_id']) for x in db.fs.files.find({"metadata.status" : "approved"}) ] unmoderated = [ fs.get(x['_id']) for x in db.fs.files.find({"metadata.status" : "unmoderated"}) ] return render_template("upload-view.html", title = "Uploaded data", bread = get_bread(), approved=approved, unmoderated2=unmoderated)
from gridfs import GridFS

import utils.sys.config
from utils.gadget.general import SysUtils

# GridFS bucket collection for component package files
pack_com_files_storage = GridFS(utils.sys.config.g_firmware_db_full, collection='component_files_storage')


class PackCOMFilesStorage:

    @staticmethod
    def save(file_id, file_name, file_type, contents):
        # write the package file contents into the GridFS bucket
        pack_com_files_storage.put(contents, content_type=file_type, filename=file_id, aliases=[file_name])

    @staticmethod
    def fetch(file_id):
        grid_out = pack_com_files_storage.find_one({'filename': file_id})
        item = SysUtils.grid_out_to_dict(grid_out)
        return item

    @staticmethod
    def delete(file_id):
        file_item = pack_com_files_storage.find_one({'filename': file_id})
        if file_item is None:
            return False
        pack_com_files_storage.delete(file_item._id)
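# A hypothetical round trip with the storage class above; the file id and
# names are made up, and the class keys everything on GridFS's `filename`
# field (the caller-chosen string id), not on the ObjectId.
PackCOMFilesStorage.save('fw-0001', 'rootfs.tar.gz', 'application/gzip', b'\x1f\x8b...')
item = PackCOMFilesStorage.fetch('fw-0001')    # dict produced by SysUtils.grid_out_to_dict
PackCOMFilesStorage.delete('fw-0001')          # returns False when nothing matches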
from datetime import datetime from HTMLParser import HTMLParser import re from pytz import timezone, utc from flask import request from gridfs import GridFS from mongoengine import * import wtforms from wtforms import validators from plog import app, db from plog.utils import randstring uploads = GridFS(db) boundary = re.compile(r'\s') nopunc = re.compile(r'[^a-z0-9]') class TagCloud(Document): tag = StringField(primary_key=True) count = IntField() updated = DateTimeField() meta = { 'allow_inheritance': False, 'indexes': [ { 'fields': ['count', 'tag']
import datetime

DIR_PATH = os.path.dirname(os.path.realpath(__file__))
IMAGE_DIR = DIR_PATH + "/tmp_images"
GRID_FS_IMAGE_NAMESPACE = "images"
LOCAL_IP = "localhost"
PORT = 5000

app = Flask(__name__)
CORS(app)
app.debug = True
# credentials and the remainder of the connection string are redacted in the source
app.config["MONGO_URI"] = "mongodb://*****:*****"


@app.route('/')
def root():
    return "hello world!"


# Specify an RFID -> Dish link
@app.route('/rfidtodish', methods=['POST'])
def register_meal_to_rfid():
    data = request.get_json()
def add_tar(self, record=None, name=None, style=None, tar=None, root_dir=None):
    """
    Archives and stores a folder associated with a record.  Issues an
    error if exactly one matching record is not found in the database,
    or the associated record already has a tar archive.

    Parameters
    ----------
    record : iprPy.Record, optional
        The record to associate the tar archive with.  If not given, then
        name and/or style necessary to uniquely identify the record are
        needed.
    name : str, optional
        The name to use in uniquely identifying the record.
    style : str, optional
        The style to use in uniquely identifying the record.
    tar : bytes, optional
        The bytes content of a tar file to save.  tar cannot be given
        with root_dir.
    root_dir : str, optional
        Specifies the root directory for finding the directory to archive.
        The directory to archive is at <root_dir>/<name>.  (Default is to
        set root_dir to the current working directory.)  tar cannot be
        given with root_dir.

    Raises
    ------
    ValueError
        If style and/or name content given with record or the record
        already has an archive.
    """
    # Create Record object if not given
    if record is None:
        record = self.get_record(name=name, style=style)

    # Issue a ValueError for competing kwargs
    elif style is not None or name is not None:
        raise ValueError('kwargs style and name cannot be given with kwarg record')

    # Verify that record exists
    else:
        record = self.get_record(name=record.name, style=record.style)

    # Define mongofs
    mongofs = GridFS(self.mongodb, collection=record.style)

    # Check if an archive already exists
    if mongofs.exists({"recordname": record.name}):
        raise ValueError('Record already has an archive')

    if tar is None:
        # Make archive
        shutil.make_archive(record.name, 'gztar', root_dir=root_dir,
                            base_dir=record.name)

        # Upload archive, retrying once on failure
        filename = Path(record.name + '.tar.gz')
        with open(filename, 'rb') as f:
            tries = 0
            while tries < 2:
                try:
                    mongofs.put(f, recordname=record.name)
                    break
                except Exception:
                    tries += 1
            if tries == 2:
                raise ValueError('Failed to upload archive 2 times')

        # Remove local archive copy
        filename.unlink()

    elif root_dir is None:
        # Upload archive, retrying once on failure
        tries = 0
        while tries < 2:
            try:
                mongofs.put(tar, recordname=record.name)
                break
            except Exception:
                tries += 1
        if tries == 2:
            raise ValueError('Failed to upload archive 2 times')

    else:
        raise ValueError('tar and root_dir cannot both be given')
def __init__(self, db): from gridfs import GridFS self.gridfs = GridFS(db)
def send_file(self, filename=None, file_id=None, base="fs", version=-1, cache_for=31536000): """Respond with a file from GridFS. Returns an instance of the :attr:`~flask.Flask.response_class` containing the named file, and implement conditional GET semantics (using :meth:`~werkzeug.wrappers.ETagResponseMixin.make_conditional`). .. code-block:: python @app.route("/uploads/<path:filename>") def get_upload(filename): return mongo.send_file(filename) :param str filename: the filename of the file to return :param str file_id: the file_id of the file to return. (only the hex str eg: '507f191e810c19729de860ea') :param str base: the base name of the GridFS collections to use :param bool version: if positive, return the Nth revision of the file identified by filename; if negative, return the Nth most recent revision. If no such version exists, return with HTTP status 404. :param int cache_for: number of seconds that browsers should be instructed to cache responses """ if not isinstance(base, text_type): raise TypeError("'base' must be string or unicode") if not isinstance(version, num_type): raise TypeError("'version' must be an integer") if not isinstance(cache_for, num_type): raise TypeError("'cache_for' must be an integer") storage = GridFS(self.db, base) try: if filename != None and file_id == None: fileobj = storage.get_version(filename=filename, version=version) elif file_id != None and filename == None: fileobj = storage.get(ObjectId(file_id)) else: abort(404) except NoFile: abort(404) # mostly copied from flask/helpers.py, with # modifications for GridFS data = wrap_file(request.environ, fileobj, buffer_size=1024 * 255) response = current_app.response_class( data, mimetype=fileobj.content_type, direct_passthrough=True, ) response.content_length = fileobj.length response.last_modified = fileobj.upload_date response.set_etag(fileobj.md5) response.cache_control.max_age = cache_for response.cache_control.public = True response.make_conditional(request) return response
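# A small end-to-end sketch pairing the save_file/send_file methods above.
# The route names, the "owner" attribute, and the `app`/`mongo` objects are
# assumptions for illustration only.
from flask import redirect, request, url_for


@app.route("/photos", methods=["POST"])
def upload_photo():
    photo = request.files["photo"]
    # save_file returns the GridFS _id; extra kwargs land on the fs.files document
    oid = mongo.save_file(photo.filename, photo, owner="alice")
    return redirect(url_for("photo_by_id", file_id=str(oid)))


@app.route("/photos/id/<file_id>")
def photo_by_id(file_id):
    # retrieval by hex ObjectId string
    return mongo.send_file(file_id=file_id)


@app.route("/photos/name/<path:filename>")
def photo_by_name(filename):
    # retrieval by filename; version=-1 (the default) returns the latest revision
    return mongo.send_file(filename, version=-1)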
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Save a file to the MongoDB database via GridFS

from mongo_db import client
from gridfs import GridFS

db = client.school
gfs = GridFS(db, collection="book")
files = open("/Users/superallen/Desktop/myCode/演示文件.txt", "rb")
args = {"type": "txt", "keyword": "file"}
gfs.put(files, filename="演示文件.txt", **args)
files.close()
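# Reading the file back (a sketch that assumes the put() above succeeded);
# extra keyword arguments passed to put() become attributes on the GridOut.
grid_out = gfs.find_one({"filename": "演示文件.txt"})
if grid_out is not None:
    print(grid_out.type, grid_out.keyword)  # "txt" "file"
    data = grid_out.read()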
from gridfs import GridFS from bson import ObjectId reload(sys) sys.setdefaultencoding("utf-8") sys.path.append( os.path.join(os.path.split(os.path.realpath(__file__))[0], '../util')) import loghelper, db, util, oss2_helper #logger loghelper.init_logger("migrate_file", stream=True) logger = loghelper.get_logger("migrate_file") mongo = db.connect_mongo() grid = GridFS(mongo.gridfs) oss2 = oss2_helper.Oss2Helper() def save_oss2_image(grid_id, size=None): if grid_id is None or grid_id.strip() == "": return item = mongo.temp.gridid.find_one({"gridid": grid_id}) if item is not None: return out = grid.get(ObjectId(grid_id)) logger.info("%s -> %s", grid_id, out.name) if size is None: img, xsize, ysize = util.convert_image(out, out.name)
class MongoDB(Report): """Stores report in MongoDB.""" # Mongo schema version, used for data migration. SCHEMA_VERSION = "1" def connect(self): """Connects to Mongo database, loads options and set connectors. @raise CuckooReportError: if unable to connect. """ host = self.options.get("host", "127.0.0.1") port = self.options.get("port", 27017) db = self.options.get("db", "cuckoo") try: self.conn = MongoClient(host, port) self.db = self.conn[db] self.fs = GridFS(self.db) except TypeError: raise CuckooReportError("Mongo connection port must be integer") except ConnectionFailure: raise CuckooReportError("Cannot connect to MongoDB") def store_file(self, file_obj, filename=""): """Store a file in GridFS. @param file_obj: object to the file to store @param filename: name of the file to store @return: object id of the stored file """ if not filename: filename = file_obj.get_name() existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()}) if existing: return existing["_id"] else: new = self.fs.new_file(filename=filename, contentType=file_obj.get_content_type(), sha256=file_obj.get_sha256()) for chunk in file_obj.get_chunks(): new.write(chunk) try: new.close() except FileExists: to_find = {"sha256": file_obj.get_sha256()} return self.db.fs.files.find_one(to_find)["_id"] else: return new._id def run(self, results): """Writes report. @param results: analysis results dictionary. @raise CuckooReportError: if fails to connect or write to MongoDB. """ # We put the raise here and not at the import because it would # otherwise trigger even if the module is not enabled in the config. if not HAVE_MONGO: raise CuckooDependencyError("Unable to import pymongo " "(install with `pip install pymongo`)") self.connect() # Set mongo schema version. # TODO: This is not optimal becuase it run each analysis. Need to run # only one time at startup. if "cuckoo_schema" in self.db.collection_names(): if self.db.cuckoo_schema.find_one( )["version"] != self.SCHEMA_VERSION: CuckooReportError( "Mongo schema version not expected, check data migration tool" ) else: self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION}) # Set an unique index on stored files, to avoid duplicates. # From pymongo docs: # Returns the name of the created index if an index is actually # created. # Returns None if the index already exists. # TODO: This is not optimal because it run each analysis. Need to run # only one time at startup. self.db.fs.files.ensure_index("sha256", unique=True, sparse=True, name="sha256_unique") # Create a copy of the dictionary. This is done in order to not modify # the original dictionary and possibly compromise the following # reporting modules. report = dict(results) if not "network" in report: report["network"] = {} # Store the sample in GridFS. if results["info"]["category"] == "file" and "target" in results: sample = File(self.file_path) if sample.valid(): fname = results["target"]["file"]["name"] sample_id = self.store_file(sample, filename=fname) report["target"] = {"file_id": sample_id} report["target"].update(results["target"]) # Store the PCAP file in GridFS and reference it back in the report. 
pcap_path = os.path.join(self.analysis_path, "dump.pcap") pcap = File(pcap_path) if pcap.valid(): pcap_id = self.store_file(pcap) report["network"]["pcap_id"] = pcap_id sorted_pcap_path = os.path.join(self.analysis_path, "dump_sorted.pcap") spcap = File(sorted_pcap_path) if spcap.valid(): spcap_id = self.store_file(spcap) report["network"]["sorted_pcap_id"] = spcap_id # Store the process memory dump file in GridFS and reference it back in the report. if "procmemory" in report and self.options.get("store_memdump", False): for idx, procmem in enumerate(report["procmemory"]): procmem_path = os.path.join(self.analysis_path, "memory", "{0}.dmp".format(procmem["pid"])) procmem_file = File(procmem_path) if procmem_file.valid(): procmem_id = self.store_file(procmem_file) report["procmemory"][idx].update( {"procmem_id": procmem_id}) # Walk through the dropped files, store them in GridFS and update the # report with the ObjectIds. new_dropped = [] if "dropped" in report: for dropped in report["dropped"]: new_drop = dict(dropped) drop = File(dropped["path"]) if drop.valid(): dropped_id = self.store_file(drop, filename=dropped["name"]) new_drop["object_id"] = dropped_id new_dropped.append(new_drop) report["dropped"] = new_dropped # Add screenshots. report["shots"] = [] shots_path = os.path.join(self.analysis_path, "shots") if os.path.exists(shots_path): # Walk through the files and select the JPGs. shots = [ shot for shot in os.listdir(shots_path) if shot.endswith(".jpg") ] for shot_file in sorted(shots): shot_path = os.path.join(self.analysis_path, "shots", shot_file) shot = File(shot_path) # If the screenshot path is a valid file, store it and # reference it back in the report. if shot.valid(): shot_id = self.store_file(shot) report["shots"].append(shot_id) # Store chunks of API calls in a different collection and reference # those chunks back in the report. In this way we should defeat the # issue with the oversized reports exceeding MongoDB's boundaries. # Also allows paging of the reports. if "behavior" in report and "processes" in report["behavior"]: new_processes = [] for process in report["behavior"]["processes"]: new_process = dict(process) chunk = [] chunks_ids = [] # Loop on each process call. for index, call in enumerate(process["calls"]): # If the chunk size is 100 or if the loop is completed then # store the chunk in MongoDB. if len(chunk) == 100: to_insert = { "pid": process["process_id"], "calls": chunk } chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Reset the chunk. chunk = [] # Append call to the chunk. chunk.append(call) # Store leftovers. if chunk: to_insert = {"pid": process["process_id"], "calls": chunk} chunk_id = self.db.calls.insert(to_insert) chunks_ids.append(chunk_id) # Add list of chunks. new_process["calls"] = chunks_ids new_processes.append(new_process) # Store the results in the report. report["behavior"] = dict(report["behavior"]) report["behavior"]["processes"] = new_processes # Store the report and retrieve its object id. self.db.analysis.save(report) #self.conn.disconnect() //no longer exists self.conn.close()
def test_grid_in_default_opts(self): self.assertRaises(TypeError, GridIn, "foo") a = GridIn(self.db.fs) self.assertTrue(isinstance(a._id, ObjectId)) self.assertRaises(AttributeError, setattr, a, "_id", 5) self.assertEqual(None, a.filename) self.assertEqual(None, a.name) a.filename = "my_file" self.assertEqual("my_file", a.filename) self.assertEqual("my_file", a.name) self.assertEqual(None, a.content_type) a.content_type = "text/html" self.assertEqual("text/html", a.content_type) self.assertRaises(AttributeError, getattr, a, "length") self.assertRaises(AttributeError, setattr, a, "length", 5) self.assertEqual(255 * 1024, a.chunk_size) self.assertRaises(AttributeError, setattr, a, "chunk_size", 5) self.assertRaises(AttributeError, getattr, a, "upload_date") self.assertRaises(AttributeError, setattr, a, "upload_date", 5) self.assertRaises(AttributeError, getattr, a, "aliases") a.aliases = ["foo"] self.assertEqual(["foo"], a.aliases) self.assertRaises(AttributeError, getattr, a, "metadata") a.metadata = {"foo": 1} self.assertEqual({"foo": 1}, a.metadata) self.assertRaises(AttributeError, getattr, a, "md5") self.assertRaises(AttributeError, setattr, a, "md5", 5) a.close() a.forty_two = 42 self.assertEqual(42, a.forty_two) self.assertTrue(isinstance(a._id, ObjectId)) self.assertRaises(AttributeError, setattr, a, "_id", 5) self.assertEqual("my_file", a.filename) self.assertEqual("my_file", a.name) self.assertEqual("text/html", a.content_type) self.assertEqual(0, a.length) self.assertRaises(AttributeError, setattr, a, "length", 5) self.assertEqual(255 * 1024, a.chunk_size) self.assertRaises(AttributeError, setattr, a, "chunk_size", 5) self.assertTrue(isinstance(a.upload_date, datetime.datetime)) self.assertRaises(AttributeError, setattr, a, "upload_date", 5) self.assertEqual(["foo"], a.aliases) self.assertEqual({"foo": 1}, a.metadata) self.assertEqual("d41d8cd98f00b204e9800998ecf8427e", a.md5) self.assertRaises(AttributeError, setattr, a, "md5", 5) # Make sure custom attributes that were set both before and after # a.close() are reflected in b. PYTHON-411. b = GridFS(self.db).get_last_version(filename=a.filename) self.assertEqual(a.metadata, b.metadata) self.assertEqual(a.aliases, b.aliases) self.assertEqual(a.forty_two, b.forty_two)
def __init__(self): self._db = MongoClient()["ai_cloud_platform"] self._fs = GridFS(MongoClient()["ai_cloud_platform_fs"])
def listAll(self, file_coll): print(GridFS(self.db, collection=file_coll).list())
from bson.objectid import ObjectId from gridfs import GridFS from PIL import Image from werkzeug.utils import secure_filename import io, os app = Flask(__name__) # opens connection to database client = MongoClient( "mongodb://*****:*****@veterans-shard-00-00-0nuxa.mongodb.net:27017," "veterans-shard-00-01-0nuxa.mongodb.net:27017," "veterans-shard-00-02-0nuxa.mongodb.net:27017/test?ssl=true&replicaSet=Veterans-shard-0&auth" "Source=admin") # client = MongoClient() db = client.test # gets actual database fs = GridFS(db) # for getting images mongo = PyMongo(app) # inits mongo server UPLOADED_PHOTOS_DEST = '/images/' ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'} app.config['UPLOADED_PHOTOS_DEST'] = UPLOADED_PHOTOS_DEST photos = UploadSet('photos', IMAGES) configure_uploads(app, (photos, )) with app.app_context(): login_code = setattr(g, 'user', 0) db.inventory.update_one({'account': True}, {'$set': {'id': -1}}) def allowed_file(filename):
def climate_itp_weight_thiessen(conn, db_model, subbsn_id, geodata2dbdir): """Generate and import weight information using Thiessen polygon method. Args: conn: db_model: workflow database object subbsn_id: subbasin id geodata2dbdir: directory to store weight data as txt file """ spatial_gfs = GridFS(db_model, DBTableNames.gridfs_spatial) # read mask file from mongodb mask_name = str(subbsn_id) + '_MASK' if not spatial_gfs.exists(filename=mask_name): raise RuntimeError('%s is not existed in MongoDB!' % mask_name) mask = db_model[DBTableNames.gridfs_spatial].files.find( {'filename': mask_name})[0] ysize = int(mask['metadata'][RasterMetadata.nrows]) xsize = int(mask['metadata'][RasterMetadata.ncols]) nodata_value = mask['metadata'][RasterMetadata.nodata] dx = mask['metadata'][RasterMetadata.cellsize] xll = mask['metadata'][RasterMetadata.xll] yll = mask['metadata'][RasterMetadata.yll] data = spatial_gfs.get(mask['_id']) total_len = xsize * ysize fmt = '%df' % (total_len, ) data = unpack(fmt, data.read()) # print(data[0], len(data), type(data)) # count number of valid cells num = 0 for type_i in range(0, total_len): if abs(data[type_i] - nodata_value) > UTIL_ZERO: num += 1 # read stations information from database metadic = { RasterMetadata.subbasin: subbsn_id, RasterMetadata.cellnum: num } site_lists = db_model[DBTableNames.main_sitelist].find( {FieldNames.subbasin_id: subbsn_id}) site_list = next(site_lists) clim_db_name = site_list[FieldNames.db] p_list = site_list.get(FieldNames.site_p) m_list = site_list.get(FieldNames.site_m) pet_list = site_list.get(FieldNames.site_pet) # print(p_list) # print(m_list) hydro_clim_db = conn[clim_db_name] type_list = [DataType.m, DataType.p, DataType.pet] site_lists = [m_list, p_list, pet_list] if pet_list is None: del type_list[2] del site_lists[2] # if storm_mode: # todo: Do some compatible work for storm and longterm models. 
# type_list = [DataType.p] # site_lists = [p_list] for type_i, type_name in enumerate(type_list): fname = '%d_WEIGHT_%s' % (subbsn_id, type_name) if spatial_gfs.exists(filename=fname): x = spatial_gfs.get_version(filename=fname) spatial_gfs.delete(x._id) site_list = site_lists[type_i] if site_list is not None: site_list = site_list.split(',') # print(site_list) site_list = [int(item) for item in site_list] metadic[RasterMetadata.site_num] = len(site_list) # print(site_list) q_dic = { StationFields.id: { '$in': site_list }, StationFields.type: type_list[type_i] } cursor = hydro_clim_db[DBTableNames.sites].find(q_dic).sort( StationFields.id, 1) # meteorology station can also be used as precipitation station if cursor.count() == 0 and type_list[type_i] == DataType.p: q_dic = { StationFields.id.upper(): { '$in': site_list }, StationFields.type.upper(): DataType.m } cursor = hydro_clim_db[DBTableNames.sites].find( q_dic).sort(StationFields.id, 1) # get site locations id_list = list() loc_list = list() for site in cursor: if site[StationFields.id] in site_list: id_list.append(site[StationFields.id]) loc_list.append( [site[StationFields.x], site[StationFields.y]]) # print('loclist', locList) # interpolate using the locations myfile = spatial_gfs.new_file(filename=fname, metadata=metadic) txtfile = '%s/weight_%d_%s.txt' % (geodata2dbdir, subbsn_id, type_list[type_i]) with open(txtfile, 'w', encoding='utf-8') as f_test: for y in range(0, ysize): for x in range(0, xsize): index = int(y * xsize + x) if abs(data[index] - nodata_value) > UTIL_ZERO: x_coor = xll + x * dx y_coor = yll + (ysize - y - 1) * dx line, near_index = ImportWeightData.thiessen( x_coor, y_coor, loc_list) myfile.write(line) fmt = '%df' % (len(loc_list)) f_test.write( '%f %f %s\n' % (x, y, unpack(fmt, line).__str__())) myfile.close()
def dados_xml_grava_fsfiles(db, batch_size=5000,
                            data_inicio=datetime(1900, 1, 1),
                            update=True, recinto=None):
    """Find GridFS records that lack XML info.

    Searches fs.files (GridFS - images) for records whose metadata has not
    yet been imported from the XML file. Iterates over these records,
    calling xml_todict to see whether it returns XML information. When that
    information is found, it is written to the metadata.xml field of
    fs.files.

    Args:
        db: connection to mongo with the database already selected
        batch_size: number of records to query/update per call
        data_inicio: filter by scan date greater than the given one
        update: if set to False, only queries, without updating the
            metadata of the fs.files collection
        recinto: facility code (metadata.recinto)

    Returns:
        Number of records found

    """
    filtro = {'metadata.xml': None,
              'metadata.dataescaneamento': {'$gt': data_inicio},
              'metadata.contentType': 'image/jpeg'
              }
    if recinto:
        filtro.update({'metadata.recinto': recinto})
    file_cursor = db['fs.files'].find(filtro).limit(batch_size)
    fs = GridFS(db)
    total = db['fs.files'].count_documents(filtro)
    acum = 0
    for linha in file_cursor:
        numero = linha.get('metadata').get('numeroinformado')
        data = linha.get('uploadDate')
        filename = linha.get('filename')
        print(numero, data, filename)
        # First, look for an XML with the same container number and a nearby upload date
        if numero and data:
            data_upload_antes = data - timedelta(hours=1)
            data_upload_depois = data + timedelta(hours=1)
            xml_document = db['fs.files'].find_one(
                {'metadata.numeroinformado': numero,
                 'uploadDate': {'$gt': data_upload_antes,
                                '$lt': data_upload_depois},
                 'metadata.contentType': 'text/xml'
                 })
        else:
            xml_document = None
        # Then, if nothing was found, try matching by filename
        if not xml_document:
            if not filename:
                continue
            if filename[:5] == 'XRAY-':
                posi = filename.find('--Array')
                xml_filename = filename[5:posi]
            else:
                posi = filename.find('_icon')
                if posi != -1:
                    xml_filename = filename[:posi]
                else:
                    posi = filename.find('stamp.jpg')
                    if posi != -1:
                        xml_filename = filename[:-11]
                    else:
                        xml_filename = filename[:-4]
            final_filename = xml_filename + '.xml'
            xml_document = db['fs.files'].find_one(
                {'filename': final_filename})
            if not xml_document:
                final_filename = xml_filename + '.XML'
                xml_document = db['fs.files'].find_one(
                    {'filename': final_filename})
                print('alternative xml:', final_filename)
        if not xml_document:
            logger.info('Number %s filename %s not found!!!'
                        % (numero, filename))
            continue
        file_id = xml_document.get('_id')
        if not fs.exists(file_id):
            continue
        raw = fs.get(file_id).read()
        encode = chardet.detect(raw)
        # print(encode)
        # print(raw)
        encoding = [encode['encoding'], 'latin1', 'utf8', 'ascii',
                    'windows-1250', 'windows-1252']
        dados_xml = {}
        for e in encoding:
            try:
                xmlo = raw.decode(e)
                try:
                    ET.fromstring(xmlo)
                    dados_xml = xml_todict(xmlo)
                    break
                except Exception:
                    # TODO: see why sometimes a weird character
                    # appears in front of content
                    posi = xmlo.find('<DataForm>')
                    # print('POSI', posi)
                    if posi == -1:
                        xml = xmlo[2:]
                    else:
                        xml = xmlo[posi:]
                    ET.fromstring(xml)
                    dados_xml = xml_todict(xml)
                    break
            except Exception as err:
                print('Encoding error', e, err)
        if dados_xml != {}:
            print(dados_xml)
            if update:
                db['fs.files'].update_one(
                    {'_id': linha['_id']},
                    {'$set': {'metadata.xml': dados_xml}}
                )
            acum += 1
    logger.info(' '.join([
        'Result of dados_xml_grava_fsfiles',
        'Searched', str(total),
        'Found', str(acum)
    ]))
    return acum
def get_pagina_id(conn, oid: str) -> bytes:
    # returns the raw image bytes stored in GridFS, not a PIL Image
    fs = GridFS(conn)
    _id = ObjectId(oid)
    grid_out = fs.get(_id)
    return grid_out.read()
def generate_weight_dependent_parameters(conn, maindb, subbsn_id): """Generate some parameters dependent on weight data and only should be calculated once. Such as PHU0 (annual average total potential heat units) TMEAN0 (annual average temperature) added by Liangjun, 2016-6-17 """ spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial) # read mask file from mongodb mask_name = '%d_MASK' % subbsn_id # is MASK existed in Database? if not spatial_gfs.exists(filename=mask_name): raise RuntimeError('%s is not existed in MongoDB!' % mask_name) # read WEIGHT_M file from mongodb weight_m_name = '%d_WEIGHT_M' % subbsn_id mask = maindb[DBTableNames.gridfs_spatial].files.find( {'filename': mask_name})[0] weight_m = maindb[DBTableNames.gridfs_spatial].files.find( {'filename': weight_m_name})[0] num_cells = int(weight_m['metadata'][RasterMetadata.cellnum]) num_sites = int(weight_m['metadata'][RasterMetadata.site_num]) # read meteorology sites site_lists = maindb[DBTableNames.main_sitelist].find( {FieldNames.subbasin_id: subbsn_id}) site_list = next(site_lists) db_name = site_list[FieldNames.db] m_list = site_list.get(FieldNames.site_m) hydro_clim_db = conn[db_name] site_list = m_list.split(',') site_list = [int(item) for item in site_list] q_dic = { StationFields.id: { '$in': site_list }, StationFields.type: DataType.phu0 } cursor = hydro_clim_db[DBTableNames.annual_stats].find(q_dic).sort( StationFields.id, 1) q_dic2 = { StationFields.id: { '$in': site_list }, StationFields.type: DataType.mean_tmp0 } cursor2 = hydro_clim_db[DBTableNames.annual_stats].find(q_dic2).sort( StationFields.id, 1) id_list = list() phu_list = list() for site in cursor: id_list.append(site[StationFields.id]) phu_list.append(site[DataValueFields.value]) id_list2 = list() tmean_list = list() for site in cursor2: id_list2.append(site[StationFields.id]) tmean_list.append(site[DataValueFields.value]) weight_m_data = spatial_gfs.get(weight_m['_id']) total_len = num_cells * num_sites # print(total_len) fmt = '%df' % (total_len, ) weight_m_data = unpack(fmt, weight_m_data.read()) # calculate PHU0 phu0_data = np_zeros(num_cells) # calculate TMEAN0 tmean0_data = np_zeros(num_cells) for i in range(num_cells): for j in range(num_sites): phu0_data[i] += phu_list[j] * weight_m_data[i * num_sites + j] tmean0_data[i] += tmean_list[j] * weight_m_data[i * num_sites + j] ysize = int(mask['metadata'][RasterMetadata.nrows]) xsize = int(mask['metadata'][RasterMetadata.ncols]) nodata_value = mask['metadata'][RasterMetadata.nodata] mask_data = spatial_gfs.get(mask['_id']) total_len = xsize * ysize fmt = '%df' % (total_len, ) mask_data = unpack(fmt, mask_data.read()) fname = '%d_%s' % (subbsn_id, DataType.phu0) fname2 = '%d_%s' % (subbsn_id, DataType.mean_tmp0) if spatial_gfs.exists(filename=fname): x = spatial_gfs.get_version(filename=fname) spatial_gfs.delete(x._id) if spatial_gfs.exists(filename=fname2): x = spatial_gfs.get_version(filename=fname2) spatial_gfs.delete(x._id) meta_dic = copy.deepcopy(mask['metadata']) meta_dic['TYPE'] = DataType.phu0 meta_dic['ID'] = fname meta_dic['DESCRIPTION'] = DataType.phu0 meta_dic2 = copy.deepcopy(mask['metadata']) meta_dic2['TYPE'] = DataType.mean_tmp0 meta_dic2['ID'] = fname2 meta_dic2['DESCRIPTION'] = DataType.mean_tmp0 myfile = spatial_gfs.new_file(filename=fname, metadata=meta_dic) myfile2 = spatial_gfs.new_file(filename=fname2, metadata=meta_dic2) vaild_count = 0 for i in range(0, ysize): cur_row = list() cur_row2 = list() for j in range(0, xsize): index = i * xsize + j if abs(mask_data[index] - 
nodata_value) > UTIL_ZERO: cur_row.append(phu0_data[vaild_count]) cur_row2.append(tmean0_data[vaild_count]) vaild_count += 1 else: cur_row.append(nodata_value) cur_row2.append(nodata_value) fmt = '%df' % xsize myfile.write(pack(fmt, *cur_row)) myfile2.write(pack(fmt, *cur_row2)) myfile.close() myfile2.close() print('Valid Cell Number of subbasin %d is: %d' % (subbsn_id, vaild_count)) return True
def init(): client = MongoClient(DB.URI) DB.DATABASE = client['TDSB'] DB.FS = GridFS(DB.DATABASE)
def add_fs(request): return GridFS(request.db)
from multidict import MultiDict from pymongo import MongoClient from gridfs import GridFS from bson import ObjectId from livecode.middleware import parse_content, get_openid from livecode.apis.login import get_empty_user from livecode.apis.user_img import UserImg from livecode.db import init_motor, init_resource, init_gridfs_resource from tests.help import config, get_img host = config['mongodb']['host'] port = config['mongodb']['port'] client = MongoClient(host, port) db = client.live_code fs_live_code_img = GridFS(db, 'live_code_img') fs_user_img = GridFS(db, 'user_img') @pytest.fixture def cli(loop, aiohttp_client): app = web.Application(middlewares=[parse_content, get_openid]) app['config'] = config app.on_startup.append(init_motor) app.on_startup.append(init_gridfs_resource) app.cleanup_ctx.append(init_resource) app.router.add_view('/wx/user/img', UserImg) return loop.run_until_complete(aiohttp_client(app))
def get_gfs(self, name=None): if not name: name = GFS_NAME return GridFS(self.get_db(name))
import pymongo from gridfs import GridFS import os import logging from dotenv import load_dotenv load_dotenv() myclient = pymongo.MongoClient(os.environ.get("DATABASE_URI")) knowledgeGeneratorDB = myclient['KnowledgeGenerator'] Users = knowledgeGeneratorDB['user'] Samples = knowledgeGeneratorDB['samples'] Models = knowledgeGeneratorDB['models'] fs = GridFS(knowledgeGeneratorDB) def store_user(user_obj): if is_user_new(user_obj): logging.info("New user:") logging.info(user_obj) Users.insert_one(user_obj) def store_sample(sample_obj): if is_sample_new(sample_obj): Samples.insert_one(sample_obj) def store_model(pickle_file, filename, version, desc): with open(pickle_file, 'rb') as file:
def __init__(self, database: Database, collection: str): self._fs = GridFS(database, collection)
def test_remove_file(self): fs = GridFS(self.conn['test'], 'test') id = fs.put("test file", filename="test.txt", encoding='utf8') assert_soon(lambda: self._count() == 1) fs.delete(id) assert_soon(lambda: self._count() == 0)
def get_tar(self, record=None, name=None, style=None, raw=False):
    """
    Retrieves the tar archive associated with a record in the database.
    Issues an error if exactly one matching record is not found in the
    database.

    Parameters
    ----------
    record : iprPy.Record, optional
        The record to retrieve the associated tar archive for.
    name : str, optional
        The name to use in uniquely identifying the record.
    style : str, optional
        The style to use in uniquely identifying the record.
    raw : bool, optional
        If True, return the archive as raw binary content.  If False,
        return as an open tarfile.  (Default is False)

    Returns
    -------
    tarfile.TarFile or bytes
        The tar archive as an open tarfile if raw=False, or as binary
        content if raw=True.

    Raises
    ------
    TypeError
        If style and/or name content given with record.
    ValueError
        If no tar archive, or multiple tar archives, are found for the
        record.
    """
    # Create Record object if not given
    if record is None:
        record = self.get_record(name=name, style=style)

    # Issue a TypeError for competing kwargs
    elif style is not None or name is not None:
        raise TypeError('kwargs style and name cannot be given with kwarg record')

    # Verify that record exists
    else:
        record = self.get_record(name=record.name, style=record.style)

    # Define mongofs
    mongofs = GridFS(self.mongodb, collection=record.style)

    # Build query
    query = {}
    query['recordname'] = record.name

    # Get tar
    matches = list(mongofs.find(query))
    if len(matches) == 1:
        tar = matches[0]
    elif len(matches) == 0:
        raise ValueError('No tar found for the record')
    else:
        raise ValueError('Multiple tars found for the record')

    # Return content
    if raw is True:
        return tar.read()
    else:
        return tarfile.open(fileobj=tar)
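# A usage sketch pairing add_tar/get_tar above; the record name, style and
# directories are hypothetical, and `database` stands for an instance of the
# class these methods belong to.
database.add_tar(name='calc-1234', style='calculation', root_dir='runs')
tar = database.get_tar(name='calc-1234', style='calculation')
tar.extractall(path='restored')   # unpack <root_dir>/<name> back onto disk
tar.close()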
from sklearn.pipeline import Pipeline

# color values for closed-school density -- 김명윤
Jet_colormap = [
    '#0000BF', '#0000FF', '#0040ff', '#0080ff', '#00bfff', '#00ffff',
    '#3fffbf', '#7fffbf', '#bfff3f', '#ffff00', '#ffbf00', '#ff7f00',
    '#ff4000', '#ff0000', '#bf0000', '#7f0000', '#000000'
]

# load the key values written in the .env file -- 김명윤
restapi = config('KAKAO_REEST_API')
jsapi = config('KAKAO_JAVASCRIPT_API')

# MongoDB connection -- 김우희
conn = pymongo.MongoClient().python_test
fs = GridFS(conn)

# load each province's district latitude/longitude from MongoDB -- 김우희
def load_sido_json():
    f = fs.get_last_version(filename="save_sido2.json")
    model = json.load(BytesIO(f.read()))
    return model

# load the trained model data from MongoDB -- 김우희
def load_pcamodel():
    f = fs.get_last_version(filename="pca.pkl")
    data = f.read()
    model = pickle.load(BytesIO(data))
    # model = pickle.load(rf)
def _get_gridfs(self, model_instance): model = model_instance.__class__ return GridFS(connections[model.objects.db].database, model._meta.db_table)
def select_photo(file_name):
    # get_last_version raises gridfs.errors.NoFile when nothing matches,
    # so catch it to honour the "return None if missing" intent
    fs = GridFS(MongoDatabase.mongodb.db)
    try:
        return fs.get_last_version(filename=file_name)
    except NoFile:
        return None
class GlobalModelDAO:

    def __init__(self):
        self._db = MongoClient()["ai_cloud_platform"]
        self._fs = GridFS(MongoClient()["ai_cloud_platform_fs"])

    def insert_one(self, po):
        inserted_id = self._db["global_models"].insert_one(po).inserted_id
        return inserted_id

    def insert_file_one(self, data):
        inserted_id = self._fs.put(data)
        return inserted_id

    @check_id
    def delete_one_by_id(self, global_model_id):
        result = self._db["global_models"].delete_one({
            "_id": global_model_id
        }).raw_result
        return result

    def delete_many_by_name(self, name):
        result = self._db["global_models"].delete_many({
            "name": name
        }).raw_result
        return result

    def delete_one_by_name_and_version(self, name, version):
        result = self._db["global_models"].delete_one({
            "name": name,
            "version": version
        }).raw_result
        return result

    def delete_many_by_labels(self, labels):
        labels = {"labels." + key: labels[key] for key in labels}
        result = self._db["global_models"].delete_many(labels).raw_result
        return result

    def delete_many_by_criteria(self, criteria):
        result = self._db["global_models"].delete_many(criteria).raw_result
        return result

    @check_id
    def delete_file_one_by_id(self, file_id):
        self._fs.delete(file_id)
        return {'ok': 1.0}

    @check_id
    def replace_one_by_id(self, global_model_id, po):
        result = self._db["global_models"].replace_one({
            "_id": global_model_id
        }, po).raw_result
        return result

    @check_id
    def find_one_by_id(self, global_model_id):
        document = self._db["global_models"].find_one({"_id": global_model_id})
        return document

    def find_many_by_name(self, name):
        cursor = self._db["global_models"].find({"name": name})
        documents = list(cursor)
        return documents

    def find_one_by_name_and_version(self, name, version):
        document = self._db["global_models"].find_one({
            "name": name,
            "version": version
        })
        return document

    def find_many_by_labels(self, labels):
        labels = {"labels." + key: labels[key] for key in labels}
        cursor = self._db["global_models"].find(labels)
        documents = list(cursor)
        return documents

    def find_one_by_local_model_id(self, global_model_id):
        document = self._db["local_models"].find_one({"_id": global_model_id})
        return document

    def find_many_by_criteria(self, criteria):
        cursor = self._db["global_models"].find(criteria)
        documents = list(cursor)
        return documents

    def find_many_by_type(self, type_):
        cursor = self._db["global_models"].find({"type": type_})
        documents = list(cursor)
        return documents

    def find_many_by_subscription_user_id(self, user_id):
        cursor = self._db["global_models"].find(
            {"subscriptions.userId": user_id})
        documents = list(cursor)
        return documents

    @check_id
    def find_file_one_by_id(self, file_id):
        out_stream = self._fs.get(file_id)
        return out_stream

    def find_file_many_by_id(self, file_ids):
        if not isinstance(file_ids, list):
            file_ids = [file_ids]
        for index in range(0, len(file_ids)):
            if isinstance(file_ids[index], str):
                file_ids[index] = ObjectId(file_ids[index])
        out_streams = [self._fs.get(file_id) for file_id in file_ids]
        return out_streams

    def find_file_many_by_criteria(self, criteria):
        cursor = self._db["global_models"].find(criteria, {"file_id": 1})
        out_streams = [self._fs.get(document["file_id"]) for document in cursor]
        return out_streams
from flask import Flask from gridfs import GridFS from pymongo import MongoClient from config import * from celery import Celery app = Flask(__name__) app.secret_key = 'super secret key!!' db = MongoClient(MONGO_URL, connect=False).storage grid_fs = GridFS(db) app.config['CELERY_BROKER_URL'] = CELERY_BROKER_URL app.config['CELERY_RESULT_BACKEND'] = CELERY_RESULT_BACKEND app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER celery = Celery(app.name, broker=app.config['CELERY_BROKER_URL']) celery.conf.update(app.config) from views import * if __name__ == '__main__': app.run()
def test_post_grid_calendar_returns_success_status(app, coverage, get_app_context): filename = 'export_calendars.zip' path = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), 'fixtures/gridcalendar/', filename) files = {'file': (open(path, 'rb'), 'export_calendars.zip')} raw = app.post('/coverages/jdr/grid_calendar', data=files) r = to_json(raw) assert raw.status_code == 200 assert r.get('message') == 'OK' raw = app.get('/coverages') r = to_json(raw) assert len(r['coverages']) == 1 assert 'grid_calendars_id' in r['coverages'][0] gridfs = GridFS(mongo.db) file_id = r['coverages'][0]['grid_calendars_id'] assert gridfs.exists(ObjectId(file_id)) #we update the file (it's the same, but that's not the point) files = {'file': (open(path, 'rb'), 'export_calendars.zip')} raw = app.post('/coverages/jdr/grid_calendar', data=files) assert raw.status_code == 200 raw = app.get('/coverages') r = to_json(raw) assert len(r['coverages']) == 1 assert 'grid_calendars_id' in r['coverages'][0] #it should be another file assert file_id != r['coverages'][0]['grid_calendars_id'] #the previous file has been deleted assert not gridfs.exists(ObjectId(file_id)) #and the new one exist assert gridfs.exists(ObjectId(r['coverages'][0]['grid_calendars_id']))