def test_gridfs(self):
        """Verify every GridFS operation forwards an explicit session.

        Each operation is run through the shared ``self._test_ops`` harness,
        which injects ``session=...`` and asserts the session (lsid) is used
        for every server round trip.
        """
        client = self.client
        fs = GridFS(client.pymongo_test)

        def new_file(session=None):
            # Create file _id=1 named 'f' inside the given session.
            grid_file = fs.new_file(_id=1, filename='f', session=session)
            # 1 MB, 5 chunks, to test that each chunk is fetched with same lsid.
            grid_file.write(b'a' * 1048576)
            grid_file.close()

        def find(session=None):
            # Query and fully read each matching file inside the session.
            files = list(fs.find({'_id': 1}, session=session))
            for f in files:
                f.read()

        # Each entry is (callable, args, kwargs); the harness supplies the
        # session keyword argument itself.
        self._test_ops(
            client,
            (new_file, [], {}),
            (fs.put, [b'data'], {}),
            (lambda session=None: fs.get(1, session=session).read(), [], {}),
            (lambda session=None: fs.get_version('f', session=session).read(),
             [], {}),
            (lambda session=None:
             fs.get_last_version('f', session=session).read(), [], {}),
            (fs.list, [], {}),
            (fs.find_one, [1], {}),
            (lambda session=None: list(fs.find(session=session)), [], {}),
            (fs.exists, [1], {}),
            (find, [], {}),
            (fs.delete, [1], {}))
def metal_bar(request):
    """Render a two-series SVG bar graph of example-point counts.

    Points with value <= 30000 are tallied in the 30000 bin, the rest in
    the 60000 bin; a constant "always 20" series is drawn alongside.
    Returns the plot as an ``image/svg+xml`` HTTP response.
    """
    bins = {30000: 0, 60000: 0}

    client = database.getClient()
    fs = GridFS(client.trace_database)

    # Only trace files tagged with the "vastplace_example" source type count.
    for traceFile in fs.find():
        if 'sourceTypes' in traceFile.metadata and "vastplace_example" in traceFile.metadata[
                'sourceTypes']:
            points = get_example_points_for_source(traceFile._id)
            for point in points:
                # point[1] is the value being binned -- presumably a sensor
                # reading; TODO(review): confirm the tuple layout returned by
                # get_example_points_for_source.
                if point[1] <= 30000:
                    bins[30000] += 1
                else:
                    bins[60000] += 1

    buf = plotBarGraph([{
        'X': [int(u) for u in bins.keys()],
        'Y': [bins[k] for k in bins],
        'label': "Bin count",
        'width': 5000,
        'color': 'red'
    }, {
        # Second series is offset by half a bar width so both are visible.
        'X': [int(u) + 5000 for u in bins.keys()],
        'Y': [20 for k in bins],
        'label': "always 20",
        'width': 5000,
        'color': 'green'
    }],
                       xlabel='Sample bar graph',
                       ylabel='The y axis label',
                       legend=True)

    return HttpResponse(buf, content_type='image/svg+xml')
Beispiel #3
0
class FilesWrapper(object):
    """GridFS-backed file store; every file carries a generated ``uuid``."""

    def __init__(self, db, *args, **kwargs):
        self.storage = GridFS(db)
        self.comments_wrapper = CommentsWrapper(db)

    def list(self):
        """Return a list of stored files, each annotated with its comment count.

        NOTE(review): only files whose uuid appears in the grouped comment
        counts are returned -- confirm get_grouped_counts also reports
        zero-comment files if uncommented files should be listed.
        """
        uuids = {f.uuid: {'name': f.filename, 'uuid': f.uuid,
                          'comments': 0, 'version': f.version}
                 for f in self.storage.find()}

        comments = self.comments_wrapper.get_grouped_counts(uuids.keys())

        def _up(uuid, count):
            # Attach the comment count to the file record and return it.
            uuids[uuid]['comments'] = count
            return uuids[uuid]

        return [_up(*c) for c in comments]

    def get(self, uuid, version=0):
        """Return the selected file version, or None if it does not exist."""
        try:
            return self.storage.get_version(uuid=uuid, version=version)
        except NoFile:
            return None

    def add(self, file):
        """Save the file into storage under a fresh uuid and return that uuid."""
        # str(uuid4()) -- the original used the Python-2-only `unicode`
        # builtin, which raises NameError on Python 3.
        uuid = str(uuid4())
        self.storage.put(file, filename=file.filename, uuid=uuid, version=0)
        return uuid
Beispiel #4
0
    def tearDown(self):
        """Delete every stored GridFS file, then run the parent tearDown."""
        fs = GridFS(self.mongo.db)
        # Materialize the cursor up front so files are not deleted while the
        # server-side cursor is still being iterated.
        stored = list(fs.find())
        for grid_file in stored:
            fs.delete(grid_file._id)

        super(GridFSCleanupMixin, self).tearDown()
def bargraph(request, targetId_str):
    """Render an SVG histogram of PM2.5 readings for the given trace files.

    ``targetId_str`` is a comma-separated list of trace-file ObjectIds.
    Readings are bucketed into fixed, unevenly sized bins and plotted as a
    bar graph returned as an ``image/svg+xml`` HTTP response.
    """

    # Lower edge of each histogram bin (the last bin is open-ended).
    bins_lower_keys = [0, 14, 21, 28, 35, 60]

    # Not all bins are the same size, adjust the widths
    widths = [
        bins_lower_keys[i + 1] - bins_lower_keys[i]
        for i in range(len(bins_lower_keys) - 1)
    ]
    # Reuse the last width for the open-ended final bin.
    widths.append(widths[-1])

    x_labels = [
        str(bins_lower_keys[i]) + ' - ' + str(bins_lower_keys[i + 1])
        for i in range(len(bins_lower_keys) - 1)
    ]
    x_labels.append('>' + str(bins_lower_keys[-1]))

    # matplotlib would center around the lower key; add offsets of half the
    # bin width so bars sit between their edges.
    bins_Xs = []
    for bInd, bin_lower in enumerate(bins_lower_keys):
        bins_Xs.append(bin_lower + 0.5 * widths[bInd])

    barData = {k: 0 for k in bins_lower_keys}

    target_ids = targetId_str.split(',')

    client = database.getClient()
    db = client.trace_database
    fs = GridFS(db)

    for traceFile in fs.find(
        {'_id': {
            '$in': [ObjectId(u) for u in target_ids]
        }}):
        point_collection = client.point_database.sensors.find({
            'sourceId':
            ObjectId(traceFile._id),
            'sensorType':
            'PM2.5'
        })
        for point in point_collection:
            # Bucket: largest lower edge not exceeding the reading.
            # NOTE(review): a negative sensorValue would make the list empty
            # and max() raise ValueError -- confirm readings are >= 0.
            bin_key = max(
                [k for k in bins_lower_keys if k <= point['sensorValue']])
            barData[bin_key] += 1

    buf = plotBarGraph([{
        'X': bins_Xs,
        'Y': [barData[k] for k in sorted(barData.keys())],
        'label': "PM2.5",
        'width': widths,
        'color': 'green'
    }],
                       xlabel='PM2.5',
                       ylabel='Occurence',
                       x_ticks=bins_Xs,
                       x_tick_labels=x_labels,
                       legend=True)

    return HttpResponse(buf, content_type='image/svg+xml')
Beispiel #6
0
 def CleanSpatialGridFs(self, scenario_id):
     # type: (int) -> None
     """Delete Spatial GridFS files in Main database."""
     spatial_gfs = GridFS(self.maindb, DBTableNames.gridfs_spatial)
     # If there are any GridFS in Sptial collection are generated during scenario analysis,
     #   the format of such GridFS file is: <SubbasinID>_<CoreFileName>_<ScenarioID>
     # e.g., SLPPOS_UNITS_12345678
     # Match any filename ending in this scenario's numeric id.
     regex_str = '\\d+_\\S+_%d' % scenario_id
     for i in spatial_gfs.find({'filename': {'$regex': regex_str}}):
         spatial_gfs.delete(i._id)
Beispiel #7
0
class MongoStorageBackend(IStorageBackend):
    """Storage backend that keeps objects in MongoDB GridFS."""

    def __init__(self, db_host, db_port=None, db_name=None):
        """Connect to MongoDB.

        :param db_host: MongoDB host.
        :param db_port: MongoDB port (default 27017).
        :param db_name: database name (default "lts").
        """
        if db_port is None:
            db_port = 27017
        if db_name is None:
            db_name = "lts"

        self.db = MongoClient(db_host, db_port)[db_name]
        # Reuse the database handle above; the original constructed a second
        # MongoClient (and connection pool) just for GridFS.
        self.fs = GridFS(self.db)

    def get_object_id_list(self, cursor, limit):
        """Return (next_cursor, ids): one page of _ids ordered ascending.

        ``next_cursor`` is None when there is no further page.
        """
        def peek(cursor, limit):
            # Probe past the current page; a non-empty result means more data.
            if len([x._id for x in self.fs.find().sort('_id', ASCENDING).skip(cursor + limit)]) > 0:
                return str(cursor + limit)
            return None
        cursor = int(cursor)
        results = [x._id for x in self.fs.find().sort('_id', ASCENDING).skip(cursor).limit(limit)]
        next_cursor = peek(cursor, limit)
        return next_cursor, results

    def check_object_exists(self, id):
        """Return True when a GridFS file with this _id exists."""
        if self.fs.find_one({"_id": id}):
            return True
        return False

    def get_object(self, id):
        """Return the GridOut for *id*; raise ObjectNotFoundError if absent."""
        gr_entry = self.fs.find_one({"_id": id})
        if gr_entry is None:
            raise ObjectNotFoundError(str(id))
        return gr_entry

    def set_object(self, id, content):
        """Store *content* under *id*; raise if the id is already taken."""
        if self.check_object_exists(id):
            raise ObjectAlreadyExistsError(str(id))
        content_target = self.fs.new_file(_id=id)
        # `content` is expected to expose .save(fileobj) (e.g. a werkzeug
        # FileStorage) -- TODO(review): confirm against callers.
        content.save(content_target)
        content_target.close()

    def del_object(self, id):
        """Delete the GridFS file with this _id."""
        return self.fs.delete(id)
Beispiel #8
0
    def fetch_GridFS(self, dbname, output_html_folder):
        """Export every GridFS file of *dbname* as a UTF-8 text file.

        :param dbname: name of the database holding the GridFS collection.
        :param output_html_folder: destination folder for the written files.
        """
        db = self.client[dbname]
        # GridFS collection for large file
        fs = GridFS(db)

        for fn in fs.list():
            file_path = output_html_folder + '/' + fn
            # Multiple versions share a filename; each write truncates the
            # previous one, so the last version iterated wins.
            for fs_doc in fs.find({"filename": fn}):
                fs_doc = fs_doc.read()
                with open(file_path, 'w') as f:
                    f.write(fs_doc.decode('utf-8'))
Beispiel #9
0
    def _search(self):
        """Yield every synced document, then GridFS-backed documents.

        For meta-collection docs carrying a ``gridfs_id``, the filename and
        full file content are pulled from GridFS and attached before yielding.
        """
        for doc in self.mongo.find():
            yield doc

        fs = GridFS(self.mongo_conn['test'], 'test')
        for doc in self.mongo_conn['__mongo_connector']['test.test'].find():
            if doc.get('gridfs_id'):
                for f in fs.find({'_id': doc['gridfs_id']}):
                    doc['filename'] = f.filename
                    doc['content'] = f.read()
                    yield doc
class GridFSPickleDict(BaseStorage):
    """A dictionary-like interface for a GridFS collection"""
    def __init__(self,
                 db_name,
                 collection_name=None,
                 connection=None,
                 **kwargs):
        """
        :param db_name: database name (be careful with production databases)
        :param collection_name: ignored -- GridFS always uses its fixed
                                'fs.files'/'fs.chunks' collections
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        super().__init__(**kwargs)
        if connection is not None:
            self.connection = connection
        else:
            self.connection = MongoClient()

        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        # One GridFS file per cache key, keyed by _id.
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return self.deserialize(result.read())

    def __setitem__(self, key, item):
        # GridFS files are immutable: delete any existing entry first.
        try:
            self.__delitem__(key)
        except KeyError:
            pass
        self.fs.put(self.serialize(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        if res is None:
            raise KeyError
        self.fs.delete(res._id)

    def __len__(self):
        # Fast server-side estimate; may briefly lag the true count.
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Beispiel #11
0
    def _search(self):
        """Yield every synced document, then GridFS-backed documents.

        For meta-collection docs carrying a ``gridfs_id``, the filename and
        full file content are pulled from GridFS and attached before yielding.
        """
        for doc in self.mongo.find():
            yield doc

        fs = GridFS(self.mongo_conn['test'], 'test')
        for doc in self.mongo_conn['__mongo_connector']['test.test'].find():
            if doc.get('gridfs_id'):
                for f in fs.find({'_id': doc['gridfs_id']}):
                    doc['filename'] = f.filename
                    doc['content'] = f.read()
                    yield doc
Beispiel #12
0
    def delete_tar(self, record=None, name=None, style=None):
        """
        Deletes a tar file from the database.  Issues an error if exactly one
        matching record is not found in the database.
        
        Parameters
        ----------
        record : iprPy.Record, optional
            The record associated with the tar archive to delete.  If not
            given, then name and/or style necessary to uniquely identify
            the record are needed.
        name : str, optional
            The name to use in uniquely identifying the record.
        style : str, optional
            The style to use in uniquely identifying the record.
        
        Raises
        ------
        ValueError
            If style and/or name content given with record.
        """

        # Create Record object if not given
        if record is None:
            record = self.get_record(name=name, style=style)

        # Issue a ValueError for competing kwargs
        elif style is not None or name is not None:
            raise ValueError(
                'kwargs style and name cannot be given with kwarg record')

        # Verify that record exists
        else:
            record = self.get_record(name=record.name, style=record.style)

        # Define mongofs
        mongofs = GridFS(self.mongodb, collection=record.style)

        # Build query
        query = {}
        query['recordname'] = record.name

        # Get tar -- exactly one archive must match the record name
        matches = list(mongofs.find(query))
        if len(matches) == 1:
            tar = matches[0]
        elif len(matches) == 0:
            raise ValueError('No tar found for the record')
        else:
            raise ValueError('Multiple tars found for the record')

        # Delete tar
        mongofs.delete(tar._id)
Beispiel #13
0
class GridFSPickleDict(BaseStorage):
    """A dictionary-like interface for a GridFS database

    Args:
        db_name: Database name
        collection_name: Ignored; GridFS internally uses collections 'fs.files' and 'fs.chunks'
        connection: :py:class:`pymongo.MongoClient` object to reuse instead of creating a new one
        kwargs: Additional keyword arguments for :py:class:`pymongo.MongoClient`
    """
    def __init__(self,
                 db_name,
                 collection_name=None,
                 connection=None,
                 **kwargs):
        super().__init__(**kwargs)
        # Forward only kwargs MongoClient actually accepts.
        connection_kwargs = get_valid_kwargs(MongoClient, kwargs)
        self.connection = connection or MongoClient(**connection_kwargs)
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        # One GridFS file per cache key, keyed by _id.
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return self.deserialize(result.read())

    def __setitem__(self, key, item):
        # GridFS files are immutable: delete any existing entry first.
        try:
            self.__delitem__(key)
        except KeyError:
            pass
        self.fs.put(self.serialize(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        if res is None:
            raise KeyError
        self.fs.delete(res._id)

    def __len__(self):
        # Fast server-side estimate; may briefly lag the true count.
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Beispiel #14
0
class LostUploader():
    """Uploads 1D/2D spectra and radial-velocity entries to MongoDB.

    On construction the target collections are wiped clean.
    """

    def __init__(self, uri, database):
        self.client = MongoClient(uri)
        self.database = self.client[database]
        self.radialvelocity = self.database['radialvelocity']
        self.one = GridFS(self.database, collection='one')
        self.two = GridFS(self.database, collection='two')

        # NOTE(review): Collection.remove() is deprecated in modern PyMongo
        # (use delete_many({})); kept as-is for compatibility.
        self.radialvelocity.remove()

        for i in self.one.find():  # or fs.list()
            self.one.delete(i._id)
        for i in self.two.find():  # or fs.list()
            self.two.delete(i._id)

    def upload1D(self, filepath, filename):
        """Store a 1D spectrum file; return its GridFS id."""
        bucket = GridFS(self.database, collection='one')
        # `with` closes the handle; the original leaked the open file.
        with open(filepath, 'rb') as file:
            return bucket.put(file, filename=filename)

    def upload2D(self, filepath, filename):
        """Store a 2D spectrum file; return its GridFS id."""
        bucket = GridFS(self.database, collection='two')
        with open(filepath, 'rb') as file:
            return bucket.put(file, filename=filename)

    def uploadRV(self, entry):
        """Insert one radial-velocity document."""
        rv = self.database['radialvelocity']

        rv.insert_one(entry)

    def uploadSet(self, filepath1D, filepath2D, entry):
        """Upload the matched 1D file, 2D file and RV entry; return their ids.

        NOTE(review): id3 is uploadRV's return value, which is None here.
        """
        id1 = self.upload1D(filepath1D, entry['filename'])
        id2 = self.upload2D(filepath2D, entry['filename'])
        id3 = self.uploadRV(entry)
        return (id1, id2, id3)

    def readOne(self, id):
        """Print the raw contents of a stored 1D spectrum."""
        bucket = GridFS(self.database, collection='one')
        print(bucket.get(id).read())
    def _search(self, **kwargs):
        """Yield every synced document, then GridFS-backed documents.

        The meta collection name depends on the single-meta-collection mode;
        docs carrying a ``gridfs_id`` get filename and content attached from
        GridFS before being yielded.
        """
        for doc in self.mongo.find(**kwargs):
            yield doc

        fs = GridFS(self.mongo_conn['test'], 'test')

        collection_name = 'test.test'
        if self.use_single_meta_collection:
            collection_name = '__oplog'
        for doc in self.mongo_conn['__mongo_connector'][collection_name].find():
            if doc.get('gridfs_id'):
                for f in fs.find({'_id': doc['gridfs_id']}):
                    doc['filename'] = f.filename
                    doc['content'] = f.read()
                    yield doc
    def _search(self, **kwargs):
        """Yield every synced document, then GridFS-backed documents.

        The meta collection name depends on the single-meta-collection mode;
        docs carrying a ``gridfs_id`` get filename and content attached from
        GridFS before being yielded.
        """
        for doc in self.mongo.find(**kwargs):
            yield doc

        fs = GridFS(self.mongo_conn['test'], 'test')

        collection_name = 'test.test'
        if self.use_single_meta_collection:
            collection_name = '__oplog'
        for doc in self.mongo_conn['__mongo_connector'][collection_name].find():
            if doc.get('gridfs_id'):
                for f in fs.find({'_id': doc['gridfs_id']}):
                    doc['filename'] = f.filename
                    doc['content'] = f.read()
                    yield doc
    def _search(self, **kwargs):
        """Yield every synced document, then GridFS-backed documents.

        The meta collection name depends on the single-meta-collection mode;
        docs carrying a ``gridfs_id`` get filename and content attached from
        GridFS before being yielded.
        """
        for doc in self.mongo.find(**kwargs):
            yield doc

        fs = GridFS(self.mongo_conn["test"], "test")

        collection_name = "test.test"
        if self.use_single_meta_collection:
            collection_name = "__oplog"
        col = self.mongo_conn["__mongo_connector"][collection_name]
        for doc in col.find():
            if doc.get("gridfs_id"):
                for f in fs.find({"_id": doc["gridfs_id"]}):
                    doc["filename"] = f.filename
                    doc["content"] = f.read()
                    yield doc
def metal_rest(request):
    """Return counts of example points at/below vs. above 30000 as a 200 response."""
    counts = {'under 30000': 0, 'over 30000': 0}

    client = database.getClient()
    fs = GridFS(client.trace_database)

    for trace in fs.find():
        # Skip trace files not tagged as "vastplace_example" sources.
        metadata = trace.metadata
        if 'sourceTypes' not in metadata:
            continue
        if "vastplace_example" not in metadata['sourceTypes']:
            continue
        for point in get_example_points_for_source(trace._id):
            key = 'under 30000' if point[1] <= 30000 else 'over 30000'
            counts[key] += 1
    return Response(counts, status=status.HTTP_200_OK)
Beispiel #19
0
    def _search(self, **kwargs):
        """Yield every synced document, then GridFS-backed documents.

        The meta collection name depends on the single-meta-collection mode;
        docs carrying a ``gridfs_id`` get filename and content attached from
        GridFS before being yielded.
        """
        for doc in self.mongo.find(**kwargs):
            yield doc

        fs = GridFS(self.mongo_conn["test"], "test")

        collection_name = "test.test"
        if self.use_single_meta_collection:
            collection_name = "__oplog"
        col = self.mongo_conn["__mongo_connector"][collection_name]
        for doc in col.find():
            if doc.get("gridfs_id"):
                for f in fs.find({"_id": doc["gridfs_id"]}):
                    doc["filename"] = f.filename
                    doc["content"] = f.read()
                    yield doc
Beispiel #20
0
    def import_htmlfiles(self, html_folder, dbname, colname):
        """
        A function to import html files to a mongo database collection
        :param dbname: database name
        :param colname: collection name
        :param html_folder: the folder stores the html files

        Documents too large for a regular collection are stored in GridFS
        instead.  Sheets from folder-style files go into "<colname>_child".

        NOTE(review): cursor.count() and Collection.insert() are deprecated /
        removed in modern PyMongo (use count_documents / insert_one) -- this
        code requires an older driver.
        """
        json_docs, folder_files = ConvertHtmlJson().convert_html2json(
            html_folder=html_folder)
        db = self.client[dbname]
        collection = db[colname]
        collection_child = db[colname + "_child"]

        # GridFS collection for large file
        fs = GridFS(db)
        # store single documents
        for jdoc in json_docs:
            # get file name to check whether the document exist
            fn = jdoc["file_name"]
            # if it doesn't exist
            if collection.find({'file_name': fn}).count() == 0:
                try:
                    collection.insert(jdoc)
                    print("Inserted document: " + fn)
                except DocumentTooLarge:
                    # Over the 16 MB BSON limit: fall back to GridFS.
                    if fs.find({"filename": fn}).count() == 0:
                        html_content = jdoc["html_content"]
                        html_content = str(html_content).encode("UTF-8")
                        fs.put(html_content, filename=fn)
                        print("Inserted document: " + fn)
                    else:
                        print(fn + " already exist!")
            else:
                print(fn + " already exist!")
        # store folder documents
        for fn, sheets in folder_files.items():
            for sheet in sheets:
                sn = sheet["sheet_name"]
                if collection_child.find({
                        'file_name': fn,
                        'sheet_name': sn
                }).count() == 0:
                    collection_child.insert(sheet)
                    print("Inserted document: " + fn + '/' + sn)
                else:
                    print(fn + '/' + sn + " already exist!")
class GridFSPickleDict(MutableMapping):
    """ MongoDict - a dictionary-like interface for ``mongo`` database

    Values are pickled and stored one GridFS file per key.
    """
    def __init__(self, db_name, connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = MongoClient()

        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any existing entry first
        # (__delitem__ here is a no-op when the key is absent).
        self.__delitem__(key)
        self.fs.put(pickle.dumps(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        if res is not None:
            self.fs.delete(res._id)

    def __len__(self):
        # NOTE(review): Collection.count() is removed in PyMongo 4;
        # requires an older driver (count_documents({}) is the modern form).
        return self.db['fs.files'].count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Beispiel #22
0
class GridFsBackend(BaseBackend):
    '''
    A Mongo GridFS backend

    Expect the following settings:

    - `mongo_url`: The Mongo access URL
    - `mongo_db`: The database to store the file in.
    '''
    def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)

        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        # Backend name doubles as the GridFS bucket (collection prefix).
        self.fs = GridFS(self.db, self.name)

    def exists(self, filename):
        # True when at least one version of the file is stored.
        return self.fs.exists(filename=filename)

    @contextmanager
    def open(self, filename, mode='r', encoding='utf8'):
        '''Yield a file-like object; writes are flushed to GridFS on exit.'''
        if 'r' in mode:
            f = self.fs.get_last_version(filename)
            # Wrap binary GridOut in a decoding reader for text mode.
            yield f if 'b' in mode else codecs.getreader(encoding)(f)
        else:  # mode == 'w'
            # Buffer in memory; the new version is stored only after the
            # caller's `with` block completes.
            f = io.BytesIO() if 'b' in mode else io.StringIO()
            yield f
            params = {'filename': filename}
            if 'b' not in mode:
                params['encoding'] = encoding
            self.fs.put(f.getvalue(), **params)

    def read(self, filename):
        # Always reads the most recent stored version.
        f = self.fs.get_last_version(filename)
        return f.read()

    def write(self, filename, content):
        return self.fs.put(self.as_binary(content), filename=filename)

    def delete(self, filename):
        # Remove every stored version, not just the latest.
        for version in self.fs.find({'filename': filename}):
            self.fs.delete(version._id)

    def serve(self, filename):
        file = self.fs.get_last_version(filename)
        return send_file(file, mimetype=file.content_type)
class GridFSPickleDict(MutableMapping):
    """ MongoDict - a dictionary-like interface for ``mongo`` database

    Values are pickled and stored one GridFS file per key.
    """
    def __init__(self, db_name, connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = MongoClient()

        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any existing entry first
        # (__delitem__ here is a no-op when the key is absent).
        self.__delitem__(key)
        self.fs.put(pickle.dumps(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        if res is not None:
            self.fs.delete(res._id)

    def __len__(self):
        # NOTE(review): Collection.count() is removed in PyMongo 4;
        # requires an older driver (count_documents({}) is the modern form).
        return self.db['fs.files'].count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Beispiel #24
0
class MotorCoreTestGridFS(MotorTest):
    """Checks Motor's GridFS wrappers expose the same API surface as PyMongo."""

    def setUp(self):
        super(MotorCoreTestGridFS, self).setUp()
        self.sync_fs = GridFS(env.sync_cx.test)
        # Remove any leftover file from a previous run, then seed _id=1.
        self.sync_fs.delete(file_id=1)
        self.sync_fs.put(b'', _id=1)

    def tearDown(self):
        self.sync_fs.delete(file_id=1)
        super(MotorCoreTestGridFS, self).tearDown()

    def test_gridfs_attrs(self):
        # Attribute parity: PyMongo GridFS vs MotorGridFS, modulo known deltas.
        pymongo_gridfs_only = set([
            # Obsolete PyMongo methods.
            'open',
            'remove'])

        motor_gridfs_only = set(['collection']).union(motor_only)

        self.assertEqual(
            attrs(GridFS(env.sync_cx.test)) - pymongo_gridfs_only,
            attrs(MotorGridFS(self.cx.test)) - motor_gridfs_only)

    def test_gridin_attrs(self):
        # Attribute parity: GridIn vs MotorGridIn.
        motor_gridin_only = set(['set']).union(motor_only)

        self.assertEqual(
            attrs(GridIn(env.sync_cx.test.fs)),
            attrs(MotorGridIn(self.cx.test.fs)) - motor_gridin_only)

    @gen_test
    def test_gridout_attrs(self):
        # Attribute parity: GridOut vs an opened MotorGridOut.
        motor_gridout_only = set([
            'open',
            'stream_to_handler'
        ]).union(motor_only)

        motor_gridout = yield MotorGridOut(self.cx.test.fs, file_id=1).open()
        self.assertEqual(
            attrs(self.sync_fs.get(1)),
            attrs(motor_gridout) - motor_gridout_only)

    def test_gridout_cursor_attrs(self):
        # Attribute parity: the cursors returned by find().
        self.assertEqual(
            attrs(self.sync_fs.find()) - pymongo_cursor_only,
            attrs(MotorGridFS(self.cx.test).find()) - motor_cursor_only)
Beispiel #25
0
def download_to_local(db):
    """Download the GridFS file named "Zootopia_0.mp4" into the current directory.

    Every stored version with that filename is written out; each write
    truncates the previous one, so the last version iterated wins.

    :param db: a pymongo Database holding the GridFS collections.
    """
    fs = GridFS(db)

    for grid_file in fs.find():
        if grid_file.filename == "Zootopia_0.mp4":
            with open(grid_file.filename, 'wb') as out:
                # GridOut.read() with no size returns the remaining bytes;
                # loop until it reports EOF with an empty bytes object.
                while True:
                    data = grid_file.read()
                    if not data:
                        break
                    out.write(data)
class GridFSCache(BaseCache):
    """A dictionary-like interface for MongoDB GridFS

    Args:
        db_name: database name (be careful with production databases)
        connection: MongoDB connection instance to use instead of creating a new one
    """
    def __init__(self, db_name, connection: MongoClient = None):
        self.connection = connection or MongoClient()
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    # TODO
    async def contains(self, key: str) -> bool:
        raise NotImplementedError

    async def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    async def delete(self, key: str):
        # No-op when the key is absent.
        res = self.fs.find_one({'_id': key})
        if res is not None:
            self.fs.delete(res._id)

    async def keys(self) -> Iterable[str]:
        return [d._id for d in self.fs.find()]

    async def read(self, key: str) -> ResponseOrKey:
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return self.unpickle(bytes(result.read()))

    async def size(self) -> int:
        # NOTE(review): Collection.count() is removed in PyMongo 4;
        # requires an older driver (count_documents({}) is the modern form).
        return self.db['fs.files'].count()

    # TODO
    async def values(self) -> Iterable[ResponseOrKey]:
        raise NotImplementedError

    async def write(self, key: str, item: ResponseOrKey):
        # GridFS files are immutable: delete any existing entry first.
        await self.delete(key)
        self.fs.put(pickle.dumps(item, protocol=-1), **{'_id': key})
Beispiel #27
0
class GridFsBackendTest(BackendTestCase):
    """Integration tests for GridFsBackend against a local MongoDB."""

    @pytest.fixture(autouse=True)
    def setup(self):
        # Fresh backend + direct GridFS handle per test; DB dropped afterwards.
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')

        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        # Store directly through GridFS, bypassing the backend under test.
        self.gfs.put(content, filename=filename, encoding='utf-8')

    def get_file(self, filename):
        file = self.gfs.get_last_version(filename)
        assert file is not None
        return file.read()

    def file_exists(self, filename):
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        # Private attribute access confirms the backend name is the bucket.
        backend = GridFsBackend('test_bucket', self.config)
        assert backend.fs._GridFS__collection.name == 'test_bucket'

    def test_config(self):
        assert self.backend.client.address == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        # delete() must remove every stored version, not just the latest.
        # NOTE(review): cursor.count() is removed in PyMongo 4; this test
        # requires an older driver.
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        assert self.gfs.find({'filename': filename}).count() == 2

        self.backend.delete(filename)
        assert not self.file_exists(filename)
Beispiel #28
0
def list_files():
    """List files stored in the MongoDB database.

    Lists the 10 most recently uploaded files (by descending uploadDate).
    If an upload task is in progress (``taskid`` query parameter), its
    status info is passed to the template as well.
    """
    db = app.config['mongodb']
    fs = GridFS(db)
    lista_arquivos = []
    for grid_data in fs.find().sort('uploadDate', -1).limit(10):
        lista_arquivos.append(grid_data.filename)
    # Optional Celery-style task id to report upload progress.
    taskid = request.args.get('taskid')
    task_info = None
    if taskid:
        task = raspa_dir.AsyncResult(taskid)
        task_info = task.info
    return render_template('importa_bson.html',
                           lista_arquivos=lista_arquivos,
                           task_info=task_info)
Beispiel #29
0
class GridFSPickleDict(MutableMapping):
    """A dictionary-like interface for MongoDB GridFS"""

    def __init__(self, db_name, connection: MongoClient = None):
        """
        Args:
            db_name: database name (be careful with production databases)
            connection: MongoDB connection instance to use instead of creating a new one
        """
        self.connection = connection or MongoClient()
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any existing entry first
        # (__delitem__ here is a no-op when the key is absent).
        self.__delitem__(key)
        self.fs.put(pickle.dumps(item, protocol=PICKLE_PROTOCOL), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        if res is not None:
            self.fs.delete(res._id)

    def __len__(self):
        # NOTE(review): Collection.count() is removed in PyMongo 4;
        # requires an older driver (count_documents({}) is the modern form).
        return self.db['fs.files'].count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        # Drop both backing collections rather than deleting file by file.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Beispiel #30
0
 def CleanOutputGridFs(self, scenario_id=-1, calibration_id=-1):
     # type: (int, int) -> None
     """Delete Output GridFS files in OUTPUT collection."""
     if not self.OutputItems:  # No outputs
         return
     output_gfs = GridFS(self.maindb, DBTableNames.gridfs_output)
     for corename, types in viewitems(self.OutputItems):
         if types is None:
             continue
         # The format of filename of OUPUT by SEIMS MPI version is:
         #   <SubbasinID>_CoreFileName_ScenarioID_CalibrationID
         #   If no ScenarioID or CalibrationID, i.e., with a value of -1, just left blank.
         #  e.g.,
         #    - 1_SED_OL_SUM_1_ means ScenarioID is 1 and Calibration ID is -1
         #    - 1_SED_OL_SUM__ means ScenarioID is -1 and Calibration ID is -1
         #    - 1_SED_OL_SUM_0_2 means ScenarioID is 0 and Calibration ID is 2
         # Build the regex piecewise: optional subbasin prefix, core name,
         # then scenario/calibration suffixes (blank when the id is < 0).
         regex_str = '(|\\d+_)%s(|_\\S+)' % corename
         regex_str += '_' if scenario_id < 0 else '_%d' % scenario_id
         regex_str += '_' if calibration_id < 0 else '_%d' % calibration_id
         for i in output_gfs.find({'filename': {'$regex': regex_str}}):
             output_gfs.delete(i._id)
def grafana_query_median_impl(request):
    """Grafana 'query' endpoint: median of PM2.5 readings over the requested
    time range, across the trace files named in the target expression.

    Target format appears to be "(<id>|<id>|...).median" where each <id> is
    a trace-file ObjectId string.

    NOTE(review): with no matching points, ``np.median([])`` yields nan
    (plus a RuntimeWarning) -- confirm the Grafana client tolerates that.
    """
    request_dict = json.loads(request.body)
    responseData = [{"target": request_dict['targets'], "datapoints": []}]
    values = []

    # Strip the '.median' suffix and surrounding parentheses, then split the
    # pipe-separated id list.
    target_ids = request_dict['targets'][0]['target'][:-len('.median')].lstrip(
        '(').rstrip(')').split('|')

    from_dt = datetime.strptime(request_dict["range"]["from"],
                                '%Y-%m-%dT%H:%M:%S.%fZ')
    to_dt = datetime.strptime(request_dict["range"]["to"],
                              '%Y-%m-%dT%H:%M:%S.%fZ')

    # Unix timestamps. NOTE(review): strftime('%s') is a platform extension
    # (not available on Windows) -- confirm deployment target.
    from_ts = float(from_dt.strftime('%s'))
    to_ts = float(to_dt.strftime('%s'))

    client = database.getClient()
    db = client.trace_database
    fs = GridFS(db)
    for traceFile in fs.find(
        {'_id': {
            '$in': [ObjectId(u) for u in target_ids]
        }}):
        # Collect all PM2.5 samples for this source within the time window.
        point_collection = client.point_database.sensors.find({
            'sourceId':
            ObjectId(traceFile._id),
            'sensorType':
            'PM2.5',
            'vTimestamp': {
                '$gt': from_ts,
                '$lt': to_ts
            }
        })
        for point in point_collection:
            values.append(int(point["sensorValue"]))

    # Single datapoint: [median value, timestamp at the middle of the range].
    responseData[0]["datapoints"].append(
        [np.median(values), float(from_ts + to_ts) / 2])
    return responseData
def grafana_sources_search(request, format=None):
    """Grafana 'search' endpoint: list trace files of the requested source
    type as ``{text, value}`` pairs (filename and stringified ObjectId).
    """
    request_dict = json.loads(request.body)

    # Map the Grafana target keyword onto the internal source-type tag.
    target_to_type = {
        "mobile_ids": "ambassadair_mobile",
        "static_ids": "ambassadair_static",
    }
    source_type = target_to_type.get(request_dict['target'])

    client = database.getClient()
    fs = GridFS(client.trace_database)

    matches = []
    for trace in fs.find():
        tags = trace.metadata
        if 'sourceTypes' in tags and source_type in tags['sourceTypes']:
            matches.append({
                "text": trace.filename,
                "value": str(trace._id)
            })

    return Response(matches, status=status.HTTP_200_OK)
Beispiel #33
0
def results():
    """Search projects whose notes match the submitted query and render them
    together with one base64-encoded map image per US state.

    Returns the rendered 'results.html' template with the matching project
    documents (``result``) and a filename->base64 image map (``result2``).
    """
    data = dict(request.form)
    query = data['user_query']
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    collection = connection[DBS_NAME][COLLECTION_NAME]
    fs = GridFS(connection[DBS_NAME])
    # The query text is untrusted user input: escape it so regex
    # metacharacters are matched literally instead of being interpreted.
    regex = re.compile("\\b" + re.escape(query) + "\\b", re.IGNORECASE)
    projects = collection.find({'notes': regex}).limit(1000)

    json_projects = []
    state_names = []
    for project in projects:
        state_names.append(project.get("state"))
        json_projects.append(project)

    # One GridFS lookup per distinct state; records without a state are
    # skipped (the old code crashed on ``None.replace``).
    filenames = {state.replace(" ", "") + ".jpg"
                 for state in state_names if state}
    images = {}
    for filename in filenames:
        for grid_output in fs.find({'filename': filename}):
            images[filename] = b64encode(grid_output.read()).decode('utf-8')

    connection.close()
    return render_template('results.html',
                           result=json_projects,
                           result2=images)
def air_map(request):
    """Render the air-quality map: one trajectory per mobile/static source
    plus colored rectangles showing the mean measurement per OSM tile.
    """
    # Zoom level used for tiling the measurements.
    tiling_level = 17
    response = None
    client = database.getClient()
    db = client.trace_database
    fs = GridFS(db)
    responseData = {'trajectories': [], 'points': [], 'rectangles': []}
    # tile (x, y) -> list of measurement values falling inside that tile.
    tiledMeasures = {}
    for traceFile in fs.find():
        if 'sourceTypes' in traceFile.metadata and (
                "ambassadair_mobile" in traceFile.metadata['sourceTypes']
                or "ambassadair_static" in traceFile.metadata['sourceTypes']):
            points = air_map_get_points_bysource(traceFile._id)
            responseData['trajectories'].append({
                'points': [[lon, lat] for lat, lon, _ in points],
                'id':
                traceFile._id
            })
            # Bucket every measurement into its OSM tile.
            # NOTE(review): arguments are passed (lon, lat) to a function
            # named *latlon*_to_tile_number -- confirm the ordering.
            for lat, lon, val in points:
                tile_number = osm_latlon_to_tile_number(lon, lat, tiling_level)
                if tile_number not in tiledMeasures:
                    tiledMeasures[tile_number] = []
                tiledMeasures[tile_number].append(val)

    for tileX, tileY in tiledMeasures:
        # Tile corners: this tile's origin and the next tile's origin.
        x1, y1 = osm_tile_number_to_latlon(tileX, tileY, tiling_level)
        x2, y2 = osm_tile_number_to_latlon(tileX + 1, tileY + 1, tiling_level)
        mean = np.mean(tiledMeasures[tileX, tileY])
        mean_str = "%.3f" % mean
        # Red when the mean exceeds 10 (units not established here),
        # green otherwise; both half-transparent overlays.
        if mean > 10:
            color = 'rgba(255, 0, 0, 0.3)'
        else:
            color = 'rgba(0, 255, 0, 0.3)'
        responseData['rectangles'].append((x1, x2, y1, y2, mean_str, color))

    client.close()
    return render(request, 'mapper/map.html', responseData)
Beispiel #35
0
    def test_gridfs(self):
        """Verify every GridFS entry point forwards its client session."""
        client = self.client
        fs = GridFS(client.pymongo_test)

        def new_file(session=None):
            grid_file = fs.new_file(_id=1, filename='f', session=session)
            # 1 MB, 5 chunks, to test that each chunk is fetched with same lsid.
            grid_file.write(b'a' * 1048576)
            grid_file.close()

        def find(session=None):
            files = list(fs.find({'_id': 1}, session=session))
            for f in files:
                f.read()

        # Each tuple is (callable, args, kwargs); _test_ops invokes them
        # with and without an explicit session argument.
        self._test_ops(
            client, (new_file, [], {}), (fs.put, [b'data'], {}),
            (lambda session=None: fs.get(1, session=session).read(), [], {}),
            (lambda session=None: fs.get_version('f', session=session).read(),
             [], {}), (lambda session=None: fs.get_last_version(
                 'f', session=session).read(), [], {}), (fs.list, [], {}),
            (fs.find_one, [1], {}),
            (lambda session=None: list(fs.find(session=session)), [], {}),
            (fs.exists, [1], {}), (find, [], {}), (fs.delete, [1], {}))
def metal_cdf(request):
    """Render a CDF (SVG) of the metal values collected from every
    'vastplace_example' trace file.
    """
    client = database.getClient()
    fs = GridFS(client.trace_database)

    values = []
    for trace in fs.find():
        tags = trace.metadata
        if 'sourceTypes' not in tags:
            continue
        if "vastplace_example" not in tags['sourceTypes']:
            continue
        for point in get_example_points_for_source(trace._id):
            values.append(point[1])

    series = {
        'data': values,
        'label': "metal points",
        'color': 'green',
    }
    buf = plotCDF([series],
                  xlabel=' Metal',
                  ylabel='CDF',
                  legend=False)

    return HttpResponse(buf, content_type='image/svg+xml')
Beispiel #37
0
def getContent(id):
    """Fetch one incident document plus the base64-encoded map image for
    its state, returned as a JSON string ``[project, img]``.

    The parameter name shadows the builtin ``id`` but is kept unchanged
    for caller (route) compatibility.  Debug ``print`` calls were removed.
    """
    incident_id = int(id)
    connection = MongoClient(MONGODB_HOST, MONGODB_PORT)
    collection = connection[DBS_NAME][COLLECTION_NAME]
    fs = GridFS(connection[DBS_NAME])

    project = collection.find_one({'incident_id': incident_id},
                                  projection=FIELDS)

    # Resolve the state image, e.g. "New York" -> "NewYork.jpg"; the last
    # stored version wins if several match.
    img = None
    state = project['state'].replace(" ", "") + ".jpg"
    for grid_output in fs.find({'filename': state}):
        img = base64.b64encode(grid_output.read()).decode('utf-8')

    # now we have the entire document in project & image in img
    connection.close()
    return json.dumps([project, img])
Beispiel #38
0
class GridFSOperations(Operations):
    """llfuse filesystem operations backed by MongoDB GridFS.

    The filesystem is flat: every file lives directly under the root
    inode (1).  Inodes are mapped to GridFS ObjectIds via ``oid2int`` /
    ``int2oid``, and open GridIn/GridOut handles are kept in the
    module-level ``grid_cache`` keyed by file handle.
    """

    def __init__(self, host, db_name='test', collection_name='fs'):
        self.client = MongoClient(host)
        self.db = Database(self.client, db_name)
        self.fs = GridFS(self.db, collection_name)

    def _new_file(self, name):
        # Fresh, empty GridIn for ``name``.
        return self.fs.new_file(
            filename=name,
            aliases=[],
            length=0,
            upload_date=datetime.now())

    @logmethod
    def init(self):
        pass

    @logmethod
    def access(self, inode, mode, ctx):
        # Everything is accessible; no permission model.
        return True

    @logmethod
    def getattr(self, inode):
        # Root (inode 1) attributes come from the base implementation;
        # everything else is derived from the stored GridFS file.
        if inode == 1:
            return Operations.getattr(self, inode)
        else:
            return grid2attrs(self.fs.get(int2oid(inode)))

    @logmethod
    def lookup(self, parent_inode, name):

        # Flat namespace: only the root directory can contain entries.
        if parent_inode != 1:
            raise FUSEError(errno.ENOENT)

        try:
            gridout = self.fs.get_last_version(filename=name.decode())
        except NoFile:
            raise FUSEError(errno.ENOENT)

        return grid2attrs(gridout)

    @logmethod
    def create(self, inode_parent, name, mode, flags, ctx):
        # New file: the GridIn stays cached until flushed/released.
        gridin = self._new_file(name.decode())
        fh = oid2int(gridin._id)
        grid_cache[fh] = gridin
        return (fh, grid2attrs(gridin))

    @logmethod
    def flush(self, fh):
        # Closing the GridIn commits the data to GridFS.
        grid = grid_cache[fh]
        grid.close()

    @logmethod
    def setattr(self, inode, attr):
        # Attribute changes are not persisted; report current attributes.
        gridout = self.fs.get(int2oid(inode))
        return grid2attrs(gridout)

    @logmethod
    def release(self, fh):
        del grid_cache[fh]

    @logmethod
    def forget(self, inode_list):

        # Drop cached oid<->int mappings for forgotten inodes.
        for inode in inode_list:
            if inode in oid_cache.ints:
                del oid_cache.ints[inode]

    @logmethod
    def destroy(self):
        self.client.close()

    @logmethod
    def open(self, inode, flags):
        # The inode doubles as the file handle.
        gridout = self.fs.get(int2oid(inode))
        grid_cache[inode] = gridout
        return inode

    @logmethod
    def read(self, fh, off, size):
        grid = grid_cache[fh]

        # A file still being written must be committed and reopened for
        # reading before its content is visible.
        if isinstance(grid, GridIn):
            grid.close()
            grid = self.fs.get(int2oid(fh))
            grid_cache[fh] = grid

        grid.seek(off)
        return grid.read(size)

    @logmethod
    def write(self, fh, off, buf):
        grid = grid_cache[fh]

        # GridFS files are immutable: writing to an opened-for-read file
        # creates a new version, preserving the first ``off`` bytes.
        if isinstance(grid, GridOut):
            offbuf = grid.read(off)
            grid = self._new_file(name=grid.name)
            grid_cache[fh] = grid
            grid.write(offbuf)
            del offbuf

        # A previously committed GridIn cannot be appended to; start a
        # fresh version.
        if grid.closed:
            grid = self._new_file(name=grid.name)
            grid_cache[fh] = grid

        grid.write(buf)
        return len(buf)

    @logmethod
    def unlink(self, parent_inode, name):

        if parent_inode != 1:
            Operations.unlink(self, parent_inode, name)
        else:
            # Remove *every* stored version of the file.
            for gridout in self.fs.find({'filename': name.decode()}):
                self.fs.delete(gridout._id)

    # The remaining operations are unsupported on this flat read/write
    # filesystem; they delegate to the default base implementation.

    @logmethod
    def fsync(self, fh, datasync):
        Operations.fsync(self, fh, datasync)

    @logmethod
    def fsyncdir(self, fh, datasync):
        Operations.fsyncdir(self, fh, datasync)

    @logmethod
    def getxattr(self, inode, name):
        Operations.getxattr(self, inode, name)

    @logmethod
    def link(self, inode, new_parent_inode, new_name):
        Operations.link(self, inode, new_parent_inode, new_name)

    @logmethod
    def listxattr(self, inode):
        Operations.listxattr(self, inode)

    @logmethod
    def mkdir(self, parent_inode, name, mode, ctx):
        Operations.mkdir(self, parent_inode, name, mode, ctx)

    @logmethod
    def mknod(self, parent_inode, name, mode, rdev, ctx):
        Operations.mknod(self, parent_inode, name, mode, rdev, ctx)

    @logmethod
    def opendir(self, inode):
        Operations.opendir(self, inode)

    @logmethod
    def readdir(self, fh, off):
        Operations.readdir(self, fh, off)

    @logmethod
    def readlink(self, inode):
        Operations.readlink(self, inode)

    @logmethod
    def releasedir(self, fh):
        Operations.releasedir(self, fh)

    @logmethod
    def removexattr(self, inode, name):
        Operations.removexattr(self, inode, name)

    @logmethod
    def rename(self, inode_parent_old, name_old, inode_parent_new, name_new):
        Operations.rename(self,
            inode_parent_old, name_old, inode_parent_new, name_new)

    @logmethod
    def rmdir(self, inode_parent, name):
        Operations.rmdir(self, inode_parent, name)

    @logmethod
    def setxattr(self, inode, name, value):
        Operations.setxattr(self, inode, name, value)

    @logmethod
    def statfs(self):
        Operations.statfs(self)

    @logmethod
    def symlink(self, inode_parent, name, target, ctx):
        Operations.symlink(self, inode_parent, name, target, ctx)
Beispiel #39
0
class GridFsBackend(BaseBackend):
    '''
    A Mongo GridFS backend

    Expect the following settings:

    - `mongo_url`: The Mongo access URL
    - `mongo_db`: The database to store the file in.
    '''
    def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)

        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        # The backend name doubles as the GridFS collection prefix.
        self.fs = GridFS(self.db, self.name)

    def exists(self, filename):
        '''True if at least one version of `filename` is stored.'''
        return self.fs.exists(filename=filename)

    @contextmanager
    def open(self, filename, mode='r', encoding='utf8'):
        '''Context manager yielding a readable or writable file object.

        Writes are buffered in memory and committed to GridFS as a new
        version when the context exits.
        '''
        if 'r' in mode:
            f = self.fs.get_last_version(filename)
            yield f if 'b' in mode else codecs.getreader(encoding)(f)
        else:  # mode == 'w'
            f = io.BytesIO() if 'b' in mode else io.StringIO()
            yield f
            params = {'filename': filename}
            if 'b' not in mode:
                params['encoding'] = encoding
            self.fs.put(f.getvalue(), **params)

    def read(self, filename):
        '''Return the content of the latest stored version.'''
        f = self.fs.get_last_version(filename)
        return f.read()

    def write(self, filename, content):
        '''Store `content` as a new version of `filename`.'''
        kwargs = {
            'filename': filename
        }

        if hasattr(content, 'content_type') and content.content_type is not None:
            kwargs['content_type'] = content.content_type

        return self.fs.put(self.as_binary(content), **kwargs)

    def delete(self, filename):
        '''Delete every stored version of `filename`.

        NOTE(review): the regex is anchored only at the start, so any
        other filename that begins with `filename` is deleted as well --
        confirm this prefix behavior is intended.
        '''
        regex = '^{0}'.format(re.escape(filename))
        for version in self.fs.find({'filename': {'$regex': regex}}):
            self.fs.delete(version._id)

    def copy(self, filename, target):
        '''Duplicate the latest version of `filename` under `target`.'''
        src = self.fs.get_last_version(filename)
        self.fs.put(src, filename=target, content_type=src.content_type)

    def list_files(self):
        '''Yield every stored filename.'''
        for f in self.fs.list():
            yield f

    def serve(self, filename):
        '''Flask response streaming the latest version of `filename`.'''
        file = self.fs.get_last_version(filename)
        return send_file(file, mimetype=file.content_type)

    def get_metadata(self, filename):
        '''Checksum, size, mime type and mtime of the latest version.'''
        f = self.fs.get_last_version(filename)
        return {
            'checksum': 'md5:{0}'.format(f.md5),
            'size': f.length,
            'mime': f.content_type,
            'modified': f.upload_date,
        }
Beispiel #40
0
def _get_grid_id(md5, db_alias=DEFAULT_CONNECTION_NAME, collection_name='fs'):
    """Return the first GridFS file whose stored md5 matches, or None.

    The previous implementation indexed ``fs.find(...)[0]`` and swallowed
    *every* exception (Python-2-only ``except Exception, e`` syntax,
    hiding connection errors as "not found").  ``find_one`` returns None
    for a missing document without masking real failures.
    """
    fs = GridFS(get_db(db_alias), collection_name)
    return fs.find_one({"md5": md5})
Beispiel #41
0
class MongoFileStorage(MongoStorage, FileStorage):
    """Versioned file storage implemented on top of MongoDB GridFS."""

    # GridFS document field holding the file name.
    FILENAME = 'filename'

    def __init__(self, *args, **kwargs):

        super(MongoFileStorage, self).__init__(*args, **kwargs)

        # Set lazily by _connect().
        self.gridfs = None

    def _connect(self, **kwargs):
        """Connect and bind the GridFS handle on success."""

        result = super(MongoFileStorage, self)._connect(**kwargs)

        if result:

            self.gridfs = GridFS(
                database=self._database, collection=self.get_table()
            )

        return result

    def put(self, name, data, meta=None):
        """Store ``data`` as a new version of file ``name``.

        The file is created *outside* the try block: previously, if
        ``new_file()`` raised, the ``finally: fs.close()`` clause hit an
        UnboundLocalError that masked the original exception.
        """
        fs = self.new_file(name=name, meta=meta)
        try:
            fs.write(data=data)
        finally:
            fs.close()

    def put_meta(self, name, meta):
        """Create a new version of ``name`` with updated metadata,
        streaming the previous content across in small chunks."""
        oldf, _meta = self.get(name, with_meta=True)
        _meta.update(meta)

        # Same fix as put(): bind fs before entering the try/finally.
        fs = self.new_file(name=name, meta=_meta)
        try:
            while True:
                data = oldf.read(512)

                if not data:
                    break

                fs.write(data=data)
        finally:
            fs.close()

    def get(self, name, version=-1, with_meta=False):
        """Return a MongoFileStream for ``name`` (or None if missing).

        With ``with_meta=True``, returns ``(stream, metadata)`` instead.
        """

        result = None

        try:
            gridout = self.gridfs.get_version(filename=name, version=version)
        except NoFile:
            # Missing file is signalled by returning None, not raising.
            pass
        else:
            if with_meta:
                result = MongoFileStream(gridout), gridout.metadata

            else:
                result = MongoFileStream(gridout)

        return result

    def get_meta(self, name):
        """Return only the metadata of ``name``, or None if missing."""
        result = self.get(name, with_meta=True)

        if result is not None:
            result = result[1]

        return result

    def exists(self, name):
        """True if at least one version of ``name`` is stored."""

        result = self.gridfs.exists(filename=name)

        return result

    def find(
        self,
        names=None,
        meta=None,
        sort=None,
        limit=-1,
        skip=0,
        with_meta=False
    ):
        """Query files by name(s) and/or metadata fields.

        Returns a generator of MongoFileStream objects (or
        ``(stream, metadata)`` pairs when ``with_meta`` is set).
        """

        request = {}

        if names is not None:
            # Single name -> exact match; iterable of names -> $in query.
            if isinstance(names, basestring):
                request[MongoFileStorage.FILENAME] = names
            else:
                request[MongoFileStorage.FILENAME] = {'$in': names}

        if meta is not None:
            for metafield in meta:
                field = 'metadata.{0}'.format(metafield)
                request[field] = meta[metafield]

        cursor = self.gridfs.find(request)

        if sort is not None:
            cursor.sort(sort)
        if limit > 0:
            cursor.limit(limit)
        if skip > 0:
            cursor.skip(skip)

        if with_meta:
            result = (
                (MongoFileStream(gridout), gridout.metadata)
                for gridout in cursor
            )

        else:
            result = (MongoFileStream(gridout) for gridout in cursor)

        return result

    def list(self):
        """Return the list of stored filenames."""

        return self.gridfs.list()

    def new_file(self, name=None, meta=None, data=None):
        """Create a new (empty unless ``data`` given) file version.

        A random UUID filename is generated when ``name`` is None.
        """

        kwargs = {}

        if name is None:
            name = str(uuid())

        kwargs['filename'] = name

        if meta is not None:
            kwargs['metadata'] = meta

        gridout = self.gridfs.new_file(**kwargs)

        result = MongoFileStream(gridout)

        if data is not None:
            result.write(data)

        return result

    def delete(self, names=None):
        """Delete every version of the given name(s); all files if None."""

        if names is None:
            names = self.gridfs.list()

        names = ensure_iterable(names)

        for name in names:
            # Keep deleting until no version of the name remains.
            while True:
                fs = self.get(name)

                if fs is None:
                    break

                self.gridfs.delete(file_id=fs.get_inner_object()._id)
Beispiel #42
0
class Database():
    """MongoDB access layer: sessions, comments, plugin output, datastore
    documents (``voldb``) and GridFS-backed files (``voldbfs``).
    """
    def __init__(self):
        # Create the connection
        if config['valid']:
            mongo_uri = config['database']['mongo_uri']
        else:
            mongo_uri = 'mongodb://localhost'

        connection = pymongo.MongoClient(mongo_uri)

        # Version Check
        server_version = connection.server_info()['version']
        if int(server_version[0]) < 3:
            raise UserWarning('Incompatible MongoDB Version detected. Requires 3 or higher. Found {0}'.format(server_version))

        # Connect to Databases.
        voldb = connection['voldb']
        voldbfs = connection['voldbfs']

        # Get Collections
        self.vol_sessions = voldb.sessions
        self.vol_comments = voldb.comments
        self.vol_plugins = voldb.plugins
        self.vol_datastore = voldb.datastore
        self.vol_files = GridFS(voldbfs)

        # Indexes
        self.vol_comments.create_index([('freetext', 'text')])

        self.vol_plugins.create_index([('$**', 'text')])

    ##
    # Sessions
    ##
    def get_allsessions(self):
        sessions = self.vol_sessions.find()
        return [x for x in sessions]

    def get_session(self, session_id):
        session_id = ObjectId(session_id)
        session = self.vol_sessions.find_one({'_id': session_id})
        return session

    def create_session(self, session_data):
        session_id = self.vol_sessions.insert_one(session_data).inserted_id
        return session_id

    def update_session(self, session_id, new_values):
        session_id = ObjectId(session_id)
        self.vol_sessions.update_one({'_id': session_id}, {"$set": new_values })
        return True

    ##
    # Comments
    ##
    def get_commentbyid(self, comment_id):
        comment_id = ObjectId(comment_id)
        comment = self.vol_comments.find({'_id': comment_id})
        return comment

    def get_commentbysession(self, session_id):
        # Newest first.
        session_id = ObjectId(session_id)
        comments = self.vol_comments.find({'session_id': session_id}).sort("created", -1)
        return [row for row in comments]

    def create_comment(self, comment_data):
        comment_id = self.vol_comments.insert_one(comment_data).inserted_id
        return comment_id

    def search_comments(self, search_text, session_id=None):
        # Full-text search, optionally narrowed to one session.
        results = []
        rows = self.vol_comments.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                session_id = ObjectId(session_id)
                if row['session_id'] == session_id:
                    results.append(row)
            else:
                results.append(row)
        return results

    ##
    # Plugins
    ##

    def get_pluginbysession(self, session_id):
        session_id = ObjectId(session_id)
        result_rows = []
        plugin_output = self.vol_plugins.find({'session_id': session_id}).sort("created", -1)
        for row in plugin_output:
            result_rows.append(row)

        # result_rows.sort(key=lambda d: (d["plugin_name"]))

        return result_rows

    def get_pluginbyid(self, plugin_id):
        plugin_id = ObjectId(plugin_id)
        plugin_output = self.vol_plugins.find_one({'_id': plugin_id})
        # Oversized outputs are offloaded to GridFS (see update_plugin);
        # re-inline them transparently.
        if 'largedoc' in plugin_output:
            large_document_id = plugin_output['plugin_output']
            large_document = self.get_filebyid(large_document_id)
            plugin_output['plugin_output'] = json.loads(large_document.read())
        return plugin_output

    def get_plugin_byname(self, plugin_name, session_id):
        session_id = ObjectId(session_id)
        plugin_output = self.vol_plugins.find_one({'session_id': session_id, 'plugin_name': plugin_name})
        if plugin_output and 'largedoc' in plugin_output:
            large_document_id = plugin_output['plugin_output']
            large_document = self.get_filebyid(large_document_id)
            plugin_output['plugin_output'] = json.loads(large_document.read())
        return plugin_output

    def create_plugin(self, plugin_data):
        # Force session ID
        plugin_data['session_id'] = ObjectId(plugin_data['session_id'])
        plugin_id = self.vol_plugins.insert_one(plugin_data).inserted_id
        return plugin_id

    def search_plugins(self, search_text, session_id=None, plugin_name=None):
        results = []
        rows = self.vol_plugins.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                session_id = ObjectId(session_id)
                if row['session_id'] == session_id:
                    results.append(row)
            # This is the session filter from the main page.
            elif plugin_name:
                if row['plugin_name'] == plugin_name:
                    if search_text in str(row['plugin_output']):
                        # NOTE(review): appends the session id, not the row,
                        # in this branch -- presumably intended for the
                        # session filter; confirm callers expect this.
                        results.append(row['session_id'])

            else:
                results.append(row)
        return results

    def update_plugin(self, plugin_id, new_values):
        plugin_id = ObjectId(plugin_id)
        # Spill oversized output to GridFS; BSON documents are capped at
        # 16 MB, so anything near that goes to a separate file.
        if len(str(new_values)) > 12000000:
            print "Storing Large Document in GridFS"
            large_document = json.dumps(new_values['plugin_output'])
            # NOTE(review): 'sess_id'/'sha256'/'filename' are passed as
            # literal placeholder strings here -- confirm intended.
            large_document_id = self.create_file(large_document, 'sess_id', 'sha256', 'filename', pid=None, file_meta=None)
            new_values['plugin_output'] = large_document_id
            new_values['largedoc'] = 'True'

        self.vol_plugins.update_one({'_id': plugin_id}, {"$set": new_values})
        return True


    ##
    # File System
    ##
    def get_filebyid(self, file_id):
        file_id = ObjectId(file_id)
        file_object = self.vol_files.get(file_id)
        return file_object

    def list_files(self, session_id):
        # NOTE(review): create_file stores the session under 'sess_id',
        # but this queries 'session_id' -- confirm files are found.
        session_id = ObjectId(session_id)
        results = self.vol_files.find({'session_id': session_id})
        return [row for row in results]

    def search_files(self, search_query):
        results = self.vol_files.find(search_query)
        return [row for row in results]

    def get_strings(self, file_id):
        file_id = ObjectId(file_id)
        results = self.vol_files.find_one({'filename': '{0}_strings.txt'.format(str(file_id))})
        return results

    def create_file(self, file_data, session_id, sha256, filename, pid=None, file_meta=None):
        # A 24-character id is assumed to be a hex ObjectId string.
        if len(session_id) == 24:
            session_id = ObjectId(session_id)
        file_id = self.vol_files.put(file_data, filename=filename, sess_id=session_id, sha256=sha256, pid=pid, file_meta=file_meta)
        return file_id

    def drop_file(self, file_id):
        file_id = ObjectId(file_id)
        self.vol_files.delete(file_id)
        return True

    ##
    # DataStore
    ##

    def get_alldatastore(self):
        results = self.vol_datastore.find()
        return [row for row in results]

    def search_datastore(self, search_query):
        results = self.vol_datastore.find(search_query)
        return [row for row in results]

    def create_datastore(self, store_data):
        data_id = self.vol_datastore.insert_one(store_data).inserted_id
        return data_id

    def update_datastore(self, search_query, new_values):
        self.vol_datastore.update_one(search_query, {"$set": new_values})
        return True



    ##
    # Drop Session
    ##
    def drop_session(self, session_id):
        """Delete a session and everything attached to it."""
        session_id = ObjectId(session_id)

        # Drop Plugins
        self.vol_plugins.delete_many({'session_id': session_id})
        # Drop Files
        # NOTE(review): rows here are GridOut objects and files are stored
        # without a 'file_id' field -- confirm this should be row._id.
        results = self.vol_files.find({'session_id': session_id})
        for row in results:
            self.vol_files.delete(row['file_id'])
        # Drop DataStore
        self.vol_datastore.delete_many({'session_id': session_id})
        # Drop Notes
        self.vol_comments.delete_many({'session_id': session_id})
        # Drop session
        self.vol_sessions.delete_many({'_id': session_id})
Beispiel #43
0
class Database():
    """Simpler MongoDB access layer variant (localhost only): sessions,
    comments, plugin output, datastore and GridFS files.
    """
    def __init__(self):
        # Create the connection

        connection = pymongo.MongoClient('localhost')

        # Version Check
        server_version = connection.server_info()['version']
        if int(server_version[0]) < 3:
            raise UserWarning('Incompatible MongoDB Version detected. Requires 3 or higher. Found {0}'.format(server_version))

        # Connect to Databases.
        voldb = connection['voldb']
        voldbfs = connection['voldbfs']

        # Get Collections
        self.vol_sessions = voldb.sessions
        self.vol_comments = voldb.comments
        self.vol_plugins = voldb.plugins
        self.vol_datastore = voldb.datastore
        self.vol_files = GridFS(voldbfs)

        # Indexes
        self.vol_comments.create_index([('freetext', 'text')])

        self.vol_plugins.create_index([('$**', 'text')])

    ##
    # Sessions
    ##
    def get_allsessions(self):
        sessions = self.vol_sessions.find()
        return [x for x in sessions]

    def get_session(self, sess_id):
        session = self.vol_sessions.find_one({'_id': sess_id})
        return session

    def create_session(self, sess_data):
        sess_id = self.vol_sessions.insert_one(sess_data).inserted_id
        return sess_id

    def update_session(self, sess_id, new_values):
        self.vol_sessions.update_one({'_id':sess_id},{"$set": new_values })
        return True

    ##
    # Comments
    ##
    def get_commentbyid(self, comment_id):
        comment = self.vol_comments.find({'_id': comment_id})
        return comment

    def get_commentbysession(self,session_id):
        # Newest first.
        comments = self.vol_comments.find({'session_id': session_id}).sort("created", -1)
        return [row for row in comments]

    def create_comment(self, comment_data):
        comment_id = self.vol_comments.insert_one(comment_data).inserted_id
        return comment_id

    def search_comments(self, search_text, session_id=None):
        # Full-text search, optionally narrowed to one session.
        results = []
        rows = self.vol_comments.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                if row['session_id'] == session_id:
                    results.append(row)
            else:
                results.append(row)
        return results

    ##
    # Plugins
    ##

    def get_pluginbysession(self, session_id):
        result_rows = []
        plugin_output = self.vol_plugins.find({'session_id': session_id}).sort("created", -1)
        for row in plugin_output:
            result_rows.append(row)
        return result_rows

    def get_pluginbyid(self, plugin_id):
        plugin_output = self.vol_plugins.find_one({'_id': plugin_id})
        return plugin_output

    def create_plugin(self, plugin_data):
        plugin_id = self.vol_plugins.insert_one(plugin_data).inserted_id
        return plugin_id

    def search_plugins(self, search_text, session_id=None):
        results = []
        rows = self.vol_plugins.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                if row['session_id'] == session_id:
                    results.append(row)
            else:
                results.append(row)
        return results

    def update_plugin(self, plugin_id, new_values):
        self.vol_plugins.update_one({'_id':plugin_id},{"$set": new_values })
        return True


    ##
    # File System
    ##
    def get_filebyid(self, file_id):
        file_object = self.vol_files.get(file_id)
        return file_object

    def list_files(self, sess_id):
        # NOTE(review): create_file stores the session under 'sess_id',
        # but this queries 'session_id' -- confirm files are found.
        results = self.vol_files.find({'session_id': sess_id})
        return results

    def create_file(self, file_data, sess_id, sha256, filename, pid=None, file_meta=None):
        # NOTE(review): the file_meta parameter is accepted but never
        # stored -- confirm whether that is intentional.
        file_id = self.vol_files.put(file_data, filename=filename, sess_id=sess_id, sha256=sha256, pid=pid)
        return file_id


    ##
    # DataStore
    ##

    def get_alldatastore(self):
        results = self.vol_datastore.find()
        return [row for row in results]

    def search_datastore(self, search_query):
        results = self.vol_datastore.find(search_query)
        return [row for row in results]

    def create_datastore(self, store_data):
        data_id = self.vol_datastore.insert_one(store_data).inserted_id
        return data_id

    def update_datastore(self, search_query, new_values):
        self.vol_datastore.update_one(search_query, {"$set": new_values})
        return True


    ##
    # Drop Session
    ##
    def drop_session(self, session_id):
        """Delete a session and everything attached to it."""

        # Drop Plugins
        self.vol_plugins.delete_many({'session_id': session_id})
        # Drop Files
        # NOTE(review): rows here are GridOut objects and files are stored
        # without a 'file_id' field -- confirm this should be row._id.
        results = self.vol_files.find({'session_id': session_id})
        for row in results:
            self.vol_files.delete(row['file_id'])
        # Drop DataStore
        self.vol_datastore.delete_many({'session_id': session_id})
        # Drop Notes
        self.vol_comments.delete_many({'session_id': session_id})
        # Drop session
        self.vol_sessions.delete_many({'_id': session_id})
class GridFsBackendTest(BackendTestCase):
    hasher = 'md5'

    @pytest.fixture
    def pngimage(self, pngfile):
        """Yield the PNG fixture file opened for binary reading."""
        with open(pngfile, 'rb') as handle:
            yield handle

    @pytest.fixture
    def jpgimage(self, jpgfile):
        """Yield the JPEG fixture file opened for binary reading."""
        with open(jpgfile, 'rb') as handle:
            yield handle

    @pytest.fixture(autouse=True)
    def setup(self):
        """Build a fresh GridFS bucket and backend; drop the DB afterwards."""
        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')
        self.backend = GridFsBackend('test', self.config)
        yield
        # Tear down: wipe everything the test created.
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        """Store *content* under *filename* directly through GridFS."""
        self.gfs.put(content, encoding='utf-8', filename=filename)

    def get_file(self, filename):
        """Read back the most recent version of *filename* via GridFS."""
        stored = self.gfs.get_last_version(filename)
        assert stored is not None
        return stored.read()

    def file_exists(self, filename):
        """Return True when GridFS holds any version of *filename*."""
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        """The bucket name given to the backend becomes the GridFS collection."""
        backend = GridFsBackend('test_bucket', self.config)
        # Reach through the name-mangled attribute to inspect the collection.
        collection = backend.fs._GridFS__collection
        assert collection.name == 'test_bucket'

    def test_config(self):
        """The backend connects to the configured host/port and database."""
        host, port = self.backend.client.address
        assert (host, port) == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        """Deleting a filename removes every stored version of it."""
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
        # materializing the cursor and taking its length works on every
        # version and asserts the same thing.
        assert len(list(self.gfs.find({'filename': filename}))) == 2

        self.backend.delete(filename)
        assert not self.file_exists(filename)

    def test_write_pngimage(self, pngimage, utils):
        """A written PNG keeps its content type and its exact bytes."""
        filename = 'test.png'
        content = six.binary_type(pngimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        storage = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, storage)

        with self.backend.open(filename, 'rb') as stored:
            assert stored.content_type == content_type

        self.assert_bin_equal(filename, content)

    def test_write_jpgimage(self, jpgimage, utils):
        """A written JPEG keeps its content type and its exact bytes."""
        filename = 'test.jpg'
        content = six.binary_type(jpgimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        storage = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, storage)

        with self.backend.open(filename, 'rb') as stored:
            assert stored.content_type == content_type

        self.assert_bin_equal(filename, content)