Example #1
0
def del_files_of_object(objid, types=None):
    """Delete the GridFS files belonging to an object.

    :param objid: object identifier embedded in the stored filenames
    :param types: media types to delete ('images', 'thumbs', 'videos',
                  'audios'); ``None`` or ``['all']`` deletes every type
    :return: True when all requested types were processed, False when an
             unknown type is requested (processing stops at that point)
    """
    allowed_types = ['images', 'thumbs', 'videos', 'audios']
    # None replaces the previous mutable-default ['all'] sentinel; both
    # expand to every allowed type.
    if types is None or types == ['all']:
        types = allowed_types
    for filetype in types:
        if filetype not in allowed_types:
            return False
        dbname = mediatypes[filetype]['db']
        ext = mediatypes[filetype]['ext']
        gridfsdb = database.Database(
            MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), dbname)
        fs = GridFS(gridfsdb)
        if filetype in ['audios', 'videos']:
            # Audio/video files are stored once per language variant, so
            # every language's file must be removed.
            languages = mongodb.db.languages.find({}, {
                '_id': 0,
                'code': 1,
                'locale': 1
            })
            for language in languages:
                isocode = ','.join([language['code'], language['locale']])
                fileid = fs.find_one({'filename': isocode + objid + ext})
                if fileid is not None:
                    fs.delete(fileid._id)
        else:
            fileid = fs.find_one({'filename': objid + ext})
            if fileid is not None:
                fs.delete(fileid._id)
    # BUG FIX: success previously fell through and returned a falsy None,
    # indistinguishable from the False failure path for truthiness checks.
    return True
class GridFSPickleDict(BaseStorage):
    """A dictionary-like interface for a GridFS collection"""
    def __init__(self,
                 db_name,
                 collection_name=None,
                 connection=None,
                 **kwargs):
        """
        :param db_name: database name (be careful with production databases)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        super().__init__(**kwargs)
        # Reuse the caller's connection when given; otherwise open a default one.
        self.connection = connection if connection is not None else MongoClient()
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        grid_file = self.fs.find_one({'_id': key})
        if grid_file is None:
            raise KeyError
        return self.deserialize(grid_file.read())

    def __setitem__(self, key, item):
        # GridFS files are immutable, so drop any previous value first.
        try:
            del self[key]
        except KeyError:
            pass
        self.fs.put(self.serialize(item), _id=key)

    def __delitem__(self, key):
        grid_file = self.fs.find_one({'_id': key})
        if grid_file is None:
            raise KeyError
        self.fs.delete(grid_file._id)

    def __len__(self):
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        return (doc._id for doc in self.fs.find())

    def clear(self):
        # GridFS keeps its data in these two fixed collections.
        for name in ('fs.files', 'fs.chunks'):
            self.db[name].drop()

    def __str__(self):
        return str(dict(self.items()))
Example #3
0
def replicate_to_new_node(node_id, new_node_id):
    """Replicate this node's files to a node that just joined the ring.

    Two cases are covered: the new node became this node's successor
    (replica) node, and the new node took over a range this node
    previously held replicas for.

    :param node_id: id of the local node
    :param new_node_id: id of the newly joined node
    """
    mongo = MongoClient('localhost', 27017)
    db = mongo['rpfs_slave_db_' + str(node_id)]
    fs = GridFS(db)
    topology = list(db.topology.find())
    # BUG FIX: in Python 3 ``filter(...)`` returns an iterator, which is not
    # subscriptable; take the first matching topology entry with next().
    local_v_nodes = next(node for node in topology
                         if node.get('node_id') == node_id).get('v_nodes')
    responses = []

    sorted_v_node_ids, v_node_map = build_topology_maps(topology)

    # replicate files to new node if it becomes the current node's replica node
    for v_node in local_v_nodes:
        next_v_node_id = get_next_distinct_v_node_id(
            v_node.get('hash_ring_id'), sorted_v_node_ids, v_node_map)

        if not next_v_node_id:
            break

        next_node = v_node_map.get(next_v_node_id)

        if next_node.get('node_id') == new_node_id:
            previous_v_node_id_index = get_previous_v_node_id_index(
                v_node.get('hash_ring_id'), sorted_v_node_ids)
            previous_v_node_id = sorted_v_node_ids[previous_v_node_id_index]
            # Files in (previous, current] hash range are this v-node's.
            fdocs = get_file_documents_between(db, previous_v_node_id,
                                               v_node.get('hash_ring_id'))

            for fdoc in fdocs:
                f = fs.find_one({'filename': fdoc.get('filename')})
                responses.append(send_file(f, next_node))

    # replicate files to the new node whose responsibility has been taken away by the new node
    for v_node in local_v_nodes:
        previous_v_node_id_index = get_previous_v_node_id_index(
            v_node.get('hash_ring_id'), sorted_v_node_ids)
        previous_v_node_id = sorted_v_node_ids[previous_v_node_id_index]
        previous_node = v_node_map.get(previous_v_node_id)

        if previous_node.get('node_id') == new_node_id:
            previous_previous_v_node_id = get_previous_distinct_v_node_id(
                previous_v_node_id, sorted_v_node_ids, v_node_map)

            if not previous_previous_v_node_id:
                break

            fdocs = get_file_documents_between(db, previous_previous_v_node_id,
                                               previous_v_node_id)

            for fdoc in fdocs:
                f = fs.find_one({'filename': fdoc.get('filename')})
                responses.append(send_file(f, previous_node))

    assert_successful_responses(responses)
Example #4
0
class GridFSPickleDict(BaseStorage):
    """A dictionary-like interface for a GridFS database

    Args:
        db_name: Database name
        collection_name: Ignored; GridFS internally uses collections 'fs.files' and 'fs.chunks'
        connection: :py:class:`pymongo.MongoClient` object to reuse instead of creating a new one
        kwargs: Additional keyword arguments for :py:class:`pymongo.MongoClient`
    """
    def __init__(self,
                 db_name,
                 collection_name=None,
                 connection=None,
                 **kwargs):
        super().__init__(**kwargs)
        # Forward only the kwargs MongoClient actually accepts.
        connection_kwargs = get_valid_kwargs(MongoClient, kwargs)
        self.connection = connection or MongoClient(**connection_kwargs)
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        grid_file = self.fs.find_one({'_id': key})
        if grid_file is None:
            raise KeyError
        return self.deserialize(grid_file.read())

    def __setitem__(self, key, item):
        # GridFS files are immutable, so drop any previous value first.
        try:
            del self[key]
        except KeyError:
            pass
        self.fs.put(self.serialize(item), _id=key)

    def __delitem__(self, key):
        grid_file = self.fs.find_one({'_id': key})
        if grid_file is None:
            raise KeyError
        self.fs.delete(grid_file._id)

    def __len__(self):
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        return (doc._id for doc in self.fs.find())

    def clear(self):
        # GridFS keeps its data in these two fixed collections.
        for name in ('fs.files', 'fs.chunks'):
            self.db[name].drop()

    def __str__(self):
        return str(dict(self.items()))
Example #5
0
def file(_id=None):
    """View for displaying one GridFS 'file'.

    Shows the file and the metadata associated with it. The file is
    looked up either by the 'filename' query arg or by ``_id`` (from the
    URL or the '_id' query arg).
    """
    db = app.config['mongodb']
    fs = GridFS(db)
    tags_object = Tags(db)
    form_tags = TagsForm()
    form_tags.tags.choices = tags_object.tags_text
    if request.args.get('filename'):
        # Sanitize user input before using it in a Mongo query.
        filename = mongo_sanitizar(request.args.get('filename'))
        logger.warn('Filename %s ' % filename)
        grid_data = fs.find_one({'filename': filename})
    else:
        if not _id:
            _id = request.args.get('_id')
        grid_data = fs.get(ObjectId(_id))
    if grid_data:
        summary_ = dict_to_html(summary(grid_data=grid_data))
        summary_carga = dict_to_html(carga.summary(grid_data=grid_data))
        tags = tags_object.list(_id)
        ocorrencias = Ocorrencias(db).list(_id)
    else:
        summary_ = summary_carga = 'Arquivo não encontrado.'
        # BUG FIX: 'tags' was unbound on this branch, raising NameError
        # in the render_template call below.
        tags = []
        ocorrencias = []
    return render_template('view_file.html',
                           myfile=grid_data,
                           summary=summary_,
                           summary_carga=summary_carga,
                           form_tags=form_tags,
                           tags=tags,
                           ocorrencias=ocorrencias)
Example #6
0
def set_obj_media(objid, isocode, mediatype, file):
    """Store an uploaded audio/video file for one translation of an object.

    Replaces any previously stored file for the same translation and
    flags the translation as having this media type.

    :param objid: object identifier
    :param isocode: language code of the translation
    :param mediatype: 'audios' or 'videos'
    :param file: uploaded file object (werkzeug-style, with .filename)
    :return: True on success, False when the extension or media type is invalid
    """
    # Only .mp3/.mp4 uploads are accepted (case-sensitive, as before).
    if not file.filename.endswith(('.mp3', '.mp4')):
        return False
    if mediatype not in ['audios', 'videos']:
        return False

    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT),
        mediatypes[mediatype]['db'])
    fs = GridFS(gridfsdb)

    # Mark the matching translation as having this media type.
    mongodb.db.objects.update_one(
        {
            'id': objid,
            'translations.isocode': isocode
        }, {'$set': {
            'translations.$.' + mediatype: True
        }})
    # GridFS files are immutable: remove any previous version first.
    oldfile = fs.find_one(
        {'filename': isocode + '-' + objid + mediatypes[mediatype]['ext']})
    if oldfile is not None:
        fs.delete(oldfile._id)
    fs.put(file,
           content_type=file.content_type,
           filename=isocode + '-' + objid + mediatypes[mediatype]['ext'])

    return True
Example #7
0
 def get(self, id):
     """Copy the GridFS file with the given id into the repository directory.

     :param id: hex string of the file's ObjectId
     :return: "ok" on success
     """
     grid_fs = GridFS(get_db(_cfg.database__database_repository))
     grid_fs_file = grid_fs.find_one({'_id': ObjectId(id)})
     # BUG FIX: GridFS read() returns bytes, so the target must be opened in
     # binary mode; the context manager guarantees the handle is closed.
     with open(_cfg.repository__path + grid_fs_file.name, 'wb') as target:
         target.write(grid_fs_file.read())
     return "ok"
Example #8
0
def insert_pagina(mongodb, png_image, numero_dta: str, filename: str,
                  npagina: int) -> "tuple[ObjectId, bool]":
    """Insert a PNG into fs.files, deduplicating on (md5, filename).

    If a file with the same md5 and filename already exists the insert is
    treated as a repeat and the existing _id is returned.

    :param mongodb: MongoDB connection/database
    :param png_image: image content (bytes)
    :param numero_dta: stored as metadata.numero_dta
    :param filename: filename
    :param npagina: stored as metadata.pagina
    :return: (ObjectId, True|False)
        the generated ObjectId, or the _id of the existing file
        True when the file already existed
        False when a new document was created
    """
    # BUG FIX: the previous return annotation ``(ObjectId, bool)`` was a
    # tuple *literal*, not a valid type annotation; a string annotation
    # avoids needing a typing import at any Python 3 version.
    fs = GridFS(mongodb)
    content = png_image
    m = md5()
    m.update(content)
    grid_out = fs.find_one({'md5': m.hexdigest()})
    if grid_out:
        if grid_out.filename == filename:
            logger.warning(' Arquivo %s Pagina %s MD5 %s  '
                           'tentativa de inserir pela segunda vez!!' %
                           (filename, npagina, m.hexdigest()))
            # File exists, abort!
            return grid_out._id, True
    # Insert File
    params = {'numero_dta': numero_dta, 'pagina': npagina}
    return fs.put(content, filename=filename, metadata=params), False
Example #9
0
class ScrapeFacts(CoreJob):
    author = "mra"

    def execute(self, test=False, *args, **kwargs):
        # Resolve the target collection and open a GridFS handle on its database.
        self.target = self.config.driverlicense.collection.data
        self.gfs = GridFS(self.target.connection[self.target.database])
        self.download(test)

    def download(self, test):
        """Scrape the page for .xls/.xlsx links and store files not yet seen."""
        page = requests.get(url)
        html = page.content.decode("utf-8")
        hrefs = re.findall("href=[\"\'](.+?)[\"\']", html)
        spreadsheets = [
            href for href in hrefs if href.endswith((".xls", ".xlsx"))
        ]
        self.logger.info("found [%d] xlsx files", len(spreadsheets))
        fetched = 0
        for link in spreadsheets:
            # Skip links whose file is already stored under this filename.
            if self.gfs.find_one({"filename": link}) is None:
                self.logger.info("download [%s]", link)
                response = requests.get(link)
                if not test:
                    self.gfs.put(response.content, filename=link)
                fetched += 1
        self.logger.info("successfully retrieved [%d] of [%d] files", fetched,
                         len(spreadsheets))
Example #10
0
def create_group(avatar, name):
    """Create a group whose avatar is stored (deduplicated) in GridFS.

    :param avatar: uploaded image file (with .filename, .read, .content_type)
    :param name: group name
    :return: 'ok' on success, or an error string for a bad image type
    """
    db = get_db()
    fs = GridFS(db, collection="images")
    _, file_extension = os.path.splitext(avatar.filename)
    if not allowed_file(file_extension):
        return 'Image type is wrong!'
    data = avatar.read()
    # Content-addressed filename (sha1 of the bytes) deduplicates identical uploads.
    sha1 = hashlib.sha1(data).hexdigest()
    filename = sha1 + file_extension
    image_obj = fs.find_one({"filename": filename})
    if not image_obj:
        image_obj = fs.put(data,
                           filename=filename,
                           content_type=avatar.content_type)
    else:
        image_obj = image_obj._id
    # BUG FIX: Collection.insert() was removed in pymongo 4; insert_one()
    # returns a result object whose inserted_id matches the old return value.
    gid = db.groups.insert_one({
        'name': name,
        'avatar': image_obj,
    }).inserted_id
    db.group_user.insert_one({
        'uid': ObjectId(session['uid']),
        'gid': gid,
    })
    return 'ok'
Example #11
0
def __put(f, name):
    """Store *f* in GridFS unless an identical file already exists.

    Deduplication is keyed on (mimetype, sha1, md5); the existing file's
    metadata is returned when a match is found.
    """
    fs = GridFS(get_db())
    # File attributes used both for the duplicate lookup and as stored metadata.
    extension = os.path.splitext(name)[1].lower()
    mimetype = filelib.mime(f)
    arg = {
        'mimetype': mimetype,
        'sha1': filelib.sha1(f),
        'md5': filelib.md5(f),
    }
    existing = fs.find_one(arg)
    if existing:
        return FileMeta(file_id=existing._id,
                        length=existing.length,
                        extension=existing.extension,
                        **arg)

    newfile = fs.new_file(content_type=mimetype,
                          name=name,
                          extension=extension,
                          **arg)
    newfile.write(f)
    newfile.close()
    return FileMeta(file_id=newfile._id,
                    length=newfile.length,
                    extension=newfile.extension,
                    **arg)
Example #12
0
    def test_it_guesses_type_from_filename(self):
        # A .txt filename should be stored with a text/plain content type.
        payload = BytesIO(b"these are the bytes")
        self.mongo.save_file("my-file.txt", payload)
        stored = GridFS(self.mongo.db).find_one({"filename": "my-file.txt"})
        assert stored.content_type == "text/plain"
class GridFSPickleDict(MutableMapping):
    """A dictionary-like interface for pickled values stored in GridFS."""

    def __init__(self, db_name, connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = MongoClient()

        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any previous value first.
        try:
            self.__delitem__(key)
        except KeyError:
            pass
        self.fs.put(pickle.dumps(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        # BUG FIX: the MutableMapping contract requires KeyError for a
        # missing key; previously a missing key was silently ignored, so
        # ``del d[k]`` never failed.
        if res is None:
            raise KeyError
        self.fs.delete(res._id)

    def __len__(self):
        # BUG FIX: Collection.count() was removed in pymongo 4.
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Example #14
0
    def test_it_saves_files_with_props(self):
        # Extra keyword arguments become attributes on the stored file.
        payload = BytesIO(b"these are the bytes")
        self.mongo.save_file("my-file", payload, foo="bar")
        stored = GridFS(self.mongo.db).find_one({"filename": "my-file"})
        assert stored.foo == "bar"
Example #15
0
    def test_it_guesses_type_from_filename(self):
        # Saving a .txt file should yield a text/plain content type.
        data = BytesIO(b"these are the bytes")
        self.mongo.save_file("my-file.txt", data)
        fs = GridFS(self.mongo.db)
        saved = fs.find_one({"filename": "my-file.txt"})
        assert saved.content_type == "text/plain"
Example #16
0
    def test_it_saves_files_with_props(self):
        # Arbitrary keyword props should round-trip as file attributes.
        data = BytesIO(b"these are the bytes")
        self.mongo.save_file("my-file", data, foo="bar")
        fs = GridFS(self.mongo.db)
        saved = fs.find_one({"filename": "my-file"})
        assert saved.foo == "bar"
class GridFSPickleDict(MutableMapping):
    """A dictionary-like interface for pickled values stored in GridFS."""

    def __init__(self, db_name, connection=None):
        """
        :param db_name: database name (be careful with production databases)
        :param connection: ``pymongo.Connection`` instance. If it's ``None``
                           (default) new connection with default options will
                           be created
        """
        if connection is not None:
            self.connection = connection
        else:
            self.connection = MongoClient()

        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any previous value first.
        try:
            self.__delitem__(key)
        except KeyError:
            pass
        self.fs.put(pickle.dumps(item), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        # BUG FIX: the MutableMapping contract requires KeyError for a
        # missing key; previously a missing key was silently ignored, so
        # ``del d[k]`` never failed.
        if res is None:
            raise KeyError
        self.fs.delete(res._id)

    def __len__(self):
        # BUG FIX: Collection.count() was removed in pymongo 4.
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Example #18
0
    def test_fs(self):
        """Smoke-test GridFS lookups against the recall database."""
        fs = GridFS(get_db(alias="recall-db-alias"))
        # Fetch one file by id and one by md5 to exercise both lookup paths.
        by_id = fs.get(ObjectId("5f39fd75e9974144d72231eb"))
        by_md5 = fs.find_one({"md5": "c3c1ae262ddd527f0465dfb77a546b73"})
        print(1)
 def find_image(self, image_name, query):
     """Return the first GridFS file matching *query* as a download response.

     :param image_name: filename presented to the client in the download
     :param query: attribute filter passed to GridFS.find_one
     """
     grid_fs = GridFS(self.db)
     matched = grid_fs.find_one(query)
     response = make_response(matched.read())
     # Serve as a generic binary attachment under the caller-supplied name.
     response.headers['Content-Type'] = 'application/octet-stream'
     response.headers["Content-Disposition"] = (
         "attachment; filename={}".format(image_name))
     return response
Example #20
0
class GridFSCache(BaseCache):
    """A dictionary-like interface for MongoDB GridFS

    Args:
        db_name: database name (be careful with production databases)
        connection: MongoDB connection instance to use instead of creating a new one
    """
    def __init__(self, db_name, connection: MongoClient = None):
        self.connection = connection or MongoClient()
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    # TODO
    async def contains(self, key: str) -> bool:
        raise NotImplementedError

    async def clear(self):
        # GridFS keeps its data in these two fixed collections.
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    async def delete(self, key: str):
        res = self.fs.find_one({'_id': key})
        if res is not None:
            self.fs.delete(res._id)

    async def keys(self) -> Iterable[str]:
        return [d._id for d in self.fs.find()]

    async def read(self, key: str) -> ResponseOrKey:
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return self.unpickle(bytes(result.read()))

    async def size(self) -> int:
        # BUG FIX: Collection.count() was removed in pymongo 4;
        # estimated_document_count() is its fast replacement.
        return self.db['fs.files'].estimated_document_count()

    # TODO
    async def values(self) -> Iterable[ResponseOrKey]:
        raise NotImplementedError

    async def write(self, key: str, item: ResponseOrKey):
        # GridFS files are immutable, so replace any existing entry.
        await self.delete(key)
        self.fs.put(pickle.dumps(item, protocol=-1), **{'_id': key})
Example #21
0
class MongoStorageBackend(IStorageBackend):
    """Storage backend keeping object content in MongoDB GridFS."""

    def __init__(self, db_host, db_port=None, db_name=None):
        """
        :param db_host: MongoDB host
        :param db_port: MongoDB port (default 27017)
        :param db_name: database name (default "lts")
        """
        if db_port is None:
            db_port = 27017
        if db_name is None:
            db_name = "lts"

        self.db = MongoClient(db_host, db_port)[db_name]
        # BUG FIX: reuse the database handle above instead of opening a
        # second, redundant MongoClient to the same server.
        self.fs = GridFS(self.db)

    def get_object_id_list(self, cursor, limit):
        """Return (next_cursor, ids) for one page of stored object ids."""
        def peek(cursor, limit):
            # Probe for at least one more record instead of materializing
            # the entire remainder of the collection.
            probe = self.fs.find().sort('_id', ASCENDING).skip(cursor + limit).limit(1)
            if any(True for _ in probe):
                return str(cursor + limit)
            return None
        cursor = int(cursor)
        results = [x._id for x in self.fs.find().sort('_id', ASCENDING).skip(cursor).limit(limit)]
        next_cursor = peek(cursor, limit)
        return next_cursor, results

    def check_object_exists(self, id):
        """Return True when a file with this _id exists."""
        if self.fs.find_one({"_id": id}):
            return True
        return False

    def get_object(self, id):
        """Return the GridFS file for *id*; raise ObjectNotFoundError if absent."""
        gr_entry = self.fs.find_one({"_id": id})
        if gr_entry is None:
            raise ObjectNotFoundError(str(id))
        return gr_entry

    def set_object(self, id, content):
        """Store *content* under *id*; raise if the id is already taken."""
        if self.check_object_exists(id):
            raise ObjectAlreadyExistsError(str(id))
        content_target = self.fs.new_file(_id=id)
        content.save(content_target)
        content_target.close()

    def del_object(self, id):
        return self.fs.delete(id)
 def create_image(self, file_name, data):
     """Store *data* in GridFS under *file_name*, rejecting duplicate names.

     :raises InvalidUsage: (HTTP 422) when a file with this name exists
     """
     grid_fs = GridFS(self.db)
     if grid_fs.find_one({'filename': file_name}) is not None:
         raise InvalidUsage('Product by this name already exist',
                            status_code=422)
     # new_file() used as a context manager closes the file automatically.
     with grid_fs.new_file(filename=file_name) as fp:
         fp.write(data)
         return json.dumps({'status': 'File saved successfully'}), 200
Example #23
0
 def queryId(self, db, query):
     """Look up a file's id by its attributes.

     The id can then be used to delete, read or download the file.

     :param db: database handle
     :param query: attribute filter passed to GridFS.find_one
     :return: the matching file's _id
     """
     fs = GridFS(db, self.file_collection)
     # NOTE(review): assumes a match exists — when find_one returns None
     # the attribute access below raises AttributeError.
     matched = fs.find_one(query)
     return matched._id
class GridFsFileRepository(FileRepository):
    """FileRepository implementation backed by a GridFS collection."""

    def __init__(self, database: Database, collection: str):
        self._fs = GridFS(database, collection)

    def query_by_file_name(self, file_name: str) -> File:
        """Return the stored file named *file_name*.

        :raises NotFoundError: when no file with that name exists
        """
        grid_out = self._fs.find_one({"filename": file_name})
        if grid_out is None:
            raise NotFoundError(f"File {file_name} not found.")
        return GridFsFile(grid_out)
Example #25
0
def get_group_avatar(object_id):
    """Serve a group's avatar image from the 'images' GridFS collection."""
    db = get_db()
    fs = GridFS(db, collection="images")
    image_obj = fs.find_one({"_id": ObjectId(object_id)})
    response = make_response(image_obj.read())
    # Fall back to guessing from the filename when no content type was stored.
    content_type = image_obj.content_type or guess_type(image_obj.filename)[0]
    response.mimetype = content_type
    return response
Example #26
0
class GridFSPickleDict(MutableMapping):
    """A dictionary-like interface for MongoDB GridFS"""

    def __init__(self, db_name, connection: MongoClient = None):
        """
        Args:
            db_name: database name (be careful with production databases)
            connection: MongoDB connection instance to use instead of creating a new one
        """
        self.connection = connection or MongoClient()
        self.db = self.connection[db_name]
        self.fs = GridFS(self.db)

    def __getitem__(self, key):
        result = self.fs.find_one({'_id': key})
        if result is None:
            raise KeyError
        return pickle.loads(bytes(result.read()))

    def __setitem__(self, key, item):
        # GridFS files are immutable: remove any previous value first.
        try:
            self.__delitem__(key)
        except KeyError:
            pass
        self.fs.put(pickle.dumps(item, protocol=PICKLE_PROTOCOL), **{'_id': key})

    def __delitem__(self, key):
        res = self.fs.find_one({'_id': key})
        # BUG FIX: the MutableMapping contract requires KeyError for a
        # missing key; previously a missing key was silently ignored, so
        # ``del d[k]`` never failed.
        if res is None:
            raise KeyError
        self.fs.delete(res._id)

    def __len__(self):
        # BUG FIX: Collection.count() was removed in pymongo 4.
        return self.db['fs.files'].estimated_document_count()

    def __iter__(self):
        for d in self.fs.find():
            yield d._id

    def clear(self):
        self.db['fs.files'].drop()
        self.db['fs.chunks'].drop()

    def __str__(self):
        return str(dict(self.items()))
Example #27
0
    def prepare_data(self):
        """Replace a file-like logo with the ObjectId of its GridFS copy."""
        if isinstance(self.logo, BufferedReader):
            grid = GridFS(self._modeldb.database)
            payload = self.logo.read()
            # Deduplicate by md5: reuse an existing stored file when one matches.
            digest = hashlib.md5(payload).hexdigest()
            existing = grid.find_one(dict(md5=digest))
            self.logo = existing._id if existing else grid.put(payload)
Example #28
0
def upload_populated_pdf_to_storage(mongo_job_id):
    """Copy a finished PDF job's output file into the form-response storage."""
    client = MongoClient(settings.MONGO_HOST, 27017)
    db = client.emondo
    # Look up the job document, then the response it belongs to.
    pdf_job = db.pdf_job.find_one({'_id': ObjectId(mongo_job_id)})
    form_response = FormDocumentResponse.objects.get(
        pk=pdf_job.get('form_response_id'))
    grid_fs = GridFS(db, collection="fs")
    pdf_file = grid_fs.find_one(
        {'_id': ObjectId(pdf_job.get('output_file_id'))})
    form_response.populated_document.save(pdf_file.filename,
                                          ContentFile(pdf_file.read()))
Example #29
0
def main_config():
    """Render and process the main configuration page.

    GET: shows the stored configs, which branding images exist, the
    public key (when one is stored) and the language list.
    POST: either updates a named config document (action
    'configurations') or generates and stores a new RSA key pair
    (action 'genkeys'). Writes require read-write access.
    """
    # Require a logged-in session before anything else.
    if (not session.get('logged_in')):
        return redirect(url_for('login'))
    access = check_access('main_config')
    # Only read-write users may POST changes.
    if (request.method == 'POST' and access != 'rw'):
        abort(403)

    # Fixed filenames under which the RSA key pair is stored in GridFS.
    filenames = {'pubkey': 'rsa_1024_pub.pem', 'privkey': 'rsa_1024_priv.pem'}
    certs = None
    if (request.method == 'POST'):
        changes = to_dict(request.form)
        if (changes['action'] == 'configurations'):
            # 'action' is form plumbing, not config data; drop it before saving.
            del (changes['action'])
            mongodb.db.configs.update_one({'name': changes['name']},
                                          {'$set': changes})
        elif (changes['action'] == 'genkeys'):
            # Generate a fresh key pair and replace any stored copies.
            certs = genkeypair()
            gridfsdb = database.Database(
                MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
            fs = GridFS(gridfsdb)
            for key in ['privkey', 'pubkey']:
                # GridFS files are immutable: delete the old file first.
                oldfile = fs.find_one({'filename': filenames[key]})
                if (oldfile is not None):
                    fs.delete(oldfile._id)
                fs.put(certs[key].copy(),
                       content_type="text/plain",
                       filename=filenames[key])

    result = mongodb.db.configs.find({}, {'_id': 0})
    # Report which branding images exist in the 'images' database.
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'images')
    fs = GridFS(gridfsdb)
    avatar = fs.exists(filename='avatar.png')
    background = fs.exists(filename='background.png')
    logo = fs.exists(filename='logo.png')
    imgresult = {'avatar': avatar, 'background': background, 'logo': logo}

    # When no new pair was generated above, expose the stored public key only.
    if (certs is None):
        gridfsdb = database.Database(
            MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
        fs = GridFS(gridfsdb)
        if (fs.exists(filename=filenames['pubkey'])):
            file = fs.get_last_version(filenames['pubkey'])
            pubkey = file.read()
            certs = {'pubkey': pubkey}
    languages = copy_cursor(
        mongodb.db.languages.find({}, sort=([('name', 1), ('variant', 1)])))
    return render_template('main_config.html',
                           access=access,
                           images=imgresult,
                           configs=result,
                           certs=certs,
                           languages=languages)
Example #30
0
class ScrapeFacts(CoreJob):
    author = "mra"
    schedule = "*/30 * * * * "

    def execute(self, test=False, *args, **kwargs):
        """
        :param test: control, if test don't write data to mongoDB
        :param args:
        :param kwargs:
        :return:
        """
        self.target = self.config.driverlicense.collection.data
        self.gfs = GridFS(self.target.connection[self.target.database])
        self.download(test)

    def download(self, test):
        """
        This function retrieves the urls from agof website,
        and saves the urls and corresponding files in the
        database
        :param test:
        :return:
        """
        # get agof website's content
        rv = requests.get(url)
        body = rv.content.decode("utf-8")
        # extract desired links from the content
        links = re.findall("href=[\"\'](.+?)[\"\']", body)
        xls_all = [
            href for href in links
            if href.endswith(".xls") or href.endswith(".xlsx")
        ]
        xls = [
            filename for filename in xls_all if "Angebote_Ranking" in filename
        ]
        self.logger.info("found [%d] xlsx files", len(xls))

        processed = 0
        download = 0
        for link in xls:
            # check if file already exists in the database
            doc = self.gfs.find_one({"filename": link})
            if doc is None:
                # if not save the file to mongoDB
                self.logger.info("download [%s]", link)
                rv = requests.get(link)
                if not test:
                    self.gfs.put(rv.content, filename=link)
                # BUG FIX: count only files actually downloaded; previously
                # every link (including already-stored ones) was counted,
                # inflating the "successfully retrieved" log line below.
                download += 1
            # progress still advances per processed link.
            processed += 1
            self.progress(processed / len(xls))
        self.logger.info("successfully retrieved [%d] of [%d] files", download,
                         len(xls))
        enqueue(ProcessFiles, concurrent=True)
Example #31
0
    def get_file_id(self, database: str, collection: str, filename: str) -> str:
        """Look up a stored file's id by its filename.

        @param {str} database - name of the database the file was saved to
        @param {str} collection - name of the collection (table) the file was saved to
        @param {str} filename - file name

        @returns {str} - id of the stored file
        """
        _fs = GridFS(self.db[database], collection)
        # NOTE(review): assumes the file exists — find_one returning None
        # would raise AttributeError on the attribute access below.
        matched = _fs.find_one({'filename': filename})
        return str(matched._id)
Example #32
0
def replicate_file(node_id, file_hash_ring_id):
    """Push one locally stored file to its successor (replica) node."""
    mongo = MongoClient('localhost', 27017)
    db = mongo['rpfs_slave_db_' + str(node_id)]
    fs = GridFS(db)
    topology = list(db.topology.find())

    replica_node = get_next_node(topology, file_hash_ring_id)
    if not replica_node:
        # No successor in the ring: nothing to replicate to.
        return

    stored = fs.find_one({'filename': str(file_hash_ring_id)})
    if stored:
        response = send_file(stored, replica_node)
        assert response.status_code == 200
Example #33
0
class SimpleFrameMongo(object):
    """Context-manager wrapper storing pickled DataFrames in GridFS.

    ``config_settings`` must provide 'name', 'mongo_host', 'username'
    and 'password' before instantiation.
    """

    config_settings = None

    def __init__(self):

        db_name = self.config_settings['name']
        mongo_host = self.config_settings['mongo_host']
        username = self.config_settings['username']
        password = self.config_settings['password']

        # BUG FIX: Database.authenticate() was removed in modern pymongo;
        # credentials are passed to MongoClient instead.
        self.db = pymongo.MongoClient(
            mongo_host, username=username, password=password)[db_name]

        self.fs = GridFS(self.db)

    def write(self, name, df, metadata=''):
        """Store *df* pickled under *name*; warn and skip on duplicates."""
        if name in self.fs.list():
            warnings.warn(
                'filename `{}` already exists, nothing inserted'.format(name))
            return

        return self.fs.put(pkl.dumps(df, pkl.HIGHEST_PROTOCOL),
                           filename=name,
                           metadata=metadata)

    def delete(self, name):
        """Delete the stored frame named *name* (no-op when absent)."""
        doc = self.db['fs.files'].find_one({'filename': name})
        if doc:
            _id = doc.get('_id')
            self.fs.delete(_id)

    def read(self, name):
        """Return the unpickled frame stored under *name*."""
        return pkl.loads(self.fs.find_one({'filename': name}).read())

    def read_metadata(self, name):
        """Return the metadata stored alongside the frame *name*."""
        return self.db['fs.files'].find_one({'filename': name}).get('metadata')

    def __enter__(self):
        return self

    def __exit__(self, et, ev, tb):
        # Close the underlying client when leaving the context.
        self.db.client.close()
Example #34
0
def set_setup():
    """Register (or refresh) a trusted device's shared secret.

    Expects ``uuid``, ``code`` (RSA-encrypted, base64) and ``salt`` query
    parameters.  Decrypts the code with the server's private key, upserts
    the secret into ``trusts`` (re-registration is refused for 600 s after
    the last one), marks the session logged in, and returns an md5
    confirmation digest.
    """
    dev_uuid = request.args.get('uuid')
    code = request.args.get('code')
    salt = request.args.get('salt')
    # The original `(a and b and c) is None` test let empty strings
    # through; reject any missing OR blank parameter.
    if not (dev_uuid and code and salt):
        abort(403)
    griddb = database.Database(MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT),'certs')
    fs = GridFS(griddb)
    if fs.find_one({'filename': 'rsa_1024_priv.pem'}) is None:
        # Previously `priv` stayed unbound here and the decrypt below
        # raised an uncaught NameError; fail explicitly instead.
        abort(500)
    key_string = fs.get_last_version('rsa_1024_priv.pem').read()
    priv = RSA.load_key_string(key_string)
    try:
        ctxt = b64decode(code.replace(' ','+'))
        decrypted_text = priv.private_decrypt(ctxt, RSA.pkcs1_padding)
        # `digest` (not `hash`) so the builtin is not shadowed.
        digest = hashlib.md5('%s:%s:%s' % (dev_uuid,decrypted_text,salt)).hexdigest()
    except TypeError as e:
        return output_json({'Result' : 'Error: %s' % e}, 200, def_headers)
    find_result = mongodb.db.trusts.find_one({'uuid': dev_uuid},{'_id':0,'timestamp':1})
    # Timeout after 600 seconds (10 minutes); within the window users may
    # not set up the trustee again.
    if (find_result is not None
            and int(find_result['timestamp']) + 600 > time()):
        return output_json({'Result' : 'Too early'}, 403, def_headers)
    # Single upsert replaces the two duplicated branches of the original.
    mongodb.db.trusts.update_one(
        {'uuid': dev_uuid},
        {'$set': {'secret': decrypted_text, 'timestamp': int(time())} },
        upsert=True
    )
    session['logged_in'] = True
    return output_json({'Result' : digest}, 200, def_headers)
Example #35
0
class Database():
    """Data-access layer for volatility sessions.

    Bundles four ``voldb`` collections (sessions, comments, plugins,
    datastore) plus a GridFS bucket in ``voldbfs`` for file payloads and
    oversized plugin output.  Public ids are 24-char hex strings coerced
    to :class:`ObjectId` on entry.
    """
    def __init__(self):
        # Create the connection
        if config['valid']:
            mongo_uri = config['database']['mongo_uri']
        else:
            mongo_uri = 'mongodb://localhost'

        connection = pymongo.MongoClient(mongo_uri)

        # Version Check
        server_version = connection.server_info()['version']
        if int(server_version[0]) < 3:
            raise UserWarning('Incompatible MongoDB Version detected. Requires 3 or higher. Found {0}'.format(server_version))

        # Connect to Databases.
        voldb = connection['voldb']
        voldbfs = connection['voldbfs']

        # Get Collections
        self.vol_sessions = voldb.sessions
        self.vol_comments = voldb.comments
        self.vol_plugins = voldb.plugins
        self.vol_datastore = voldb.datastore
        self.vol_files = GridFS(voldbfs)

        # Indexes
        self.vol_comments.create_index([('freetext', 'text')])
        self.vol_plugins.create_index([('$**', 'text')])

    ##
    # Sessions
    ##
    def get_allsessions(self):
        """Return every session document as a list."""
        return list(self.vol_sessions.find())

    def get_session(self, session_id):
        """Fetch a single session document by its id string."""
        return self.vol_sessions.find_one({'_id': ObjectId(session_id)})

    def create_session(self, session_data):
        """Insert a session document and return its new id."""
        return self.vol_sessions.insert_one(session_data).inserted_id

    def update_session(self, session_id, new_values):
        """$set *new_values* on the session; always returns True."""
        self.vol_sessions.update_one(
            {'_id': ObjectId(session_id)}, {"$set": new_values})
        return True

    ##
    # Comments
    ##
    def get_commentbyid(self, comment_id):
        """Return a cursor over the comment(s) with this id."""
        return self.vol_comments.find({'_id': ObjectId(comment_id)})

    def get_commentbysession(self, session_id):
        """All comments for a session, newest first."""
        cursor = self.vol_comments.find(
            {'session_id': ObjectId(session_id)}).sort("created", -1)
        return list(cursor)

    def create_comment(self, comment_data):
        """Insert a comment document and return its new id."""
        return self.vol_comments.insert_one(comment_data).inserted_id

    def search_comments(self, search_text, session_id=None):
        """Full-text search over comments, optionally scoped to a session."""
        rows = self.vol_comments.find({"$text": {"$search": search_text}})
        if session_id:
            session_id = ObjectId(session_id)
            return [row for row in rows if row['session_id'] == session_id]
        return list(rows)

    ##
    # Plugins
    ##

    def get_pluginbysession(self, session_id):
        """All plugin results for a session, newest first."""
        cursor = self.vol_plugins.find(
            {'session_id': ObjectId(session_id)}).sort("created", -1)
        return list(cursor)

    def get_pluginbyid(self, plugin_id):
        """Fetch one plugin result, inlining GridFS-spilled output.

        Returns None when the id is unknown (previously the membership
        test raised TypeError on a missing document; this now mirrors
        the guard in get_plugin_byname).
        """
        plugin_output = self.vol_plugins.find_one({'_id': ObjectId(plugin_id)})
        if plugin_output and 'largedoc' in plugin_output:
            large_document = self.get_filebyid(plugin_output['plugin_output'])
            plugin_output['plugin_output'] = json.loads(large_document.read())
        return plugin_output

    def get_plugin_byname(self, plugin_name, session_id):
        """Fetch a session's plugin result by name, inlining spilled output."""
        plugin_output = self.vol_plugins.find_one(
            {'session_id': ObjectId(session_id), 'plugin_name': plugin_name})
        if plugin_output and 'largedoc' in plugin_output:
            large_document = self.get_filebyid(plugin_output['plugin_output'])
            plugin_output['plugin_output'] = json.loads(large_document.read())
        return plugin_output

    def create_plugin(self, plugin_data):
        """Insert a plugin result; its session_id is forced to ObjectId."""
        # Force session ID
        plugin_data['session_id'] = ObjectId(plugin_data['session_id'])
        return self.vol_plugins.insert_one(plugin_data).inserted_id

    def search_plugins(self, search_text, session_id=None, plugin_name=None):
        """Full-text search over plugin results.

        With *session_id*: matching rows for that session.  With
        *plugin_name*: session ids of matching rows for that plugin.
        Otherwise every matching row.
        """
        results = []
        rows = self.vol_plugins.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                session_id = ObjectId(session_id)
                if row['session_id'] == session_id:
                    results.append(row)
            # This is the session filter from the main page.
            elif plugin_name:
                if row['plugin_name'] == plugin_name:
                    if search_text in str(row['plugin_output']):
                        results.append(row['session_id'])
            else:
                results.append(row)
        return results

    def update_plugin(self, plugin_id, new_values):
        """$set *new_values*; spills >12 MB plugin_output to GridFS first."""
        plugin_id = ObjectId(plugin_id)
        if len(str(new_values)) > 12000000:
            # Mongo documents cap at 16 MB; park oversized output in GridFS.
            # print() form works on both Python 2 and 3 for a single arg.
            print("Storing Large Document in GridFS")
            large_document = json.dumps(new_values['plugin_output'])
            large_document_id = self.create_file(
                large_document, 'sess_id', 'sha256', 'filename',
                pid=None, file_meta=None)
            new_values['plugin_output'] = large_document_id
            new_values['largedoc'] = 'True'

        self.vol_plugins.update_one({'_id': plugin_id}, {"$set": new_values})
        return True


    ##
    # File System
    ##
    def get_filebyid(self, file_id):
        """Return the GridOut object for *file_id*."""
        return self.vol_files.get(ObjectId(file_id))

    def list_files(self, session_id):
        """All GridFS files stored for a session.

        Files are written with a ``sess_id`` field (see create_file), so
        the query must use 'sess_id'; the original queried 'session_id'
        and always came back empty.
        """
        return list(self.vol_files.find({'sess_id': ObjectId(session_id)}))

    def search_files(self, search_query):
        """Raw GridFS query passthrough; returns matching GridOut objects."""
        return list(self.vol_files.find(search_query))

    def get_strings(self, file_id):
        """GridOut holding '<file_id>_strings.txt', or None."""
        file_id = ObjectId(file_id)
        return self.vol_files.find_one(
            {'filename': '{0}_strings.txt'.format(str(file_id))})

    def create_file(self, file_data, session_id, sha256, filename, pid=None, file_meta=None):
        """Store *file_data* in GridFS, tagged with its session ('sess_id')."""
        if len(session_id) == 24:
            session_id = ObjectId(session_id)
        file_id = self.vol_files.put(file_data, filename=filename, sess_id=session_id, sha256=sha256, pid=pid, file_meta=file_meta)
        return file_id

    def drop_file(self, file_id):
        """Delete one GridFS file; always returns True."""
        self.vol_files.delete(ObjectId(file_id))
        return True

    ##
    # DataStore
    ##

    def get_alldatastore(self):
        """Every datastore document."""
        return list(self.vol_datastore.find())

    def search_datastore(self, search_query):
        """Raw datastore query passthrough."""
        return list(self.vol_datastore.find(search_query))

    def create_datastore(self, store_data):
        """Insert a datastore document and return its new id."""
        return self.vol_datastore.insert_one(store_data).inserted_id

    def update_datastore(self, search_query, new_values):
        """$set *new_values* on the first match; always returns True."""
        self.vol_datastore.update_one(search_query, {"$set": new_values})
        return True



    ##
    # Drop Session
    ##
    def drop_session(self, session_id):
        """Remove a session and everything attached to it."""
        session_id = ObjectId(session_id)

        # Drop Plugins
        self.vol_plugins.delete_many({'session_id': session_id})
        # Drop Files -- stored under 'sess_id' (see create_file); a GridOut
        # exposes its id as ._id (the original row['file_id'] would raise).
        for stored in self.vol_files.find({'sess_id': session_id}):
            self.vol_files.delete(stored._id)
        # Drop DataStore
        self.vol_datastore.delete_many({'session_id': session_id})
        # Drop Notes
        self.vol_comments.delete_many({'session_id': session_id})
        # Drop session
        self.vol_sessions.delete_many({'_id': session_id})
Example #36
0
class Database():
    """Data-access layer bundling volatility session collections and GridFS.

    Wraps the ``voldb`` collections (sessions, comments, plugins,
    datastore) and a GridFS bucket in ``voldbfs``.  Ids are used exactly
    as passed in -- no ObjectId coercion is performed here, so callers
    must supply whatever type Mongo stored.
    """
    def __init__(self):
        # Create the connection
        if config.valid:
            mongo_uri = config.mongo_uri
        else:
            mongo_uri = 'mongodb://localhost'

        connection = pymongo.MongoClient(mongo_uri)

        # Version Check
        server_version = connection.server_info()['version']
        if int(server_version[0]) < 3:
            raise UserWarning('Incompatible MongoDB Version detected. Requires 3 or higher. Found {0}'.format(server_version))

        # Connect to Databases.
        voldb = connection['voldb']
        voldbfs = connection['voldbfs']

        # Get Collections
        self.vol_sessions = voldb.sessions
        self.vol_comments = voldb.comments
        self.vol_plugins = voldb.plugins
        self.vol_datastore = voldb.datastore
        self.vol_files = GridFS(voldbfs)

        # Indexes
        self.vol_comments.create_index([('freetext', 'text')])

        self.vol_plugins.create_index([('$**', 'text')])

    ##
    # Sessions
    ##
    def get_allsessions(self):
        """Return every session document as a list."""
        sessions = self.vol_sessions.find()
        return [x for x in sessions]

    def get_session(self, sess_id):
        """Fetch a single session document by id (used as-is)."""
        session = self.vol_sessions.find_one({'_id': sess_id})
        return session

    def create_session(self, sess_data):
        """Insert a session document and return its new id."""
        sess_id = self.vol_sessions.insert_one(sess_data).inserted_id
        return sess_id

    def update_session(self, sess_id, new_values):
        """$set *new_values* on the session; always returns True."""
        self.vol_sessions.update_one({'_id':sess_id},{"$set": new_values })
        return True

    ##
    # Comments
    ##
    def get_commentbyid(self, comment_id):
        """Return a cursor over the comment(s) with this id."""
        comment = self.vol_comments.find({'_id': comment_id})
        return comment

    def get_commentbysession(self,session_id):
        """All comments for a session, newest first."""
        comments = self.vol_comments.find({'session_id': session_id}).sort("created", -1)
        return [row for row in comments]

    def create_comment(self, comment_data):
        """Insert a comment document and return its new id."""
        comment_id = self.vol_comments.insert_one(comment_data).inserted_id
        return comment_id

    def search_comments(self, search_text, session_id=None):
        """Full-text search over comments, optionally filtered by session."""
        results = []
        rows = self.vol_comments.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                if row['session_id'] == session_id:
                    results.append(row)
            else:
                results.append(row)
        return results

    ##
    # Plugins
    ##

    def get_pluginbysession(self, session_id):
        """All plugin results for a session, newest first."""
        result_rows = []
        plugin_output = self.vol_plugins.find({'session_id': session_id}).sort("created", -1)
        for row in plugin_output:
            result_rows.append(row)

        #result_rows.sort(key=lambda d: (d["plugin_name"]))

        return result_rows

    def get_pluginbyid(self, plugin_id):
        """Fetch one plugin result document by id, or None."""
        plugin_output = self.vol_plugins.find_one({'_id': plugin_id})
        return plugin_output

    def get_plugin_byname(self, plugin_name, session_id):
        """Fetch a session's plugin result by plugin name, or None."""
        plugin_output = self.vol_plugins.find_one({'session_id': session_id, 'plugin_name':plugin_name})
        return plugin_output

    def create_plugin(self, plugin_data):
        """Insert a plugin result document and return its new id."""
        plugin_id = self.vol_plugins.insert_one(plugin_data).inserted_id
        return plugin_id

    def search_plugins(self, search_text, session_id=None):
        """Full-text search over plugin results, optionally by session."""
        results = []
        rows = self.vol_plugins.find({"$text": {"$search": search_text}})
        for row in rows:
            if session_id:
                if row['session_id'] == session_id:
                    results.append(row)
            else:
                results.append(row)
        return results

    def update_plugin(self, plugin_id, new_values):
        """$set *new_values* on a plugin result; always returns True."""
        self.vol_plugins.update_one({'_id':plugin_id},{"$set": new_values })
        return True


    ##
    # File System
    ##
    def get_filebyid(self, file_id):
        """Return the GridOut object for *file_id*."""
        file_object = self.vol_files.get(file_id)
        return file_object

    def list_files(self, sess_id):
        """Files recorded for a session.

        NOTE(review): create_file stores the session under a ``sess_id``
        field, but this queries ``session_id`` -- likely always empty;
        confirm against the stored fs.files documents.
        """
        results = self.vol_files.find({'session_id': sess_id})
        return [row for row in results]

    def search_files(self, search_query):
        """Raw GridFS query passthrough; returns matching GridOut objects."""
        results = self.vol_files.find(search_query)
        return [row for row in results]

    def get_strings(self, file_id):
        """GridOut holding '<file_id>_strings.txt', or None."""
        results = self.vol_files.find_one({'filename': '{0}_strings.txt'.format(str(file_id))})
        return results

    def create_file(self, file_data, sess_id, sha256, filename, pid=None, file_meta=None):
        """Store *file_data* in GridFS tagged with sess_id/sha256/pid."""
        file_id = self.vol_files.put(file_data, filename=filename, sess_id=sess_id, sha256=sha256, pid=pid, file_meta=file_meta)
        return file_id


    ##
    # DataStore
    ##

    def get_alldatastore(self):
        """Every datastore document."""
        results = self.vol_datastore.find()
        return [row for row in results]

    def search_datastore(self, search_query):
        """Raw datastore query passthrough."""
        results = self.vol_datastore.find(search_query)
        return [row for row in results]

    def create_datastore(self, store_data):
        """Insert a datastore document and return its new id."""
        data_id = self.vol_datastore.insert_one(store_data).inserted_id
        return data_id

    def update_datastore(self, search_query, new_values):
        """$set *new_values* on the first match; always returns True."""
        self.vol_datastore.update_one(search_query, {"$set": new_values})
        return True


    ##
    # Drop Session
    ##
    def drop_session(self, session_id):
        """Remove a session and all data attached to it."""

        # Drop Plugins
        self.vol_plugins.delete_many({'session_id': session_id})
        # Drop Files
        # NOTE(review): GridOut does not support item access, so
        # row['file_id'] will raise if this loop ever yields a result;
        # the deletable id is row._id.  See also the field-name mismatch
        # flagged on list_files.
        results = self.vol_files.find({'session_id': session_id})
        for row in results:
            self.vol_files.delete(row['file_id'])
        # Drop DataStore
        self.vol_datastore.delete_many({'session_id': session_id})
        # Drop Notes
        self.vol_comments.delete_many({'session_id': session_id})
        # Drop session
        self.vol_sessions.delete_many({'_id': session_id})