Example #1
def test_post_grid_calendar_returns_success_status(app, coverage,
                                                   get_app_context):
    filename = 'export_calendars.zip'
    path = os.path.join(
        os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
        'fixtures/gridcalendar/', filename)
    files = {'file': (open(path, 'rb'), 'export_calendars.zip')}
    raw = app.post('/coverages/jdr/grid_calendar', data=files)
    r = to_json(raw)
    assert raw.status_code == 200
    assert r.get('message') == 'OK'
    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    gridfs = GridFS(mongo.db)
    file_id = r['coverages'][0]['grid_calendars_id']
    assert gridfs.exists(ObjectId(file_id))
    # we update the file (it's the same, but that's not the point)
    files = {'file': (open(path, 'rb'), 'export_calendars.zip')}
    raw = app.post('/coverages/jdr/grid_calendar', data=files)
    assert raw.status_code == 200

    raw = app.get('/coverages')
    r = to_json(raw)
    assert len(r['coverages']) == 1
    assert 'grid_calendars_id' in r['coverages'][0]
    # it should be another file
    assert file_id != r['coverages'][0]['grid_calendars_id']
    # the previous file has been deleted
    assert not gridfs.exists(ObjectId(file_id))
    # and the new one exists
    assert gridfs.exists(ObjectId(r['coverages'][0]['grid_calendars_id']))
Example #2
def main_config():
    if (not session.get('logged_in')):
        return redirect(url_for('login'))
    access = check_access('main_config')
    if (request.method == 'POST' and access != 'rw'):
        abort(403)

    filenames = {'pubkey': 'rsa_1024_pub.pem', 'privkey': 'rsa_1024_priv.pem'}
    certs = None
    if (request.method == 'POST'):
        changes = to_dict(request.form)
        if (changes['action'] == 'configurations'):
            del changes['action']
            mongodb.db.configs.update_one({'name': changes['name']},
                                          {'$set': changes})
        elif (changes['action'] == 'genkeys'):
            certs = genkeypair()
            gridfsdb = database.Database(
                MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
            fs = GridFS(gridfsdb)
            for key in ['privkey', 'pubkey']:
                oldfile = fs.find_one({'filename': filenames[key]})
                if (oldfile is not None):
                    fs.delete(oldfile._id)
                fs.put(certs[key].copy(),
                       content_type="text/plain",
                       filename=filenames[key])

    result = mongodb.db.configs.find({}, {'_id': 0})
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'images')
    fs = GridFS(gridfsdb)
    avatar = fs.exists(filename='avatar.png')
    background = fs.exists(filename='background.png')
    logo = fs.exists(filename='logo.png')
    imgresult = {'avatar': avatar, 'background': background, 'logo': logo}

    if (certs is None):
        gridfsdb = database.Database(
            MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
        fs = GridFS(gridfsdb)
        if (fs.exists(filename=filenames['pubkey'])):
            file = fs.get_last_version(filenames['pubkey'])
            pubkey = file.read()
            certs = {'pubkey': pubkey}
    languages = copy_cursor(
        mongodb.db.languages.find({}, sort=([('name', 1), ('variant', 1)])))
    return render_template('main_config.html',
                           access=access,
                           images=imgresult,
                           configs=result,
                           certs=certs,
                           languages=languages)
Example #3
    def test_delete_file_from_GridFS(self):
        content = b"File content"
        gridfs = GridFS(self.db,
                collection=config.MONGODB_CONFIG['gridfs_collection'])
        new_file_id = gridfs.put(content)
        expected_file_data = gridfs.get(new_file_id)

        self.document['file_id'] = str(new_file_id)
        self.assertTrue(gridfs.exists(new_file_id))

        GridFSFileDeleter().delay(self.fake_id)

        self.assertFalse(gridfs.exists(new_file_id))
Example #4
    def upLoadFile(self, file_id, file_name, class_name, host, author,
                   is_published, file_type, file_data, remark, url):
        client = pymongo.MongoClient(self.db_url)
        db = client[self.db]
        # file_id = str(uuid.uuid1())
        filter_condition = {
            "file_id": file_id,
            "file_name": file_name,
            "class_name": class_name,
            # "file_path": file_path,
            # "url": "http://" + host + ":10018/files/download/" + file_id,
            "url": url,
            "host": host,
            "author": author,
            "file_type": file_type,
            "is_published": is_published,
            "download": 0,
            "remark": remark
        }
        gridfs_col = GridFS(db, collection="file_info")
        query = {"file_name": "", "author": ""}
        query["file_name"] = file_name
        query["author"] = author
        if gridfs_col.exists(query):
            result = {"code": "20000699", "message": "文件已经存在"}
        else:
            try:
                # with open(file_path, 'rb') as file_r:
                # file_data = file_data
                gridfs_col.put(data=file_data, **filter_condition)
                result = {"code": "20000600", "message": "文件上传成功"}
            except Exception as e:
                result = {"code": "20000699", "message": "文件上传失败,原因:" + e}

        return result
Example #5
class GridFSStorage(Storage):
    def __init__(self, host='localhost', port=27017, collection='fs'):
        for s in ('host', 'port', 'collection'):
            name = 'GRIDFS_' + s.upper()
            if hasattr(settings, name):
                setattr(self, s, getattr(settings, name))
        for s, v in zip(('host', 'port', 'collection'), (host, port, collection)):
            if v:
                setattr(self, s, v)
        self.db = Connection(host=self.host, port=self.port)[self.collection]
        self.fs = GridFS(self.db)

    def _save(self, name, content):
        self.fs.put(content, filename=name)
        return name

    def _open(self, name, *args, **kwargs):
        return self.fs.get_last_version(filename=name)

    def delete(self, name):
        oid = self.fs.get_last_version(filename=name)._id
        self.fs.delete(oid)

    def exists(self, name):
        return self.fs.exists({'filename': name})

    def size(self, name):
        return self.fs.get_last_version(filename=name).length
Example #6
    def isExists(self, file_coll, filename):
        gridfs_col = GridFS(self.db, collection=file_coll)
        query = {"filename": filename}

        if gridfs_col.exists(query):
            print(f'{filename} exists')
Example #7
    def test_it_saves_files(self):
        fileobj = BytesIO(b"these are the bytes")

        self.mongo.save_file("my-file", fileobj)

        gridfs = GridFS(self.mongo.db)
        assert gridfs.exists({"filename": "my-file"})
Example #8
def consulta(year, month, terminal, arquivo, id):
    """Script de linha de comando para consulta do arquivo XML."""
    fs = GridFS(db)
    _ids = []
    if id:
        _ids.append(ObjectId(id))
    else:
        filtro = {'metadata.contentType': 'text/xml'}
        if year and month:
            data_inicio = datetime(year, month, 1)
            filtro['metadata.dataescaneamento'] = {'$gt': data_inicio}
        if terminal:
            filtro['metadata.recinto'] = terminal
        if arquivo:
            filtro['filename'] = {'$regex': arquivo}
        cursor = db['fs.files'].find(filtro).limit(10)
        _ids = [row['_id'] for row in cursor]
    for _id in _ids:
        if fs.exists(_id):
            grid_out = fs.get(_id)
            raw = grid_out.read()
            encode = chardet.detect(raw)
            try:
                xml = raw.decode(encode['encoding'])
                print(xml)
                print(xmli.xml_todict(xml))
            except Exception as err:
                print(err)
Example #9
def setupcode():
    if (not session.get('logged_in')):
        return redirect(url_for('login'))
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), 'certs')
    fs = GridFS(gridfsdb)
    filename = 'rsa_1024_pub.pem'
    pubkey = ''  # avoid a NameError below if the key has not been generated yet
    if (fs.exists(filename=filename)):
        file = fs.get_last_version(filename)
        pubkey = file.read().replace('\n', '').replace(
            '-----BEGIN PUBLIC KEY-----',
            '').replace('-----END PUBLIC KEY-----', '')
    server_address = mongodb.db.configs.find_one({}, {
        '_id': 0,
        'server_address': 1
    })['server_address']
    qrdata = pyqrcode.create(server_address + '|' + pubkey, mode='binary')
    output = StringIO()
    qrdata.svg(output, scale=6)
    contents = output.getvalue()
    output.close()
    response = virtualrest.response_class(contents,
                                          direct_passthrough=True,
                                          mimetype='image/svg+xml')
    response.headers.set('Content-Length', len(contents))
    return response
Example #11
class GridFSStorage(Storage):
    def __init__(self, host='localhost', port=27017, collection='fs'):
        for s in ('host', 'port', 'collection'):
            name = 'GRIDFS_' + s.upper()
            if hasattr(settings, name):
                setattr(self, s, getattr(settings, name))
        for s, v in zip(('host', 'port', 'collection'),
                        (host, port, collection)):
            if v:
                setattr(self, s, v)
        self.db = Connection(host=self.host, port=self.port)[self.collection]
        self.fs = GridFS(self.db)

    def _save(self, name, content):
        self.fs.put(content, filename=name)
        return name

    def _open(self, name, *args, **kwargs):
        return self.fs.get_last_version(filename=name)

    def delete(self, name):
        oid = self.fs.get_last_version(filename=name)._id
        self.fs.delete(oid)

    def exists(self, name):
        return self.fs.exists({'filename': name})

    def size(self, name):
        return self.fs.get_last_version(filename=name).length
Example #12
class ObjectDB:
    def __init__(self, db):
        from gridfs import GridFS
        self.gridfs = GridFS(db)

    def __setitem__(self, key, obj):
        self.save(obj, key)

    def __getitem__(self, key):
        return self.load(key)

    def __delitem__(self, key):
        from pymongo.objectid import ObjectId
        if not isinstance(key, ObjectId):
            id = self.gridfs.get_last_version(key)._id
        else:
            id = key
        self.gridfs.delete(id)

    def __repr__(self):
        return "Key-value database"

    def keys(self):
        """Return list of filenames of objects in the gridfs store."""
        return self.gridfs.list()

    def object_ids(self):
        """Return list of id's of objects in the gridfs store, which
        are not id's of objects with filenames."""
        v = self.gridfs._GridFS__files.find({'filename': {
            '$exists': False
        }}, ['_id'])
        return [x['_id'] for x in v]

    def has_key(self, key):
        return self.gridfs.exists(filename=key)

    def save(self, obj, key=None, compress=None):
        """Save Python object obj to the grid file system self.gridfs.
        If key is None, the file is stored by MongoDB assigned
        ObjectID, and that id is returned.
        """
        from sage.all import dumps
        data = dumps(obj, compress=compress)
        if key is not None:
            self.gridfs.put(data, filename=key)
            return key
        else:
            # store by MongoDB assigned _id only, and return that id.
            return self.gridfs.put(data)

    def load(self, key, compress=True):
        from pymongo.objectid import ObjectId
        if isinstance(key, ObjectId):
            data = self.gridfs.get(key).read()
        else:
            data = self.gridfs.get_last_version(key).read()
        from sage.all import loads
        return loads(data, compress=compress)
Example #13
def _get_unique_filename(name, db_alias=DEFAULT_CONNECTION_NAME, collection_name='fs'):
    fs = GridFS(get_db(db_alias), collection_name)
    file_root, file_ext = os.path.splitext(name)
    count = itertools.count(1)
    while fs.exists(filename=name):
        # file_ext includes the dot.
        name = "%s_%s%s" % (file_root, next(count), file_ext)
    return name
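
A quick sketch of the collision handling in _get_unique_filename above, assuming a local mongod and an otherwise empty 'demo' database (all names are illustrative):

import itertools
import os

from gridfs import GridFS
from pymongo import MongoClient

fs = GridFS(MongoClient()['demo'])
fs.put(b'x', filename='report.pdf')
fs.put(b'x', filename='report_1.pdf')

# replay the loop from _get_unique_filename
name = 'report.pdf'
file_root, file_ext = os.path.splitext(name)
count = itertools.count(1)
while fs.exists(filename=name):
    name = '%s_%s%s' % (file_root, next(count), file_ext)
print(name)  # -> 'report_2.pdf'
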
Example #15
def _get_unique_filename(name):
    fs = GridFS(_get_db())
    file_root, file_ext = os.path.splitext(name)
    count = itertools.count(1)
    while fs.exists(filename=name):
        # file_ext includes the dot.
        name = "%s_%s%s" % (file_root, next(count), file_ext)
    return name
Example #16
def mongo_image(db, image_id):
    """Lê imagem do Banco MongoDB. Retorna None se ID não encontrado."""
    fs = GridFS(db)
    _id = ObjectId(image_id)
    if fs.exists(_id):
        grid_out = fs.get(_id)
        image = grid_out.read()
        return image
    return None
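
A minimal caller sketch for mongo_image above (assumes a local mongod; the stored bytes are a placeholder, not a real PNG):

from bson import ObjectId
from gridfs import GridFS
from pymongo import MongoClient

db = MongoClient()['demo']
image_id = GridFS(db).put(b'fake-image-bytes', filename='scan.png')

assert mongo_image(db, str(image_id)) == b'fake-image-bytes'
assert mongo_image(db, str(ObjectId())) is None  # unknown id -> None
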
Example #17
 def delete_files(self, set_name, ids):
     self.globalLock.acquire()
     try:
         grid_fs = GridFS(self.db, collection=set_name)
         for each in ids:
             if grid_fs.exists(document_or_id=ObjectId(each)):
                 grid_fs.delete(ObjectId(each))
     finally:
         self.globalLock.release()
Example #18
 def insertFile(self, db, filePath, query):  # store the file in the data collection
     fs = GridFS(db, self.setname)
     if fs.exists(query):
         print('This file already exists')
     else:
         with open(filePath, 'rb') as fileObj:
             data = fileObj.read()
             file_id = fs.put(data, filename=filePath.split('\\')[-1])
         return file_id
Example #19
def mongo_image(db, image_id, bboxes=False):
    """Lê imagem do Banco MongoDB. Retorna None se ID não encontrado."""
    fs = GridFS(db)
    _id = ObjectId(image_id)
    if fs.exists(_id):
        grid_out = fs.get(_id)
        image = grid_out.read()
        if bboxes:
            predictions = grid_out.metadata.get('predictions')
            if predictions:
                bboxes = [pred.get('bbox') for pred in predictions]
                image = draw_bboxes(image, bboxes)
        return image
    return None
Example #20
 def insertFile(self, db, filePath, query, label):  # store the file in the data collection
     fs = GridFS(db, self.file_table)
     if fs.exists(query):
         # print('This file already exists')
         pass
     else:
         with open(filePath, 'rb') as fileObj:
             data = fileObj.read()
             file_id = fs.put(data,
                              filename=filePath.split('/')[-1],
                              label=label)
             # print(file_id)
         return file_id
Example #21
 def downLoadFile(self, file_id):
     client = pymongo.MongoClient(self.db_url)
     db = client[self.db]
     gridfs_col = GridFS(db, collection="file_info")
     query = {"file_id": file_id}
     file_name = None  # avoid an UnboundLocalError when the file is missing
     if not gridfs_col.exists(query):
         file_data = None
     else:
         file_name = self.filter_single_grid_info(
             file_id=file_id)[0]["file_name"]
         print(file_name)
         file_data = gridfs_col.get_version(file_id=file_id,
                                            version=-1).read()
     return file_data, file_name
Example #22
def padma_proxy(image_id):
    """Teste. Envia uma imagem para padma teste e repassa retorno."""
    db = app.config['mongodb']
    fs = GridFS(db)
    _id = ObjectId(image_id)
    result = None  # avoid an UnboundLocalError when the id is not found
    if fs.exists(_id):
        grid_out = fs.get(_id)
        image = grid_out.read()
        # filename = grid_out.filename
        data = {}
        data['file'] = image
        headers = {}
        # headers['Content-Type'] = 'image/jpeg'
        r = requests.post(PADMA_URL + '/teste', files=data, headers=headers)
        result = r.text
    return result
Example #23
def process_image_request(file_id, size):
    """ Resizes images to size and returns a base64 encoded string representing
    the image """
    try:
        sizes = {
            'small': (140, 100),
            'medium': (400, 300),
            'large': (1200, 1000)
        }

        col = app.data.driver.db['files']
        image = col.find_one({'_id': ObjectId(file_id)})

        grid_fs = GridFS(app.data.driver.db)

        if not grid_fs.exists(_id=image['file']):
            return eve_abort(500, 'No file system found')

        im_stream = grid_fs.get_last_version(_id=image['file'])

        im = Image.open(im_stream)

        if size != 'original':
            im.thumbnail(sizes[size], Image.ANTIALIAS)

        img_io = io.BytesIO()

        im.save(img_io, 'PNG', quality=100)
        img_io.seek(0)

        encoded_img = base64.b64encode(img_io.read())

        payload = {
            'mimetype': 'image/png',
            'encoding': 'base64',
            'src': encoded_img
        }

        # Jsonify the dictionary and return it
        return jsonify(**payload)

        # Sends an image
        # return send_file(img_io, mimetype='image/png')
    except Exception:
        pass

    return eve_abort(404, 'Image not found or errors processing')
Example #24
class GridfsStorageBackend(object):
    def __init__(self, db, collection_name="storage"):
        from gridfs import GridFS
        self.fs = GridFS(db, collection_name)

    def __get_file_object(self, key):
        from gridfs import NoFile
        try:
            return self.fs.get_version(filename=key)
        except NoFile:
            raise KeyError(key)

    def __contains__(self, key):
        return self.fs.exists(filename=key)

    def __getitem__(self, key):
        return iterate_file_object(self.__get_file_object(key))

    def put_file(self, key, tmpfile):
        # ResourceDatabase will check to make sure the file doesn't already
        # exist before calling this, but in the event of a race condition this
        # may be called twice for a given key.  Fortunately this will cause no
        # issues, but it seems to result in two "versions" of the file being in
        # gridfs, which wastes some space (but not very much, if race
        # conditions are rare).
        #
        # FIXME: look into whether it is possible to drop old versions
        # automatically in gridfs
        # Python 2's file() builtin is gone in Python 3; open() works in both
        with open(tmpfile, 'rb') as f:
            self.fs.put(f, filename=key)

    def __delitem__(self, key):
        self.fs.delete(self.__get_file_object(key)._id)

    def keys(self):
        if six.PY3:
            return self.iterkeys()
        else:
            return self.fs.list()

    def iterkeys(self):
        return iter(self.fs.list())

    __iter__ = iterkeys

    def __len__(self):
        return len(self.fs.list())
Example #25
    def upLoadFile(self, file_coll, file_name, data_link):
        filter_condition = {"filename": file_name, "url": data_link}
        gridfs_col = GridFS(self.db, collection=file_coll)
        file_ = "0"
        query = {"filename": ""}
        query["filename"] = file_name

        if gridfs_col.exists(query):
            print('已经存在该文件')
        else:
            with open(file_name, 'rb') as file_r:
                file_data = file_r.read()
                file_ = gridfs_col.put(data=file_data,
                                       **filter_condition)  # 上传到gridfs
                print(file_)

        return file_
Example #26
class GridFsBackend(BaseBackend):
    '''
    A Mongo GridFS backend

    Expects the following settings:

    - `mongo_url`: The Mongo access URL
    - `mongo_db`: The database to store the file in.
    '''
    def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)

        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        self.fs = GridFS(self.db, self.name)

    def exists(self, filename):
        return self.fs.exists(filename=filename)

    @contextmanager
    def open(self, filename, mode='r', encoding='utf8'):
        if 'r' in mode:
            f = self.fs.get_last_version(filename)
            yield f if 'b' in mode else codecs.getreader(encoding)(f)
        else:  # mode == 'w'
            f = io.BytesIO() if 'b' in mode else io.StringIO()
            yield f
            params = {'filename': filename}
            if 'b' not in mode:
                params['encoding'] = encoding
            self.fs.put(f.getvalue(), **params)

    def read(self, filename):
        f = self.fs.get_last_version(filename)
        return f.read()

    def write(self, filename, content):
        return self.fs.put(self.as_binary(content), filename=filename)

    def delete(self, filename):
        for version in self.fs.find({'filename': filename}):
            self.fs.delete(version._id)

    def serve(self, filename):
        file = self.fs.get_last_version(filename)
        return send_file(file, mimetype=file.content_type)
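
A minimal usage sketch for this backend, assuming the same kind of Config object exercised in the test case of Example #32 below ('mongo_url' and 'mongo_db' are the only settings it reads) and that BaseBackend.as_binary passes bytes through unchanged:

config = Config({
    'mongo_url': 'mongodb://localhost:27017',
    'mongo_db': 'demo',
})
backend = GridFsBackend('attachments', config)

backend.write('hello.txt', b'hello world')
assert backend.exists('hello.txt')
assert backend.read('hello.txt') == b'hello world'
backend.delete('hello.txt')
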
Example #27
def get_image(_id, n, pil=False):
    """Recupera, recorta a imagem do banco e retorna."""
    db = app.config['mongodb']
    fs = GridFS(db)
    _id = ObjectId(_id)
    if fs.exists(_id):
        grid_data = fs.get(_id)
        if n is not None:
            n = int(n)
            preds = grid_data.metadata.get('predictions')
            if preds:
                bboxes = [pred.get('bbox') for pred in preds]
                if len(bboxes) >= n + 1 and bboxes[n]:
                    image = grid_data.read()
                    image = recorta_imagem(image, bboxes[n], pil=pil)
                    return image
    return None
Example #28
def account(conf, options, args):
    '''View details or summary of all accounts.'''

    con = Connection(conf['MONGODB_HOST'], conf['MONGODB_PORT'])
    db = con[conf['MONGODB_NAME']]
    fs = GridFS(db)

    if options.all:
        query = None
    elif len(args) == 2:
        query = {
            '_id': int(args[1])
        } if args[1].isdigit() else {
            'email': args[1]
        }
    else:
        log.error('account <email or _id> requires a valid email or _id')
        sys.exit(1)

    for acc in db.accounts.find(query):
        if str(acc['_id']).startswith('_'):
            continue
        print '%s [id:%s]' % (acc['email'], acc['id'])
        for key in acc:
            if key in ['email', '_id', 'id']:
                continue
            if key == 'items':
                try:
                    size = sum([fs.get(_id).length for _id in acc['items']])
                except NoFile:
                    log.warn('Account `%s` has some files missing:', _id)
                    # fail safe counting
                    size = 0
                    missing = []
                    for i in acc['items']:
                        if not fs.exists(i):
                            missing.append(i)
                        else:
                            size += fs.get(i).length
                print '    size: %s' % ppsize(size)
            print '    %s: %s' % (key, acc[key])
    if options.all:
        print db.accounts.count() - 1, 'accounts total'  # -1 for _autoinc

    con.disconnect()
Example #29
 def insertFile(self, db, filePath, query):
     '''
     save file
     :param db:
     :param filePath:
     :param query:
     :return:
     '''
     fs = GridFS(db, self.file_collection)
     if fs.exists(query):
         print('already exists!')
     else:
         with open(filePath, 'rb') as fileObj:
             data = fileObj.read()
             file_id = fs.put(data, filename=filePath.split('/')[-1])
             print(file_id)
         return file_id
Example #30
class FileRepository:
    def __init__(self, db: Database):
        self.fs = GridFS(db)

    def get_file(self, id: ObjectId) -> bytes:
        if not self.fs.exists(id):
            raise NonExistentError("The requested file with the id " + str(id) +
                                   " does not exist")
        return self.fs.get(id).read()

    def put_file(self, file: bytes) -> ObjectId:
        return self.fs.put(file)

    def delete_file(self, id: ObjectId):
        self.fs.delete(id)

    def replace_file(self, old_id: ObjectId, new_file: bytes) -> ObjectId:
        self.delete_file(old_id)
        return self.put_file(new_file)
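
A short usage sketch for the repository above (assumes a local mongod; the 'demo' database name is illustrative):

from pymongo import MongoClient

repo = FileRepository(MongoClient()['demo'])

file_id = repo.put_file(b'first version')
file_id = repo.replace_file(file_id, b'second version')  # delete + put, returns a new id
assert repo.get_file(file_id) == b'second version'
repo.delete_file(file_id)
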
Example #31
 def upLoadFile(self, file_coll, file_path, file_name, task_id):
     client = pymongo.MongoClient(self.dbURL)
     db = client["CrowdData"]
     filter_condition = {
         "fileName": file_name,
         "taskId": task_id,
         "filePath": file_path
     }
     gridfs_col = GridFS(db, collection=file_coll)
     file_ = "0"
     if gridfs_col.exists(filter_condition):
         print('This file already exists')
     else:
         with open(file_path, 'rb') as file_r:
             file_data = file_r.read()
             file_ = gridfs_col.put(data=file_data,
                                    **filter_condition)  # upload to GridFS
             print(file_)
     return file_
Example #32
class GridFsBackendTest(BackendTestCase):
    @pytest.fixture(autouse=True)
    def setup(self):
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')

        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        self.gfs.put(content, filename=filename, encoding='utf-8')

    def get_file(self, filename):
        file = self.gfs.get_last_version(filename)
        assert file is not None
        return file.read()

    def file_exists(self, filename):
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        backend = GridFsBackend('test_bucket', self.config)
        assert backend.fs._GridFS__collection.name == 'test_bucket'

    def test_config(self):
        assert self.backend.client.address == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        assert self.gfs.find({'filename': filename}).count() == 2

        self.backend.delete(filename)
        assert not self.file_exists(filename)
Example #33
    def upLoadFile(self, file_name, collection, data_link, host, author):
        client = pymongo.MongoClient(self.db_url)
        db = client[self.db]
        filter_condition = {"filename": file_name, "url": data_link, "host": host, "author": author}
        gridfs_col = GridFS(db, collection=collection)
        file_ = "0"
        query = {"filename": "", "author": ""}
        query["filename"] = file_name
        query["author"] = author
        if gridfs_col.exists(query):
            return {"result": "file is exist"}
        else:
            try:
                with open(file_name, 'rb') as file_r:
                    file_data = file_r.read()
                    file_ = gridfs_col.put(data=file_data, **filter_condition)  # 上传到gridfs
            except:
                file_ = {"result": "upload file is not exist"}

        return file_
Example #34
def preenche_bbox(db, engine, limit=2000, start=None):
    Session = sessionmaker(bind=engine)
    session = Session()
    if start:
        lista_conformidade = session.query(Conformidade) \
            .filter(Conformidade.dataescaneamento >= start) \
            .filter(Conformidade.bbox_classe.is_(None)).limit(limit).all()
    else:
        lista_conformidade = session.query(Conformidade) \
            .filter(Conformidade.bbox_classe.is_(None)).limit(limit).all()
    tempo = time.time()
    qtde = 0
    try:
        for conformidade in lista_conformidade:
            classe = None
            score = None
            fs = GridFS(db)
            _id = ObjectId(conformidade.id_imagem)
            if fs.exists(_id):
                grid_data = fs.get(_id)
                preds = grid_data.metadata.get('predictions')
                if preds:
                    bboxes = preds[0].get('bbox')
                    image = grid_data.read()
                    image = recorta_imagem(image, bboxes, pil=True)
                    classe = preds[0].get('class')
                    score = preds[0].get('score')
            if classe:
                conformidade.bbox_classe = classe
                conformidade.bbox_score = score
                conformidade.laplacian = calcula_laplacian(image)
                session.add(conformidade)
                qtde += 1
        session.commit()
    except Exception as err:
        logger.error(err, exc_info=True)
        session.rollback()
    tempo = time.time() - tempo
    tempo_registro = 0 if (qtde == 0) else (tempo / qtde)
    logger.info(f'{qtde} bboxes filled in {tempo} seconds. ' +
                f'{tempo_registro} per record')
Example #35
def account(conf, options, args):
    '''View details or summary of all accounts.'''

    con = Connection(conf['MONGODB_HOST'], conf['MONGODB_PORT'])
    db = con[conf['MONGODB_NAME']]
    fs = GridFS(db)

    if options.all:
        query = None
    elif len(args) == 2:
        query = {'_id': int(args[1])} if args[1].isdigit() else {'email': args[1]}
    else:
        log.error('account <email or _id> requires a valid email or _id')
        sys.exit(1)

    for acc in db.accounts.find(query):
        if str(acc['_id']).startswith('_'):
            continue
        print '%s [id:%s]' % (acc['email'], acc['id'])
        for key in acc:
            if key in ['email', '_id', 'id']:
                continue
            if key == 'items':
                try:
                    size = sum([fs.get(_id).length for _id in acc['items']])
                except NoFile:
                    log.warn('Account `%s` has some files missing:', _id)
                    # fail safe counting
                    size = 0
                    missing = []
                    for i in acc['items']:
                        if not fs.exists(i):
                            missing.append(i)
                        else:
                            size += fs.get(i).length
                print '    size: %s' % ppsize(size)
            print '    %s: %s' % (key, acc[key])
    if options.all:
        print db.accounts.count() - 1, 'accounts total'  # -1 for _autoinc

    con.disconnect()
Example #36
def get_file(rtype, filename):
    if (not session.get('logged_in')):
        return redirect(url_for('login'))

    if (rtype not in ['audios', 'videos', 'flags', 'images', 'thumbs']):
        abort(404)
    gridfsdb = database.Database(
        MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT), rtype)
    fs = GridFS(gridfsdb)
    if (fs.exists(filename=filename)):
        file = fs.get_last_version(filename)
        mime = mimetypes.guess_type(filename)[0]
        response = virtualrest.response_class(file,
                                              direct_passthrough=True,
                                              mimetype=mime)
        response.headers.set('Content-Length', file.length)
        response.headers.set('Cache-Control',
                             'no-cache, no-store, must-revalidate')
        response.headers.set('Pragma', 'no-cache')
        response.headers.set('Expires', '0')
        return response
    abort(404)
Example #37
def process_image_request(file_id, size):
    """ Resizes images to size and returns a base64 encoded string representing
    the image """ 
    
    sizes = {'small': (140,100),
             'medium': (400, 300),
             'large': (1200, 1000)
             }

    col = app.data.driver.db['files']
    image = col.find_one({'_id': ObjectId(file_id)})
    
    grid_fs = GridFS(app.data.driver.db)
    
    if not grid_fs.exists(_id=image['file']):
        eve_abort(500, 'No file system found')
    
    im_stream = grid_fs.get_last_version(_id=image['file']) 
    
    im = Image.open(im_stream)
    
    if size != 'original':
        im.thumbnail(sizes[size], Image.ANTIALIAS)
    
    img_io = io.BytesIO()
    
    im.save(img_io, 'PNG', quality=100)
    img_io.seek(0)

    encoded_img = base64.b64encode(img_io.read())
    
    payload = {'mimetype': 'image/png',
               'encoding': 'base64',
               'src': encoded_img
               }

    # Jsonify the dictionary and return it
    return jsonify(**payload)
Example #38
def process_request(file_id):
    """ This is the router actially for processing
    """
    
    if has_permission():
        col = app.data.driver.db['files']
        file = col.find_one({'_id': ObjectId(file_id)})
        
        if not file:
            eve_abort(404, 'No file found')
        
        try:
            grid_fs = GridFS(app.data.driver.db)
            if not grid_fs.exists(_id=file['file']):
                eve_abort(404, 'No file found')
            
            stream = grid_fs.get_last_version(_id=file['file'])
            
            response = make_response(stream.read())
            response.mimetype = stream.content_type
            return response
        except NoFile:
            eve_abort(404, 'No file found')
Example #39
class FileStoreMongo(FileStore):
    """
    Filestore database using GridFS (see :mod:`gridfs`)

    :arg pymongo.database.Database connection: MongoDB database object
    """

    def __init__(self, connection):
        self._conn=connection
        self.new_context()
        self._fs=GridFS(self.database)

    def _filename(self, **kwargs):
        return {'session': kwargs.get('session', kwargs.get('cell_id', 'SESSION NOT FOUND')),
                'filename': kwargs['filename']}

    @Debugger
    def new_file(self, **kwargs):
        """
        See :meth:`FileStore.new_file`

        :rtype: :class:`gridfs.grid_file.GridIn`
        """
        self.delete_files(**kwargs)
        log("FS Creating %s"%self._filename(**kwargs))
        return self._fs.new_file(**self._filename(**kwargs))

    @Debugger
    def delete_files(self, **kwargs):
        """
        See :meth:`FileStore.delete_files`
        """
        while self._fs.exists(self._filename(**kwargs)):
            self._fs.delete(self._fs.get_last_version(**self._filename(**kwargs))._id)

    @Debugger
    def get_file(self, **kwargs):
        """
        See :meth:`FileStore.get_file`

        :rtype: :class:`gridfs.grid_file.GridOut`
        """
        if self._fs.exists(self._filename(**kwargs)):
            return self._fs.get(self._fs.get_last_version(**self._filename(**kwargs))._id)
        else:
            return None
    
    @Debugger
    def create_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.create_file`
        """
        with self.new_file(**kwargs) as f:
            f.write(file_handle.read())

    @Debugger
    def copy_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.copy_file`
        """
        file_handle.write(self.get_file(**kwargs).read())

    @Debugger
    def new_context(self):
        """
        See :meth:`FileStore.new_context`
        """
        self.database=pymongo.database.Database(self._conn, mongo_config['mongo_db'])
        uri=mongo_config['mongo_uri']
        if '@' in uri:
            # strip off optional mongodb:// part
            if uri.startswith('mongodb://'):
                uri=uri[len('mongodb://'):]
            result=self.database.authenticate(uri[:uri.index(':')],uri[uri.index(':')+1:uri.index('@')])
            if result==0:
                raise Exception("MongoDB authentication problem")

    @Debugger
    def new_context_copy(self):
        """
        See :meth:`FileStore.new_context_copy`
        """
        return type(self)(self._conn)

    valid_untrusted_methods=()
Example #40
 path = os.path.abspath(os.path.join(root, item))
 full_path = '%s:%s' % (server, path)
 try:
     # Get the MD5 hash of the file
     handle = open(path, 'rb')
     digest = hashlib.md5()
     for block in iter(lambda: handle.read(1024*1024), b""):  # b"" sentinel: the file is opened in binary mode
         digest.update(block)
     handle.seek(0)
     md5 = digest.hexdigest()
 except IOError:
     logging.error('unable to read file %s', full_path)
     error_count += 1
     continue
 # Add the file to MongoDB if it isn't already added
 if fs.exists(md5=md5):
     db['%s.files' % args.collection].update_one(
         {'md5': md5}, {'$addToSet': {'aliases': full_path}})
     logging.debug('Updated file %s', full_path)
 else:
     kwargs = {'filename': item, 'aliases': [full_path]}
     mimetype = args.mimetype or \
         mimetypes.guess_type(item, strict=False)[0]
     if mimetype:
         kwargs['contentType'] = mimetype
     try:
         _id = fs.put(handle, **kwargs)
         write_count += 1
         logging.debug('Wrote file %s', full_path)
     except IOError:
         logging.error('unable to write file %s', full_path)
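
The fragment above comes from a larger bulk-import loop; a self-contained sketch of the same md5 dedup idea (the 'demo' database and helper name are illustrative; unlike older PyMongo, the md5 field is stored explicitly here rather than computed by the driver):

import hashlib

from gridfs import GridFS
from pymongo import MongoClient

db = MongoClient()['demo']
fs = GridFS(db)

def put_dedup(path, alias):
    """Store the file unless content with the same md5 is already in GridFS."""
    with open(path, 'rb') as handle:
        digest = hashlib.md5()
        for block in iter(lambda: handle.read(1024 * 1024), b""):
            digest.update(block)
        handle.seek(0)
        md5 = digest.hexdigest()
        if fs.exists(md5=md5):
            # same content already stored: just record another alias
            db['fs.files'].update_one({'md5': md5},
                                      {'$addToSet': {'aliases': alias}})
        else:
            fs.put(handle, filename=path, aliases=[alias], md5=md5)
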
Example #41
    def climate_itp_weight_thiessen(conn, db_model, subbsn_id, geodata2dbdir):
        """Generate and import weight information using Thiessen polygon method.

        Args:
            conn:
            db_model: workflow database object
            subbsn_id: subbasin id
            geodata2dbdir: directory to store weight data as txt file
        """
        spatial_gfs = GridFS(db_model, DBTableNames.gridfs_spatial)
        # read mask file from mongodb
        mask_name = str(subbsn_id) + '_MASK'
        if not spatial_gfs.exists(filename=mask_name):
            raise RuntimeError('%s does not exist in MongoDB!' % mask_name)
        mask = db_model[DBTableNames.gridfs_spatial].files.find({'filename': mask_name})[0]
        ysize = int(mask['metadata'][RasterMetadata.nrows])
        xsize = int(mask['metadata'][RasterMetadata.ncols])
        nodata_value = mask['metadata'][RasterMetadata.nodata]
        dx = mask['metadata'][RasterMetadata.cellsize]
        xll = mask['metadata'][RasterMetadata.xll]
        yll = mask['metadata'][RasterMetadata.yll]

        data = spatial_gfs.get(mask['_id'])

        total_len = xsize * ysize
        fmt = '%df' % (total_len,)
        data = unpack(fmt, data.read())
        # print(data[0], len(data), type(data))

        # count number of valid cells
        num = 0
        for type_i in range(0, total_len):
            if abs(data[type_i] - nodata_value) > UTIL_ZERO:
                num += 1

        # read stations information from database
        metadic = {RasterMetadata.subbasin: subbsn_id,
                   RasterMetadata.cellnum: num}
        site_lists = db_model[DBTableNames.main_sitelist].find({FieldNames.subbasin_id: subbsn_id})
        site_list = next(site_lists)
        clim_db_name = site_list[FieldNames.db]
        p_list = site_list.get(FieldNames.site_p)
        m_list = site_list.get(FieldNames.site_m)
        pet_list = site_list.get(FieldNames.site_pet)
        # print(p_list)
        # print(m_list)
        hydro_clim_db = conn[clim_db_name]

        type_list = [DataType.m, DataType.p, DataType.pet]
        site_lists = [m_list, p_list, pet_list]
        if pet_list is None:
            del type_list[2]
            del site_lists[2]

        # if storm_mode:  # todo: Do some compatible work for storm and longterm models.
        #     type_list = [DataType.p]
        #     site_lists = [p_list]

        for type_i, type_name in enumerate(type_list):
            fname = '%d_WEIGHT_%s' % (subbsn_id, type_name)
            # print(fname)
            if spatial_gfs.exists(filename=fname):
                x = spatial_gfs.get_version(filename=fname)
                spatial_gfs.delete(x._id)
            site_list = site_lists[type_i]
            if site_list is not None:
                site_list = site_list.split(',')
                # print(site_list)
                site_list = [int(item) for item in site_list]
                metadic[RasterMetadata.site_num] = len(site_list)
                # print(site_list)
                q_dic = {StationFields.id: {'$in': site_list},
                         StationFields.type: type_list[type_i]}
                cursor = hydro_clim_db[DBTableNames.sites].find(q_dic).sort(StationFields.id, 1)

                # meteorology station can also be used as precipitation station
                if cursor.count() == 0 and type_list[type_i] == DataType.p:
                    q_dic = {StationFields.id.upper(): {'$in': site_list},
                             StationFields.type.upper(): DataType.m}
                    cursor = hydro_clim_db[DBTableNames.sites].find(q_dic).sort(StationFields.id, 1)

                # get site locations
                id_list = list()
                loc_list = list()
                for site in cursor:
                    if site[StationFields.id] in site_list:
                        id_list.append(site[StationFields.id])
                        loc_list.append([site[StationFields.x], site[StationFields.y]])
                # print('loclist', locList)
                # interpolate using the locations
                myfile = spatial_gfs.new_file(filename=fname, metadata=metadic)
                with open(r'%s/weight_%d_%s.txt' % (geodata2dbdir, subbsn_id,
                                                    type_list[type_i]), 'w') as f_test:
                    for y in range(0, ysize):
                        for x in range(0, xsize):
                            index = int(y * xsize + x)
                            if abs(data[index] - nodata_value) > UTIL_ZERO:
                                x_coor = xll + x * dx
                                y_coor = yll + (ysize - y - 1) * dx
                                near_index = 0
                                line, near_index = ImportWeightData.thiessen(x_coor, y_coor,
                                                                             loc_list)
                                myfile.write(line)
                                fmt = '%df' % (len(loc_list))
                                f_test.write('%f %f ' % (x, y) + str(unpack(fmt, line)) + '\n')
                myfile.close()
Example #42
class GridFsBackendTest(BackendTestCase):
    hasher = 'md5'

    @pytest.fixture
    def pngimage(self, pngfile):
        with open(pngfile, 'rb') as f:
            yield f

    @pytest.fixture
    def jpgimage(self, jpgfile):
        with open(jpgfile, 'rb') as f:
            yield f

    @pytest.fixture(autouse=True)
    def setup(self):
        self.client = MongoClient()
        self.db = self.client[TEST_DB]
        self.gfs = GridFS(self.db, 'test')

        self.config = Config({
            'mongo_url': 'mongodb://localhost:27017',
            'mongo_db': TEST_DB,
        })
        self.backend = GridFsBackend('test', self.config)
        yield
        self.client.drop_database(TEST_DB)

    def put_file(self, filename, content):
        self.gfs.put(content, filename=filename, encoding='utf-8')

    def get_file(self, filename):
        file = self.gfs.get_last_version(filename)
        assert file is not None
        return file.read()

    def file_exists(self, filename):
        return self.gfs.exists(filename=filename)

    def test_default_bucket(self):
        backend = GridFsBackend('test_bucket', self.config)
        assert backend.fs._GridFS__collection.name == 'test_bucket'

    def test_config(self):
        assert self.backend.client.address == ('localhost', 27017)
        assert self.backend.db.name == TEST_DB

    def test_delete_with_versions(self, faker):
        filename = 'test.txt'
        self.put_file(filename, faker.sentence())
        self.put_file(filename, faker.sentence())
        assert self.gfs.find({'filename': filename}).count() == 2

        self.backend.delete(filename)
        assert not self.file_exists(filename)

    def test_write_pngimage(self, pngimage, utils):
        filename = 'test.png'
        content = six.binary_type(pngimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        f = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, f)

        with self.backend.open(filename, 'rb') as f:
            assert f.content_type == content_type

        self.assert_bin_equal(filename, content)

    def test_write_jpgimage(self, jpgimage, utils):
        filename = 'test.jpg'
        content = six.binary_type(jpgimage.read())
        content_type = mimetypes.guess_type(filename)[0]
        f = utils.filestorage(filename, content, content_type)
        self.backend.write(filename, f)

        with self.backend.open(filename, 'rb') as f:
            assert f.content_type == content_type

        self.assert_bin_equal(filename, content)
Example #43
class GridFsBackend(BaseBackend):
    '''
    A Mongo GridFS backend

    Expects the following settings:

    - `mongo_url`: The Mongo access URL
    - `mongo_db`: The database to store the file in.
    '''
    def __init__(self, name, config):
        super(GridFsBackend, self).__init__(name, config)

        self.client = MongoClient(config.mongo_url)
        self.db = self.client[config.mongo_db]
        self.fs = GridFS(self.db, self.name)

    def exists(self, filename):
        return self.fs.exists(filename=filename)

    @contextmanager
    def open(self, filename, mode='r', encoding='utf8'):
        if 'r' in mode:
            f = self.fs.get_last_version(filename)
            yield f if 'b' in mode else codecs.getreader(encoding)(f)
        else:  # mode == 'w'
            f = io.BytesIO() if 'b' in mode else io.StringIO()
            yield f
            params = {'filename': filename}
            if 'b' not in mode:
                params['encoding'] = encoding
            self.fs.put(f.getvalue(), **params)

    def read(self, filename):
        f = self.fs.get_last_version(filename)
        return f.read()

    def write(self, filename, content):
        kwargs = {
            'filename': filename
        }

        if hasattr(content, 'content_type') and content.content_type is not None:
            kwargs['content_type'] = content.content_type

        return self.fs.put(self.as_binary(content), **kwargs)

    def delete(self, filename):
        regex = '^{0}'.format(re.escape(filename))
        for version in self.fs.find({'filename': {'$regex': regex}}):
            self.fs.delete(version._id)

    def copy(self, filename, target):
        src = self.fs.get_last_version(filename)
        self.fs.put(src, filename=target, content_type=src.content_type)

    def list_files(self):
        for f in self.fs.list():
            yield f

    def serve(self, filename):
        file = self.fs.get_last_version(filename)
        return send_file(file, mimetype=file.content_type)

    def get_metadata(self, filename):
        f = self.fs.get_last_version(filename)
        return {
            'checksum': 'md5:{0}'.format(f.md5),
            'size': f.length,
            'mime': f.content_type,
            'modified': f.upload_date,
        }
Example #44
def get_file(rtype, filename):
    MAX_SIZE = 2097152  # 2MB
    if (not session.get('logged_in')):
        abort(403)
    if (rtype not in ['audios','videos','flags','images', 'thumbs']):
        abort(404)
    griddb = database.Database(MongoClient(host=GRIDFS_HOST, port=GRIDFS_PORT),rtype)
    fs = GridFS(griddb)

    if (fs.exists(filename=filename)):
        file = fs.get_last_version(filename)
        file_length = file.length
        chunk_length = file_length
        mime = guess_type(filename)[0]

        range_header = False
        if ('Range' in request.headers.keys()):
            range_header = True
            start, end = request.headers['Range'].split('=')[1].split(',')[0].split('-')
            if (end == '' or int(end) > file_length):
                end = file_length
            if (start == ''):
                start = file_length - int(end)
            chunk_file = StringIO()
            end = int(end)
            start = int(start)
            file.seek(start)
            chunk_file.write(file.read(end - start))  # read only the requested slice
            chunk_file.seek(0)
            chunk_length = end - start
            file.close()
        else:
            file.seek(0)

        def generate():
            while True:
                if (range_header):
                    chunk = chunk_file.read(MAX_SIZE)
                else:
                    chunk = file.read(MAX_SIZE)
                if not chunk:
                    break
                yield chunk

        if (chunk_length > MAX_SIZE):
            if (range_header):
                response = Response(stream_with_context(generate()), 206, mimetype=mime)
                response.headers.set('Content-Range', 'bytes %d-%d/%d' % (start, (start + chunk_length) - 1, file_length))
            else:
                response = Response(stream_with_context(generate()), 200, mimetype=mime)
                response.headers.set('Content-Length', file_length)
        else:
            if (range_header):
                response = virtualrest.response_class(chunk_file, 206, direct_passthrough=True, mimetype=mime)
                response.headers.set('Content-Range', 'bytes %d-%d/%d' % (start, (start + chunk_length) - 1, file_length))
            else:
                response = virtualrest.response_class(file, 200, direct_passthrough=True, mimetype=mime)
                response.headers.set('Content-Length', file_length)
        # only the plural forms are valid rtypes (see the check above)
        if (rtype in ['audios', 'videos']):
            response.headers.set('Cache-Control', 'no-cache, no-store, must-revalidate')
            response.headers.set('Pragma', 'no-cache')
        response.headers.set('Accept-Ranges', 'bytes')
        return response
    abort(404)
Example #45
 def exists(self):
     f = GridFS(self._database, collection=self._collection)
     return f.exists(self._value)
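
GridFS.exists accepts either a file _id or a filter, so the self._value above may be an ObjectId or a query document; a quick sketch (assumes a local mongod):

from gridfs import GridFS
from pymongo import MongoClient

fs = GridFS(MongoClient()['demo'])
_id = fs.put(b'data', filename='a.bin')

assert fs.exists(_id)                    # by ObjectId
assert fs.exists({'filename': 'a.bin'})  # by filter document
assert fs.exists(filename='a.bin')       # by keyword arguments
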
Example #46
    def parse_cmd_options(self):
        parser = OptionParser(version=offlineimap.__version__,
                              description="%s.\n\n%s" %
                              (offlineimap.__copyright__,
                               offlineimap.__license__))
        parser.add_option("--dry-run",
                  action="store_true", dest="dryrun",
                  default=False,
                  help="Do not actually modify any store but check and print "
              "what synchronization actions would be taken if a sync would be"
              " performed. It will not precisely give the exact information w"
              "hat will happen. If e.g. we need to create a folder, it merely"
              " outputs 'Would create folder X', but not how many and which m"
              "ails it would transfer.")

        parser.add_option("--info",
                  action="store_true", dest="diagnostics",
                  default=False,
                  help="Output information on the configured email repositories"
              ". Useful for debugging and bug reporting. Use in conjunction wit"
              "h the -a option to limit the output to a single account. This mo"
              "de will prevent any actual sync to occur and exits after it outp"
              "ut the debug information.")

        parser.add_option("-1",
                  action="store_true", dest="singlethreading",
                  default=False,
                  help="Disable all multithreading operations and use "
              "solely a single-thread sync. This effectively sets the "
              "maxsyncaccounts and all maxconnections configuration file "
              "variables to 1.")

        parser.add_option("-P", dest="profiledir", metavar="DIR",
                  help="Sets OfflineIMAP into profile mode. The program "
              "will create DIR (it must not already exist). "
              "As it runs, Python profiling information about each "
              "thread is logged into profiledir. Please note: "
              "This option is present for debugging and optimization "
              "only, and should NOT be used unless you have a "
              "specific reason to do so. It will significantly "
              "decrease program performance, may reduce reliability, "
              "and can generate huge amounts of data. This option "
              "implies the -1 option.")

        parser.add_option("-a", dest="accounts", metavar="ACCOUNTS",
                  help="Overrides the accounts section in the config file. "
              "Lets you specify a particular account or set of "
              "accounts to sync without having to edit the config "
              "file. You might use this to exclude certain accounts, "
              "or to sync some accounts that you normally prefer not to.")

        parser.add_option("-c", dest="configfile", metavar="FILE",
                  default="~/.offlineimaprc",
                  help="Specifies a configuration file to use in lieu of '%default'.\n"
                "Configuration files stored in MongoDB are supported via the "
                "following URI syntax:\n"
                "mongodb://*****:*****@server:port/db/bucket/file.conf")

        parser.add_option("-d", dest="debugtype", metavar="type1,[type2...]",
                  help="Enables debugging for OfflineIMAP. This is useful "
              "if you are to track down a malfunction or figure out what is "
              "going on under the hood. This option requires one or more "
              "debugtypes, separated by commas. These define what exactly "
              "will be debugged, and so far include two options: imap, thread, "
              "maildir or ALL. The imap option will enable IMAP protocol "
              "stream and parsing debugging. Note that the output may contain "
              "passwords, so take care to remove that from the debugging "
              "output before sending it to anyone else. The maildir option "
              "will enable debugging for certain Maildir operations. "
              "The use of any debug option (unless 'thread' is included), "
              "implies the single-thread option -1.")

        parser.add_option("-l", dest="logfile", metavar="FILE",
                  help="Log to FILE")

        parser.add_option("-f", dest="folders", metavar="folder1,[folder2...]",
                  help="Only sync the specified folders. The folder names "
              "are the *untranslated* foldernames of the remote repository. "
              "This command-line option overrides any 'folderfilter' "
              "and 'folderincludes' options in the configuration file.")

        parser.add_option("-k", dest="configoverride",
                  action="append",
                  metavar="[section:]option=value",
                  help=
              """Override configuration file option. If "section" is
              omitted, it defaults to "general". Any underscores
              in the section name are replaced with spaces:
              for instance, to override option "autorefresh" in
              the "[Account Personal]" section in the config file
              one would use "-k Account_Personal:autorefresh=30".""")

        parser.add_option("-o",
                  action="store_true", dest="runonce",
                  default=False,
                  help="Run only once, ignoring any autorefresh setting "
                       "in the configuration file.")

        parser.add_option("-q",
                  action="store_true", dest="quick",
                  default=False,
                  help="Run only quick synchronizations. Ignore any "
              "flag updates on IMAP servers (if a flag on the remote IMAP "
              "changes, and we have the message locally, it will be left "
              "untouched in a quick run.")

        parser.add_option("-u", dest="interface",
                  help="Specifies an alternative user interface to "
              "use. This overrides the default specified in the "
              "configuration file. The UI specified with -u will "
              "be forced to be used, even if checks determine that it is "
              "not usable. Possible interface choices are: %s " %
              ", ".join(UI_LIST.keys()))

        (options, args) = parser.parse_args()

        #read in configuration file

        #: :type: string
        configfilename = os.path.expanduser(options.configfile)
        url = urlparse(configfilename)

        config = CustomConfigParser()

        # connect to gridfs
        if url.scheme == 'mongodb':
            try:
                from gridfs import GridFS
                from pymongo import Connection
            except ImportError:
                logging.error(" *** pymongo must be installed to use mongodb as configuration source; aborting!")
                sys.exit(1)

            path = url.path
            (path, filename) = os.path.split(path)
            (db, collection) = os.path.split(path)
            db_name = os.path.basename(db)

            mongo_uri = ''.join([url[0], '://', url[1], db])
            mongodb = Connection(mongo_uri)
            gfs = GridFS(mongodb[db_name], collection)

            if not gfs.exists(filename=filename):
                logging.error(" *** Config file '%s' does not exist; aborting!" %
                              url.geturl())
                sys.exit(1)

            file = gfs.get_last_version(filename)
            config.readfp(file)

        else:
            if not os.path.exists(configfilename):
                # TODO, initialize and make use of chosen ui for logging
                logging.error(" *** Config file '%s' does not exist; aborting!" %
                              configfilename)
                sys.exit(1)
            config.read(configfilename)


        #profile mode chosen?
        if options.profiledir:
            if not options.singlethreading:
                # TODO, make use of chosen ui for logging
                logging.warn("Profile mode: Forcing to singlethreaded.")
                options.singlethreading = True
            if os.path.exists(options.profiledir):
                # TODO, make use of chosen ui for logging
                logging.warn("Profile mode: Directory '%s' already exists!" %
                             options.profiledir)
            else:
                os.mkdir(options.profiledir)
            threadutil.ExitNotifyThread.set_profiledir(options.profiledir)
            # TODO, make use of chosen ui for logging
            logging.warn("Profile mode: Potentially large data will be "
                         "created in '%s'" % options.profiledir)

        #override a config value
        if options.configoverride:
            for option in options.configoverride:
                (key, value) = option.split('=', 1)
                if ':' in key:
                    (secname, key) = key.split(':', 1)
                    section = secname.replace("_", " ")
                else:
                    section = "general"
                config.set(section, key, value)

        #which ui to use? cmd line option overrides config file
        ui_type = config.getdefault('general','ui', 'ttyui')
        if options.interface is not None:
            ui_type = options.interface
        if '.' in ui_type:
            #transform Curses.Blinkenlights -> Blinkenlights
            ui_type = ui_type.split('.')[-1]
            # TODO, make use of chosen ui for logging
            logging.warning('Using old interface name, consider using one '
                            'of %s' % ', '.join(UI_LIST.keys()))
        if options.diagnostics: ui_type = 'basic' # enforce basic UI for --info

        #dry-run? Set [general]dry-run=True
        if options.dryrun:
            config.set('general', 'dry-run', "True")
        config.set_if_not_exists('general','dry-run','False')

        try:
            # create the ui class
            self.ui = UI_LIST[ui_type.lower()](config)
        except KeyError:
            logging.error("UI '%s' does not exist, choose one of: %s" % \
                              (ui_type,', '.join(UI_LIST.keys())))
            sys.exit(1)
        setglobalui(self.ui)

        #set up additional log files
        if options.logfile:
            self.ui.setlogfile(options.logfile)

        #welcome blurb
        self.ui.init_banner()

        if options.debugtype:
            self.ui.logger.setLevel(logging.DEBUG)
            if options.debugtype.lower() == 'all':
                options.debugtype = 'imap,maildir,thread'
            #force single threading?
            if not ('thread' in options.debugtype.split(',') \
                    and not options.singlethreading):
                self.ui._msg("Debug mode: Forcing to singlethreaded.")
                options.singlethreading = True

            debugtypes = options.debugtype.split(',') + ['']
            for type in debugtypes:
                type = type.strip()
                self.ui.add_debug(type)
                if type.lower() == 'imap':
                    imaplib.Debug = 5

        if options.runonce:
            # FIXME: maybe need a better
            for section in accounts.getaccountlist(config):
                config.remove_option('Account ' + section, "autorefresh")

        if options.quick:
            for section in accounts.getaccountlist(config):
                config.set('Account ' + section, "quick", '-1')

        #custom folder list specified?
        if options.folders:
            foldernames = options.folders.split(",")
            folderfilter = "lambda f: f in %s" % foldernames
            folderincludes = "[]"
            for accountname in accounts.getaccountlist(config):
                account_section = 'Account ' + accountname
                remote_repo_section = 'Repository ' + \
                    config.get(account_section, 'remoterepository')
                config.set(remote_repo_section, "folderfilter", folderfilter)
                config.set(remote_repo_section, "folderincludes",
                           folderincludes)

        if options.logfile:
            sys.stderr = self.ui.logfile
    
        socktimeout = config.getdefaultint("general", "socktimeout", 0)
        if socktimeout > 0:
            socket.setdefaulttimeout(socktimeout)

        threadutil.initInstanceLimit('ACCOUNTLIMIT',
            config.getdefaultint('general', 'maxsyncaccounts', 1))

        for reposname in config.getsectionlist('Repository'):
            for instancename in ["FOLDER_" + reposname,
                                 "MSGCOPY_" + reposname]:
                if options.singlethreading:
                    threadutil.initInstanceLimit(instancename, 1)
                else:
                    threadutil.initInstanceLimit(instancename,
                        config.getdefaultint('Repository ' + reposname,
                                                  'maxconnections', 2))
        self.config = config
        return (options, args)
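
For reference, the mongodb:// config handling above boils down to two os.path.split calls plus a rebuilt connection URI. A minimal standalone sketch of that decomposition, with a made-up URI (not from the snippet above):

import os
from urlparse import urlparse  # urllib.parse on Python 3

url = urlparse('mongodb://localhost:27017/offlineimap/configs/offlineimaprc.conf')
path, filename = os.path.split(url.path)   # '/offlineimap/configs', 'offlineimaprc.conf'
db_path, collection = os.path.split(path)  # '/offlineimap', 'configs'
db_name = os.path.basename(db_path)        # 'offlineimap'
mongo_uri = '%s://%s%s' % (url.scheme, url.netloc, db_path)
print(db_name, collection, filename, mongo_uri)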
Example #47
0
def exists(self, remote_path):
    client = self.connect()
    fs = GridFS(client[self.settings.database])
    ret = fs.exists(filename=remote_path)
    client.close()
    return ret

class FuseGridFS(LoggingMixIn, Operations):

    def __init__(self, db_or_uri, collection=None):
        if collection is None:
            url = urlparse(db_or_uri)
            if url.scheme != 'mongodb':
                show_usage()

            path = url.path
            (db_path, collection) = os.path.split(path)
            db = os.path.basename(db_path)

            mongo_uri = ''.join([url[0], '://', url[1], db_path])
        else:
            db = db_or_uri
            mongo_uri = ''

        cn = Connection(mongo_uri)
        self.db = cn[db]
        self.collection = self.db[collection]
        self.gfs = GridFS(self.db, collection=collection)

    def fix_path(self, path):
        if path == '/':
            return ''

        path = path[1:] if path.startswith('/') else path
        path = path.replace('/', '\\/')
        path = path+'\\/' if not path.endswith('/') else path
        return path

    def find_dirs(self, path):
        path = self.fix_path(path)
        map_function = Code('''
        function () {
            var re = /^%s([\w ]+)\//;
            emit(re.exec(this.filename)[1], 1);
        }
        ''' % path)

        reduce_function = Code('''
        function(k,v) {
            return 1;
        }
        ''')
        res = self.collection.files.map_reduce(map_function, reduce_function, out='dirs', query={'filename' : { '$regex' : '^{0}([\w ]+)\/'.format(path) }})
        if res.count() > 0:
            return [a['_id'] for a in res.find()]
        return []

    def find_files(self, path):
        path = self.fix_path(path)
        map_function = Code('''
        function () {
            var re = /^%s([\w ]+(?:\.[\w ]+)*)$/;
            emit(re.exec(this.filename)[1], 1);
        }
        ''' % path)

        reduce_function = Code('''
        function(k,v) {
            return 1;
        }
        ''')
        res = self.collection.files.map_reduce(map_function, reduce_function, out='dirs', query={'filename' : { '$regex' : '^{0}([\w ]+(?:\.[\w ]+)*)$'.format(path) }})
        if res.count() > 0:
            return [a['_id'] for a in res.find()]
        return []

    def is_dir(self, path):
        if path == '/':
            return True

        path = self.fix_path(path)
        res = self.collection.files.find_one({'filename' : { '$regex' : '^{0}'.format(path) }})
        return res is not None

    def readdir(self, path, fh):
        dirs = self.find_dirs(path)
        files = self.find_files(path)

        return ['.', '..'] + dirs + files

    def fuse_to_mongo_path(self, path):
        return path[1:] if path.startswith('/') else path

    def get_mongo_file(self, path):
        path = self.fuse_to_mongo_path(path)
        return self.gfs.get_last_version(filename=path) if self.gfs.exists(filename=path) else None

    def getattr(self, path, fh=None):
        if path == '/' or self.is_dir(path):
            st = dict(st_mode=(S_IFDIR | 0755), st_nlink=2)
        else:
            file = self.get_mongo_file(path)
            if file:
                st = dict(st_mode=(S_IFREG | 0444), st_size=file.length)
            else:
                raise FuseOSError(ENOENT)
        st['st_ctime'] = st['st_mtime'] = st['st_atime'] = time()
        st['st_uid'], st['st_gid'], pid = fuse_get_context()
        return st

    def read(self, path, size, offset, fh):
        file = self.get_mongo_file(path)
        if file:
            file.seek(offset, os.SEEK_SET)
            return file.read(size)
        else:
            raise FuseOSError(ENOENT)

    # Disable unused operations:
    access = None
    flush = None
    getxattr = None
    listxattr = None
    open = None
    opendir = None
    release = None
    releasedir = None
    statfs = None

    def rename_dir(self, old, new):
        print('rename_dir: old=%s, new=%s' % (old, new))

        search = self.fix_path(old)
        res = self.collection.files.find({ 'filename' : { '$regex' : '^{0}'.format(search) } }, fields={ 'filename' : 1 } )
        if not res.count():
            return 0

        new = self.fuse_to_mongo_path(new)
        old = self.fuse_to_mongo_path(old)
        old_len = len(old)

        for a in res:
            new_name = new + a['filename'][old_len:]
            id = a['_id']
            self.collection.files.update({ '_id' : id }, {'$set' : { 'filename' : new_name } })

        return 0

    def rename(self, old, new):
        if self.is_dir(old):
            return self.rename_dir(old, new)

        file = self.get_mongo_file(old)
        if not file:
            raise FuseOSError(ENOENT)

        new = self.fuse_to_mongo_path(new)
        self.collection.files.update({ '_id' : file._id }, {'$set' : { 'filename' : new } })

        return 0
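
Since FuseGridFS implements fusepy's Operations interface, mounting it should be a single FUSE() call. A hypothetical mount, assuming fusepy is installed, the mountpoint /mnt/gridfs exists, and a GridFS collection 'fs' lives in database 'mydb' (all three are made-up names):

from fuse import FUSE  # fusepy

if __name__ == '__main__':
    # read-only foreground mount of the filesystem class defined above
    FUSE(FuseGridFS('mongodb://localhost:27017/mydb/fs'), '/mnt/gridfs',
         foreground=True, ro=True)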
Example #49
0
class Storage(BaseMongoModel):
    """
    Abstraction of the storage. The purpose of this class is to create an
    abstraction layer which provides a database-independent API for
    manipulating the filesystem. The only requirement on the filesystem is
    that it supports file versioning (or some workaround which implements
    versioning within a filesystem that does not support it natively).

    The implementation is currently built on MongoDB.

    Usage:
        >>> from pymongo import Connection
        >>> from model import Storage
        >>> store = Storage(Connection(), "myuid", "webarchive")
        >>> file = store.get("http://www.myfancypage.com/index.html")
        >>> # get last version of the file, which is available in the storage
        >>> c = file.get_last_content()
        >>> # get the raw data
        >>> c.data
        "<html>
        ...
        >>> # content type and content length
        >>> print c.content_type, c.length
        'text/html' 29481

    Design pattern: Factory
    """

    def __init__(self, connection, uid, database="webarchive"):
        """
        Initializes storage.

        @param connection: database connection
        @type connection: pymongo.Connection
        @param uid: user id (see Monitor.__doc__ for more info)
        @type uid: str
        @param database: if the storage is based on database, this param
                         represents the name of database to be used within
                         this instance.
        @type database: str
        """
        if not isinstance(connection, Connection):
            raise TypeError("connection must be instance of pymongo.Connection.")
        self._connection = connection
        self._database = database
        self._uid = uid
        # instance of HTTP header model
        self._headermeta = HttpHeaderMeta(connection, uid, database)
        # filesystem interface
        self.filesystem = GridFS(self._connection[database], "content")
#?        print "STORAGE: FILESYSTEM: ",self.filesystem
        # flag representing possibility to save large objects into storage
        self.allow_large = False

    def allow_large_documents(self):
        """
        Allow large objects to be stored in the storage.
        """
        self.allow_large = True

    def get(self, filename):
        """
        Get file object by filename.

        @param filename: name of the file. In this case, it will be URL.
        @type filename: str
        @returns: File object representing file in many versions
        @rtype: File
        @raises: DocumentNotAvailable if the document doesn't exist in the storage
        """
#?        print "In Storage.get(): resource ",filename
        if not self.filesystem.exists(filename=filename):
            raise DocumentNotAvailable("File does not exist in the storage.")
        return File(filename, self.filesystem, self._headermeta)


    def check_uid(self):
        return self._headermeta.check_uid()

    def generate_weight_dependent_parameters(conn, maindb, subbsn_id):
        """Generate some parameters dependent on weight data and only should be calculated once.
            Such as PHU0 (annual average total potential heat units)
                TMEAN0 (annual average temperature)
            added by Liangjun, 2016-6-17
        """
        spatial_gfs = GridFS(maindb, DBTableNames.gridfs_spatial)
        # read mask file from mongodb
        mask_name = '%d_MASK' % subbsn_id
        # does the MASK file exist in the database?
        if not spatial_gfs.exists(filename=mask_name):
            raise RuntimeError('%s does not exist in MongoDB!' % mask_name)
        # read WEIGHT_M file from mongodb
        weight_m_name = '%d_WEIGHT_M' % subbsn_id
        mask = maindb[DBTableNames.gridfs_spatial].files.find({'filename': mask_name})[0]
        weight_m = maindb[DBTableNames.gridfs_spatial].files.find({'filename': weight_m_name})[0]
        num_cells = int(weight_m['metadata'][RasterMetadata.cellnum])
        num_sites = int(weight_m['metadata'][RasterMetadata.site_num])
        # read meteorology sites
        site_lists = maindb[DBTableNames.main_sitelist].find({FieldNames.subbasin_id: subbsn_id})
        site_list = next(site_lists)
        db_name = site_list[FieldNames.db]
        m_list = site_list.get(FieldNames.site_m)
        hydro_clim_db = conn[db_name]

        site_list = m_list.split(',')
        site_list = [int(item) for item in site_list]

        q_dic = {StationFields.id: {'$in': site_list},
                 StationFields.type: DataType.phu0}
        cursor = hydro_clim_db[DBTableNames.annual_stats].find(q_dic).sort(StationFields.id, 1)

        q_dic2 = {StationFields.id: {'$in': site_list},
                  StationFields.type: DataType.mean_tmp0}
        cursor2 = hydro_clim_db[DBTableNames.annual_stats].find(q_dic2).sort(StationFields.id, 1)

        id_list = list()
        phu_list = list()
        for site in cursor:
            id_list.append(site[StationFields.id])
            phu_list.append(site[DataValueFields.value])

        id_list2 = list()
        tmean_list = list()
        for site in cursor2:
            id_list2.append(site[StationFields.id])
            tmean_list.append(site[DataValueFields.value])

        weight_m_data = spatial_gfs.get(weight_m['_id'])
        total_len = num_cells * num_sites
        # print(total_len)
        fmt = '%df' % (total_len,)
        weight_m_data = unpack(fmt, weight_m_data.read())

        # calculate PHU0
        phu0_data = np_zeros(num_cells)
        # calculate TMEAN0
        tmean0_data = np_zeros(num_cells)
        for i in range(num_cells):
            for j in range(num_sites):
                phu0_data[i] += phu_list[j] * weight_m_data[i * num_sites + j]
                tmean0_data[i] += tmean_list[j] * weight_m_data[i * num_sites + j]
        ysize = int(mask['metadata'][RasterMetadata.nrows])
        xsize = int(mask['metadata'][RasterMetadata.ncols])
        nodata_value = mask['metadata'][RasterMetadata.nodata]
        mask_data = spatial_gfs.get(mask['_id'])
        total_len = xsize * ysize
        fmt = '%df' % (total_len,)
        mask_data = unpack(fmt, mask_data.read())
        fname = '%d_%s' % (subbsn_id, DataType.phu0)
        fname2 = '%d_%s' % (subbsn_id, DataType.mean_tmp0)
        if spatial_gfs.exists(filename=fname):
            x = spatial_gfs.get_version(filename=fname)
            spatial_gfs.delete(x._id)
        if spatial_gfs.exists(filename=fname2):
            x = spatial_gfs.get_version(filename=fname2)
            spatial_gfs.delete(x._id)
        meta_dic = copy.deepcopy(mask['metadata'])
        meta_dic['TYPE'] = DataType.phu0
        meta_dic['ID'] = fname
        meta_dic['DESCRIPTION'] = DataType.phu0

        meta_dic2 = copy.deepcopy(mask['metadata'])
        meta_dic2['TYPE'] = DataType.mean_tmp0
        meta_dic2['ID'] = fname2
        meta_dic2['DESCRIPTION'] = DataType.mean_tmp0

        myfile = spatial_gfs.new_file(filename=fname, metadata=meta_dic)
        myfile2 = spatial_gfs.new_file(filename=fname2, metadata=meta_dic2)
        valid_count = 0
        for i in range(0, ysize):
            cur_row = list()
            cur_row2 = list()
            for j in range(0, xsize):
                index = i * xsize + j
                if abs(mask_data[index] - nodata_value) > UTIL_ZERO:
                    cur_row.append(phu0_data[valid_count])
                    cur_row2.append(tmean0_data[valid_count])
                    valid_count += 1
                else:
                    cur_row.append(nodata_value)
                    cur_row2.append(nodata_value)
            fmt = '%df' % xsize
            myfile.write(pack(fmt, *cur_row))
            myfile2.write(pack(fmt, *cur_row2))
        myfile.close()
        myfile2.close()
        print('Valid cell number of subbasin %d is: %d' % (subbsn_id, valid_count))
        return True
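
The heart of the function above is a station-to-cell interpolation: each cell value is the dot product of that cell's row of the weight matrix with the per-station annual statistics, PHU0[i] = sum_j W[i][j] * PHU[j]. A minimal numpy sketch of just that step, with made-up numbers:

import numpy as np

weight_m = np.array([[1.0, 0.0],
                     [0.7, 0.3],
                     [0.5, 0.5],
                     [0.0, 1.0]])  # shape (num_cells, num_sites); stored flattened in GridFS
phu = np.array([1800.0, 2200.0])   # per-station annual PHU from the annual_stats query
phu0 = weight_m.dot(phu)           # PHU0[i] = sum_j W[i, j] * PHU[j]
print(phu0)                        # [1800. 1920. 2000. 2200.]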
Example #51
0
class DocumentStoringPipeline(MediaPipeline):

    def __init__(self, settings):
        if settings.get('MONGO_DATABASE') is None:
            raise NotConfigured()
        self.database = settings.get('MONGO_DATABASE')
        self.gridfs = GridFS(self.database)
        self.links = self.database.links
        self.links.ensure_index([('file', pymongo.ASCENDING), ('referer', pymongo.ASCENDING),
                                 ('number', pymongo.ASCENDING)])
        super(DocumentStoringPipeline, self).__init__()

    @classmethod
    def from_settings(cls, settings):
        return cls(settings)

    def media_failed(self, failure, request, info):
        if not isinstance(failure.value, IgnoreRequest):
            referer = request.headers.get('Referer')
            log.msg(format='File (unknown-error): Error downloading '
                           '%(request)s referred in '
                           '<%(referer)s>: %(exception)s',
                    level=log.WARNING, spider=info.spider, exception=failure.value,
                    request=request, referer=referer)

    def media_downloaded(self, response, request, info):
        referer = request.headers.get('Referer')

        if response.status != 200:
            log.msg(
                format='File (code: %(status)s): Error downloading document from %(request)s referred in <%(referer)s>',
                level=log.WARNING, spider=info.spider,
                status=response.status, request=request, referer=referer)
            return

        if not response.body:
            log.msg(
                format='File (empty-content): Empty document from %(request)s referred in <%(referer)s>: no-content',
                level=log.WARNING, spider=info.spider,
                request=request, referer=referer)
            return

        log.msg(format='File: Downloaded document from %(request)s referred in <%(referer)s>',
                level=log.DEBUG, spider=info.spider, request=request, referer=referer)

        f = self.gridfs.new_file(content_type=request.meta['item']['mime_type'], url=request.url)
        try:
            f.write(response.body)
        finally:
            f.close()
        info.spider.crawler.stats.inc_value('downloaded_documents', spider=info.spider)
        info.spider.crawler.stats.inc_value('downloaded_documents/bytes_downloaded',
                                            len(response.body),
                                            spider=info.spider)
        self.store_link(info, request.meta['item'], f._id)

    def get_media_requests(self, item, info):
        if not isinstance(item, DocumentItem):
            return
        if self.gridfs.exists({'url': item['url']}):
            f = self.gridfs.get_last_version(url=item['url'])
            self.store_link(info, item, f._id)
        else:
            request = Request(item['url'])
            request.meta['item'] = item
            return [request]

    def store_link(self, info, item, file_id):
        if self.links.find({'file': file_id, 'referer': item['referer'], 'number': item['link_number']}).count() > 0:
            return
        self.links.insert({'file': file_id,
                           'referer': item['referer'],
                           'text': item['link_text'],
                           'number': item['link_number'],
                           'fragment': item['fragment'],
                           'nofollow': item['nofollow']})
        info.spider.crawler.stats.inc_value('links_collected', spider=info.spider)
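
The deduplication in get_media_requests above relies on GridFS persisting arbitrary keyword attributes on the files document: new_file(url=...) stores the URL, so exists({'url': ...}) and get_last_version(url=...) can locate the same file later. A standalone sketch of that round trip, assuming a local MongoDB (database name and URL are made up):

from pymongo import MongoClient
from gridfs import GridFS

gfs = GridFS(MongoClient().crawler_docs)
url = 'http://example.com/report.pdf'
if not gfs.exists({'url': url}):
    f = gfs.new_file(content_type='application/pdf', url=url)
    try:
        f.write(b'%PDF-1.4 ...')
    finally:
        f.close()
doc = gfs.get_last_version(url=url)  # keyword query on the stored attribute
print(doc._id, doc.url)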
Example #52
0
class ObjectDB:
    def __init__(self, db):
        from gridfs import GridFS

        self.gridfs = GridFS(db)

    def __setitem__(self, key, obj):
        self.save(obj, key)

    def __getitem__(self, key):
        return self.load(key)

    def __delitem__(self, key):
        from pymongo.objectid import ObjectId

        if not isinstance(key, ObjectId):
            id = self.gridfs.get_last_version(key)._id
        else:
            id = key
        self.gridfs.delete(id)

    def __repr__(self):
        return "Key-value database"

    def keys(self):
        """Return list of filenames of objects in the gridfs store."""
        return self.gridfs.list()

    def object_ids(self):
        """Return list of id's of objects in the gridfs store, which
        are not id's of objects with filenames."""
        v = self.gridfs._GridFS__files.find({"filename": {"$exists": False}}, ["_id"])
        return [x["_id"] for x in v]

    def has_key(self, key):
        return self.gridfs.exists(filename=key)

    def save(self, obj, key=None, compress=None):
        """Save Python object obj to the grid file system self.gridfs.
        If key is None, the file is stored under a MongoDB-assigned
        ObjectId, and that id is returned.
        """
        from sage.all import dumps

        data = dumps(obj, compress=compress)
        if key is not None:
            self.gridfs.put(data, filename=key)
            return key
        else:
            # store by MongoDB assigned _id only, and return that id.
            return self.gridfs.put(data)

    def load(self, key, compress=True):
        from pymongo.objectid import ObjectId

        if isinstance(key, ObjectId):
            data = self.gridfs.get(key).read()
        else:
            data = self.gridfs.get_last_version(key).read()
        from sage.all import loads

        return loads(data, compress=compress)
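
As the dunder methods suggest, ObjectDB is meant to be used like a dict backed by GridFS. A hypothetical session, assuming a Sage environment (sage.all supplies dumps/loads) and the old pymongo that still exposes Connection and pymongo.objectid, as the class itself imports:

from pymongo import Connection

db = ObjectDB(Connection()['objectdb'])
db['matrix_A'] = [[1, 2], [3, 4]]  # pickled via sage.all.dumps, put() under the key
A = db['matrix_A']                 # loads the last version stored under that key
print(db.keys())                   # filenames currently in the store
del db['matrix_A']                 # deletes the most recent version's file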
Example #53
0
class GridFUSE(Operations):

    DEFAULT = ('mongodb://127.0.0.1/gridfs/fs',)
    FMODE = (stat.S_IRWXU|stat.S_IROTH|stat.S_IRGRP)^stat.S_IRUSR
    DMODE = FMODE|stat.S_IXUSR|stat.S_IXGRP|stat.S_IXOTH
    ST = ({
        'st_mode': stat.S_IRWXU|stat.S_IRWXG|stat.S_IRWXO,
        'st_ino': 0,
        'st_dev': 0,
        'st_nlink': 1,
        'st_uid': os.geteuid(),
        'st_gid': os.getegid(),
        'st_size': 0,
        'st_atime': 0,
        'st_mtime': 0,
        'st_ctime': 0,
        })

    def __repr__(self):
        return '<%s.%s: %s>' % (
                __name__,
                self.__class__.__name__,
                ' '.join([
                    ('%s=%r' % x) for x in [
                        ('fs', self.fs),
                        ]]))

    def __init__(self, nodes=None, db=None, coll=None, *args, **kwds):
        super(GridFUSE, self).__init__()
        nodes = nodes or GridFUSE.DEFAULT
        if isinstance(nodes, basestring):
            nodes = [nodes]
        cluster = list()
        for node in nodes:
            uri = urlsplit(node)
            if not uri.scheme:
                cluster.append(node)
                continue
            if uri.scheme != 'mongodb':
                raise TypeError('invalid uri.scheme: %r' % uri.scheme)
            node_db, _, node_coll = uri.path.strip('/').partition('/')
            if db is None and node_db:
                db = node_db
            if coll is None and node_coll:
                coll = node_coll.replace('/', '.')
            if node_db and uri.username is None:
                node_db = str()
            cluster.append(urlunsplit((
                uri.scheme,
                uri.netloc,
                node_db,
                uri.query,
                uri.fragment,
                )))
        if not db or not coll:
            raise TypeError('undefined db and/or root collection')
        conn = self.conn = MongoClient(cluster)
        self.debug = bool(kwds.pop('debug', False))
        self.gfs = GridFS(conn[db], collection=coll)
        self.fs = conn[db][coll]
        self._ctx = Context(self)
        if not self.gfs.exists(filename=''):
            self.mkdir()

    def __call__(self, op, path, *args):
        if not hasattr(self, op):
            raise FuseOSError(EFAULT)
        ret = getattr(self, op)(path.strip('/'), *args)
        if self.debug:
            self._debug(op, path, args, ret)
        return ret

    def _debug(self, op, path, args, ret):
        own = op in self.__class__.__dict__
        sys.stderr.write('%s:%s:%i/%i/%i\n' % (
            (op.upper(), own) + fuse_get_context()
            ))
        sys.stderr.write(':: %s\n' % path)
        if op not in ('read', 'write'):
            sys.stderr.write(':: %s\n' % pf(args))
            sys.stderr.write(':: %s\n' % pf(ret))
        sys.stderr.write('\n')
        sys.stderr.flush()

    def getattr(self, path, fh):
        spec = None
        if fh is not None:
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)

        if spec is None:
            raise FuseOSError(ENOENT)

        st = spec.stat.copy()
        st['st_size'] = spec.length
        return st

    def rename(self, path, new):
        new = new.strip('/')
        dirname = basename = None
        if new:
            dirname, basename = pth.split(new)
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'filename': new, 'dirname': dirname}},
                upsert=False,
                multi=False,
                )

    def chmod(self, path, mode):
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'stat.st_mode': mode}},
                upsert=False,
                multi=False,
                )

    def chown(self, path, uid, gid):
        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'stat.st_uid': uid, 'stat.st_gid': gid}},
                upsert=False,
                multi=False,
                )

    def _ent(self, path):
        if self.gfs.exists(filename=path, visible=True):
            raise FuseOSError(EEXIST)
        dirname = basename = None
        if path:
            dirname, basename = pth.split(path)
        now = time.time()
        st = self.ST.copy()
        st.update(st_ctime=now, st_mtime=now, st_atime=now)
        return self.gfs.new_file(
                filename=path,
                stat=st,
                dirname=dirname,
                visible=True,
                )

    def create(self, path, mode=FMODE, fi=None):
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=mode|S_IFREG)
        file = spec._file
        file.pop('_id')
        fh, spec = self._ctx.acquire(GridIn(self.fs, **file))
        if fi is not None:
            fi.fh = fh
            return 0
        return fh

    def mkdir(self, path='', mode=DMODE):
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=mode|S_IFDIR)
        return 0

    #TODO: impl?
    def link(self, path, source):
        raise FuseOSError(ENOTSUP)

    def symlink(self, path, source):
        with self._ent(path) as spec:
            spec._file['stat'].update(st_mode=0o0777|S_IFLNK)
            spec.write(str(source))
        return 0

    def readlink(self, path):
        spec = None
        if self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)

        if spec is None:
            raise FuseOSError(ENOENT)
        elif not spec.stat['st_mode'] & S_IFLNK > 0:
            raise FuseOSError(EINVAL)

        return spec.read()

    def readdir(self, path, fh):
        spec = None
        if fh is not None:
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)

        if spec is None:
            raise FuseOSError(ENOENT)
        elif not spec.stat['st_mode'] & S_IFDIR > 0:
            raise FuseOSError(ENOTDIR)

        for rel in ('.', '..'):
            yield rel

        for sub in self.fs.files.find({
            'dirname': path,
            'visible': True,
            }).distinct('filename'):
            yield pth.basename(sub)

    def open(self, path, flags=None):
        #TODO: handle os.O_* flags?
        fh, spec = self._ctx.get(path)
        if hasattr(flags, 'fh'):
            flags.fh = fh
            return 0
        return fh

    opendir = open

    def release(self, path, fh):
        return self._ctx.release(fh)

    releasedir = release

    def read(self, path, size, offset, fh):
        spec = self.gfs.get_last_version(path)
        spec.seek(offset, os.SEEK_SET)
        return spec.read(size)

    def write(self, path, data, offset, fh):
        if fh is not None:
            fh = getattr(fh, 'fh', fh)
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            fh, spec = self._ctx.acquire(path)

        if not hasattr(spec, 'write'):
            self.truncate(path, 0, fh=fh)
            spec = self._ctx._fd[fh]
        spec.write(data)

        return len(data)

    def unlink(self, path):
        if not path:
            #...cannot remove mountpoint
            raise FuseOSError(EBUSY)

        spec = self.gfs.get_last_version(path)
        if spec is None or not spec.visible:
            raise FuseOSError(ENOENT)

        self.fs.files.update(
                {'filename': path, 'visible': True},
                {'$set': {'visible': False}},
                upsert=False,
                multi=True,
                )

        return 0

    rmdir = unlink

    def truncate(self, path, length, fh=None):
        if length != 0:
            raise FuseOSError(ENOTSUP)

        spec = None
        if fh is not None:
            fh = getattr(fh, 'fh', fh)
            fh, spec = self._ctx.get(fh)
        elif self.gfs.exists(filename=path, visible=True):
            spec = self.gfs.get_last_version(path)
        if spec is None:
            raise FuseOSError(EBADF)

        if hasattr(spec, 'write') and spec._chunk_number == 0:
            spec._buffer.truncate(0)
            spec._buffer.seek(0)
            spec._position = 0
        else:
            #FIXME: this is terrible... whole class needs refactor
            fi = spec._file
            fi.pop('_id')
            with self.gfs.new_file(**fi) as zero:
                self.unlink(path)
            if fh:
                self._ctx.release(fh)
                self._ctx._fd[fh] = self.gfs.new_file(**fi)

        return 0
Example #54
0
class FileStoreMongo(FileStore):
    """
    Filestore database using GridFS (see :mod:`gridfs`)

    :arg pymongo.database.Database connection: MongoDB database object
    """

    def __init__(self, connection):
        self._conn=connection
        self.new_context()
        self._fs=GridFS(self.database)

    def new_file(self, **kwargs):
        """
        See :meth:`FileStore.new_file`

        :rtype: :class:`gridfs.grid_file.GridIn`
        """
        self.delete_files(**kwargs)
        return self._fs.new_file(**kwargs)

    def delete_files(self, **kwargs):
        """
        See :meth:`FileStore.delete_files`
        """
        while self._fs.exists(kwargs):
            self._fs.delete(self._fs.get_last_version(**kwargs)._id)

    def get_file(self, **kwargs):
        """
        See :meth:`FileStore.get_file`

        :rtype: :class:`gridfs.grid_file.GridOut`
        """
        if self._fs.exists(kwargs):
            return self._fs.get(self._fs.get_last_version(**kwargs)._id)
        else:
            return None
    
    def create_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.create_file`
        """
        with self.new_file(**kwargs) as f:
            f.write(file_handle.read())

    def copy_file(self, file_handle, **kwargs):
        """
        See :meth:`FileStore.copy_file`
        """
        file_handle.write(self.get_file(**kwargs).read())

    def new_context(self):
        """
        Reconnect to the filestore. This function should be
        called before the first filestore access in each new process.
        """
        self.database=pymongo.database.Database(self._conn, mongo_config['mongo_db'])
        uri=mongo_config['mongo_uri']
        if '@' in uri:
            # strip off optional mongodb:// part
            if uri.startswith('mongodb://'):
                uri=uri[len('mongodb://'):]
            result=self.database.authenticate(uri[:uri.index(':')],uri[uri.index(':')+1:uri.index('@')])
            if result==0:
                raise Exception("MongoDB authentication problem")

    valid_untrusted_methods=()
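
A hypothetical round trip through the store above, assuming mongo_config is populated as new_context() expects and that files are keyed by a filename attribute (conn and the filename are made up):

from StringIO import StringIO  # io.BytesIO on Python 3

store = FileStoreMongo(conn)   # conn: a pymongo connection object
store.create_file(StringIO('hello'), filename='greeting.txt')
f = store.get_file(filename='greeting.txt')
print(f.read())                # 'hello'
store.delete_files(filename='greeting.txt')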
Example #55
0
class Manager(object):
    """ Holds data regarding database management. """

    def __init__(self, host=None, port=None, database=None, username=None, prefix=None):
        """ Initializes a connection and database. """
        from pymongo import Connection
        from gridfs import GridFS
        from .. import pymongo_host, pymongo_port, vasp_database_name, OUTCARS_prefix, pymongo_username

        super(Manager, self).__init__()

        self._host = host if host is not None else pymongo_host
        """ Host where the database is hosted. """
        self._port = port if port is not None else pymongo_port
        """ Port of the host where the database is hosted. """
        self._vaspbase_name = database if database is not None else vasp_database_name
        """ Name of the vasp database. """
        self._outcars_prefix = prefix if prefix is not None else OUTCARS_prefix
        """ Name of the OUTCAR GridFS collection. """
        self._username = username if username is not None else pymongo_username
        """ Username for database. """
        self.connection = Connection(host=self._host, port=self._port)
        """ Holds connection to pymongo. """
        self.database = getattr(self.connection, self._vaspbase_name)
        """ Database within pymongo. """
        self.outcars = GridFS(self.database, self.outcars_prefix)
        """ GridFS object for OUTCARs. """
        self.comments = self.database["{0}.comments".format(self.outcars_prefix)]
        """ Collection of comments attached when adding OUTCAR's to a file. """
        self.files = self.database["{0}.files".format(self.outcars_prefix)]
        """ OUTCAR files collection. """
        self.extracted = self.database["extracted"]
        """ Collection with pre-extracted values from the outcar. """
        self.fere = self.database["fere_summary"]
        """ Collection with FERE ground-state analysis. """

    @property
    def host(self):
        """ Host where the database is hosted. """
        return self._host

    @property
    def port(self):
        """ Port of the host where the database is hosted. """
        return self._port

    @property
    def vasp_database_name(self):
        """ Name of the vasp database. """
        return self._vaspbase_name

    @property
    def outcars_prefix(self):
        """ Name of the OUTCAR GridFS collection. """
        return self._outcars_prefix

    def push(self, filename, outcar, comment, **kwargs):
        """ Pushes OUTCAR to database. 

        :raise ValueError:  if the corresponding object is not found.
        :raise IOError:  if the path does not exist or is not a file.
    """
        from hashlib import sha512
        from os import uname
        from .misc import get_username

        assert len(comment.replace(" ", "").replace("\n", "")) != 0, ValueError("Cannot push file if comment is empty.")

        try:
            kwargs["comment"] = self.comments.find({"text": comment}).next()
        except StopIteration:  # add comment to database
            kwargs["comment"] = self.comments.insert({"text": comment})
            print kwargs["comment"]

        hash = sha512(outcar).hexdigest()
        if self.outcars.exists(sha512=hash):
            print "{0} already in database. Please use 'ladabase.update'.".format(filename)
            return
        if "filename" not in kwargs:
            kwargs["filename"] = filename
        if "uploader" not in kwargs:
            kwargs["uploader"] = get_username()
        if "host" not in kwargs:
            kwargs["host"] = uname()[1]
        compression = kwargs.get("compression", None)
        kwargs["compression"] = compression
        if compression == "bz2":
            from bz2 import compress

            return self.outcars.put(compress(outcar), sha512=hash, **kwargs)
        elif compression is None:
            return self.outcars.put(outcar, sha512=hash, **kwargs)
        else:
            raise ValueError("Invalid compression format {0}.".format(compression))

    def find_fromfile(self, path):
        """ Returns the database object corresponding to this file.

        :raise ValueError:  if the corresponding object is not found.
        :raise IOError:  if the path does not exist or is not a file.

        Finds the corresponding file using sha512 hash. 
    """
        from os.path import exists, isfile
        from hashlib import sha512
        from ..opt import RelativeDirectory

        ipath = RelativeDirectory(path).path
        assert exists(ipath), IOError("{0} does not exist.".format(path))
        assert isfile(ipath), IOError("{0} is not a file.".format(path))

        with open(ipath, "r") as file:
            string = file.read()
        hash = sha512(string).hexdigest()

        assert self.outcars.exists(sha512=hash), ValueError("{0} could not be found in database.".format(path))

        return self.files.find_one({"sha512": hash})

    def __contains__(self, path):
        """ True if path already in database. """
        from os.path import exists
        from hashlib import sha512
        from ..opt import RelativeDirectory

        path = RelativeDirectory(path).path
        if not exists(path):
            ValueError("File {0} does not exist.".format(path))
        with open(path, "r") as file:
            string = file.read()
        hash = sha512(string).hexdigest()
        return self.outcars.exists(sha512=hash)
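
A hypothetical push into the OUTCAR database above, assuming the package-level defaults (pymongo_host, pymongo_port, vasp_database_name, ...) are configured; the path and comment are made up:

manager = Manager()
with open('OUTCAR') as handle:
    outcar = handle.read()
manager.push('OUTCAR', outcar, 'Relaxation run for bulk Si', compression='bz2')
print('OUTCAR' in manager)  # membership is checked via the stored sha512 hash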
Example #56
0
class TestJob(object):

    def setup_class(self):
        conn = pymongo.MongoClient('localhost')
        self.db = conn.joblog_test
        self.fs = GridFS(self.db)

    def setup_method(self, method):
        self.clf = LogisticRegression
        self.x = np.random.normal(0, 1, (10, 3))
        self.y = np.random.randint(0, 2, (10))
        self.params = dict(penalty='l2', C=2)

        self.db.drop_collection('test')
        self.collection = self.db['test']

    def test_run(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf = j.run()

        assert isinstance(clf, LogisticRegression)

        clf2 = self.clf(**self.params).fit(self.x, self.y)
        np.testing.assert_array_equal(clf.predict(self.x),
                                      clf2.predict(self.x))

    def test_result_property(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()
        assert j.result is not None

    def test_nonunique_cached(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)

        x = self.x.copy()
        y = self.y.copy()
        j2 = Job(self.clf, x, y, self.params, self.collection)

        assert j.result is None
        assert j2.result is None

        clf1 = j2.run()

        assert j.result is not None

    def test_detect_unique(self):
        """Each unique input gets a new entry"""

        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

        self.x[0] += 5
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

        self.y[0] += 5
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

        self.params = {}
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

        self.clf = LinearRegression
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert j.result is None
        j.run()

    def test_rerun_overrides_cache(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)

        clf1 = j.run()

        #hack: modify x in place after running job.
        #      rerunning will change classification rule
        self.x[:] *= 100
        clf1b = j.run()   # should not rerun
        clf2 = j.rerun()  # should rerun

        assert (clf1.coef_ == clf1b.coef_).all()
        assert not (clf1.coef_ == clf2.coef_).all()

    def test_nosave(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store=None)
        assert j.result is None


    def test_save_score(self):

        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store='score')
        assert j.result == clf1.score(self.x, self.y)

    def test_save_predict(self):

        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        clf1 = j.run(store='prediction')
        np.testing.assert_array_equal(j.result, clf1.predict(self.x))

    def test_duplicate(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert not j.duplicate

        j2 = Job(self.clf, self.x, self.y, self.params, self.collection)
        assert not j.duplicate
        assert j2.duplicate

    def test_label(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection,
                label='test_label')
        e = self.collection.find_one({'label':'test_label'})
        assert e['params'] == self.params

    def test_file_cleanup(self):
        #should delete old files if changing result
        j = Job(self.clf, self.x, self.y, self.params, self.collection,
                label='test_label')
        clf = j.run()

        e = self.collection.find_one({'label': 'test_label'})
        fid = e['result']

        j.result = 5
        assert not self.fs.exists(fid)

    def test_get_set(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)
        j2 = Job(self.clf, self.x, self.y, self.params, self.collection)

        j['test_extra'] = 123
        assert j['test_extra'] == 123

        #confirm it's actually in the database
        assert j2['test_extra'] == 123

    def test_invalid_get(self):
        j = Job(self.clf, self.x, self.y, self.params, self.collection)

        with pytest.raises(KeyError) as e:
            j['test']
        assert e.value[0] == 'No attribute test associated with this job'
Example #57
0
class MongoFileStorage(MongoStorage, FileStorage):

    FILENAME = 'filename'

    def __init__(self, *args, **kwargs):

        super(MongoFileStorage, self).__init__(*args, **kwargs)

        self.gridfs = None

    def _connect(self, **kwargs):

        result = super(MongoFileStorage, self)._connect(**kwargs)

        if result:

            self.gridfs = GridFS(
                database=self._database, collection=self.get_table()
            )

        return result

    def put(self, name, data, meta=None):

        fs = self.new_file(name=name, meta=meta)
        try:
            fs.write(data=data)
        finally:
            fs.close()

    def put_meta(self, name, meta):
        oldf, _meta = self.get(name, with_meta=True)
        _meta.update(meta)

        fs = self.new_file(name=name, meta=_meta)

        try:
            while True:
                data = oldf.read(512)

                if not data:
                    break

                fs.write(data=data)

        finally:
            fs.close()

    def get(self, name, version=-1, with_meta=False):

        result = None

        try:
            gridout = self.gridfs.get_version(filename=name, version=version)
        except NoFile:
            pass
        else:
            if with_meta:
                result = MongoFileStream(gridout), gridout.metadata

            else:
                result = MongoFileStream(gridout)

        return result

    def get_meta(self, name):
        result = self.get(name, with_meta=True)

        if result is not None:
            result = result[1]

        return result

    def exists(self, name):

        result = self.gridfs.exists(filename=name)

        return result

    def find(
        self,
        names=None,
        meta=None,
        sort=None,
        limit=-1,
        skip=0,
        with_meta=False
    ):

        request = {}

        if names is not None:
            if isinstance(names, basestring):
                request[MongoFileStorage.FILENAME] = names
            else:
                request[MongoFileStorage.FILENAME] = {'$in': names}

        if meta is not None:
            for metafield in meta:
                field = 'metadata.{0}'.format(metafield)
                request[field] = meta[metafield]

        cursor = self.gridfs.find(request)

        if sort is not None:
            cursor.sort(sort)
        if limit > 0:
            cursor.limit(limit)
        if skip > 0:
            cursor.skip(skip)

        if with_meta:
            result = (
                (MongoFileStream(gridout), gridout.metadata)
                for gridout in cursor
            )

        else:
            result = (MongoFileStream(gridout) for gridout in cursor)

        return result

    def list(self):

        return self.gridfs.list()

    def new_file(self, name=None, meta=None, data=None):

        kwargs = {}

        if name is None:
            name = str(uuid())

        kwargs['filename'] = name

        if meta is not None:
            kwargs['metadata'] = meta

        gridout = self.gridfs.new_file(**kwargs)

        result = MongoFileStream(gridout)

        if data is not None:
            result.write(data)

        return result

    def delete(self, names=None):

        if names is None:
            names = self.gridfs.list()

        names = ensure_iterable(names)

        for name in names:
            while True:
                fs = self.get(name)

                if fs is None:
                    break

                self.gridfs.delete(file_id=fs.get_inner_object()._id)

    def lookup_tables_as_collection_and_gridfs(cfg, maindb):
        """Import lookup tables (from txt file) as Collection and GridFS
        Args:
            cfg: SEIMS config object
            maindb: workflow model database
        """
        for tablename, txt_file in list(cfg.paramcfgs.lookup_tabs_dict.items()):
            # import each lookup table as a collection and GridFS file.
            c_list = maindb.collection_names()
            if not StringClass.string_in_list(tablename.upper(), c_list):
                maindb.create_collection(tablename.upper())
            else:
                maindb.drop_collection(tablename.upper())
            # initial bulk operator
            bulk = maindb[tablename.upper()].initialize_ordered_bulk_op()
            # delete the GridFS file if one with this tablename already exists
            spatial = GridFS(maindb, DBTableNames.gridfs_spatial)
            if spatial.exists(filename=tablename.upper()):
                x = spatial.get_version(filename=tablename.upper())
                spatial.delete(x._id)

            # read data items
            data_items = read_data_items_from_txt(txt_file)
            field_names = data_items[0][0:]
            item_values = list()  # import as gridfs file
            for i, cur_data_item in enumerate(data_items):
                if i == 0:
                    continue
                data_import = dict()  # import as Collection
                item_value = list()  # import as gridfs file
                for idx, fld in enumerate(field_names):
                    if MathClass.isnumerical(cur_data_item[idx]):
                        tmp_value = float(cur_data_item[idx])
                        data_import[fld] = tmp_value
                        item_value.append(tmp_value)
                    else:
                        data_import[fld] = cur_data_item[idx]
                bulk.insert(data_import)
                if len(item_value) > 0:
                    item_values.append(item_value)
            MongoUtil.run_bulk(bulk, 'No operations during import %s.' % tablename)
            # begin import gridfs file
            n_row = len(item_values)
            # print(item_values)
            if n_row >= 1:
                n_col = len(item_values[0])
                for i in range(n_row):
                    if n_col != len(item_values[i]):
                        raise ValueError('Please check %s to make sure each item has '
                                         'the same numeric dimension. The size of first '
                                         'row is: %d, and the current data item is: %d' %
                                         (tablename, n_col, len(item_values[i])))
                    else:
                        item_values[i].insert(0, n_col)

                metadic = {ModelParamDataUtils.item_count: n_row,
                           ModelParamDataUtils.field_count: n_col}
                cur_lookup_gridfs = spatial.new_file(filename=tablename.upper(), metadata=metadic)
                header = [n_row]
                fmt = '%df' % 1
                s = pack(fmt, *header)
                cur_lookup_gridfs.write(s)
                fmt = '%df' % (n_col + 1)
                for i in range(n_row):
                    s = pack(fmt, *item_values[i])
                    cur_lookup_gridfs.write(s)
                cur_lookup_gridfs.close()
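
The binary layout written above is plain struct-packed floats: a single float holding the row count, then each row stored as its column count followed by its values. A minimal sketch of reading such a record back, assuming the same layout and that n_col comes from the file's metadata:

from struct import calcsize, unpack

def read_lookup_gridfs(gridout, n_col):
    """gridout: a GridFS file opened for reading; n_col: its field count."""
    n_row = int(unpack('1f', gridout.read(calcsize('1f')))[0])
    fmt = '%df' % (n_col + 1)
    rows = []
    for _ in range(n_row):
        values = unpack(fmt, gridout.read(calcsize(fmt)))
        rows.append(list(values[1:]))  # values[0] just repeats n_col
    return rows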