def __init__(self, path):
    root_dir = os.path.abspath(path)
    if not os.path.exists(root_dir):
        raise IOError('Path does not exist: %s' % path)

    fs = HashFS(os.path.join(root_dir, 'hashfs'), depth=3,
                width=2, algorithm='md5')

    # Setup Serialisation for non list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), 'TinyDate')
    serialization_store.register_serializer(FileSerializer(fs), 'TinyFile')
    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), 'TinyArray')
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), 'TinyDataFrame')
        serialization_store.register_serializer(SeriesSerializer(), 'TinySeries')

    db = TinyDB(os.path.join(root_dir, 'metadata.json'),
                storage=serialization_store)

    self.db = db
    self.runs = db.table('runs')
    self.fs = fs

def create(path='./runs_db', overwrite=None):
    root_dir = os.path.abspath(path)
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    fs = HashFS(os.path.join(root_dir, 'hashfs'), depth=3,
                width=2, algorithm='md5')

    # Setup Serialisation object for non list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), 'TinyDate')
    serialization_store.register_serializer(FileSerializer(fs), 'TinyFile')
    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), 'TinyArray')
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), 'TinyDataFrame')
        serialization_store.register_serializer(SeriesSerializer(), 'TinySeries')

    db = TinyDB(os.path.join(root_dir, 'metadata.json'),
                storage=serialization_store)

    return TinyDbObserver(db, fs, overwrite=overwrite, root=root_dir)

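A minimal usage sketch, assuming this create() factory is exposed on TinyDbObserver as in sacred; the experiment name and main function are illustrative, not part of the snippet above:

from sacred import Experiment
from sacred.observers import TinyDbObserver

ex = Experiment('hello_tinydb')
# Persist run metadata and file artifacts under ./runs_db (created on demand).
ex.observers.append(TinyDbObserver.create(path='./runs_db'))

@ex.main
def main():
    return 42

if __name__ == '__main__':
    ex.run()
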
def test_tinydb_observer_equality(tmpdir, tinydb_obs):
    db = TinyDB(os.path.join(tmpdir.strpath, 'metadata.json'))
    fs = HashFS(os.path.join(tmpdir.strpath, 'hashfs'), depth=3,
                width=2, algorithm='md5')
    m = TinyDbObserver(db, fs)

    assert tinydb_obs == m
    assert not tinydb_obs != m

    assert not tinydb_obs == 'foo'
    assert tinydb_obs != 'foo'

def test_tinydb_observer_equality(tmpdir, tinydb_obs):
    db = TinyDB(os.path.join(tmpdir.strpath, "metadata.json"))
    fs = HashFS(
        os.path.join(tmpdir.strpath, "hashfs"), depth=3, width=2, algorithm="md5"
    )
    m = TinyDbObserver.create_from(db, fs)

    assert tinydb_obs == m
    assert not tinydb_obs != m

    assert not tinydb_obs == "foo"
    assert tinydb_obs != "foo"

def get_db_file_manager(root_dir) -> Tuple[TinyDB, HashFS]:
    root_dir = Path(root_dir)
    fs = HashFS(root_dir / "hashfs", depth=3, width=2, algorithm="md5")
    # Setup Serialisation object for non list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), "TinyDate")
    serialization_store.register_serializer(FileSerializer(fs), "TinyFile")
    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), "TinyArray")
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), "TinyDataFrame")
        serialization_store.register_serializer(SeriesSerializer(), "TinySeries")
    db = TinyDB(os.path.join(root_dir, "metadata.json"), storage=serialization_store)
    return db, fs

def get_db_file_manager(root_dir):
    fs = HashFS(os.path.join(root_dir, 'hashfs'), depth=3,
                width=2, algorithm='md5')
    # Setup Serialisation object for non list/dict objects
    serialization_store = SerializationMiddleware()
    serialization_store.register_serializer(DateTimeSerializer(), 'TinyDate')
    serialization_store.register_serializer(FileSerializer(fs), 'TinyFile')
    if opt.has_numpy:
        serialization_store.register_serializer(NdArraySerializer(), 'TinyArray')
    if opt.has_pandas:
        serialization_store.register_serializer(DataFrameSerializer(), 'TinyDataFrame')
        serialization_store.register_serializer(SeriesSerializer(), 'TinySeries')
    db = TinyDB(os.path.join(root_dir, 'metadata.json'),
                storage=serialization_store)
    return db, fs

"long", "datetime.datetime", "list", "dict", "ObjectId", "IS()" ) TYPES_LIST = ['GSystemType', 'RelationType', 'AttributeType', 'MetaType', 'ProcessType'] my_doc_requirement = u'storing_orignal_doc' reduced_doc_requirement = u'storing_reduced_doc' to_reduce_doc_requirement = u'storing_to_be_reduced_doc' indexed_word_list_requirement = u'storing_indexed_words' # CUSTOM DATA-TYPE DEFINITIONS #STATUS_CHOICES_TU = IS(u'DRAFT', u'HIDDEN', u'PUBLISHED', u'DELETED', u'MODERATION') #STATUS_CHOICES = tuple(str(qtc) for qtc in STATUS_CHOICES_TU) #QUIZ_TYPE_CHOICES_TU = IS(u'Short-Response', u'Single-Choice', u'Multiple-Choice') #QUIZ_TYPE_CHOICES = tuple(str(qtc) for qtc in QUIZ_TYPE_CHOICES_TU) # Designate a root folder for HashFS. If the folder does not exists already, it will be created. # Set the `depth` to the number of subfolders the file's hash should be split when saving. # Set the `width` to the desired width of each subfolder. gfs = HashFS(MEDIA_ROOT, depth=3, width=1, algorithm='sha256') # gfs: gstudio file system # DATABASE Variables db = get_db()
def setup(root_path):
    """Module initialization."""
    fs = HashFS(root_path, depth=3, width=2, algorithm='sha256')
    global put_file
    # Bind the HashFS instance as the first argument of put_file. If setup()
    # has already run, put_file is a functools.partial, so unwrap the original
    # function via its .func attribute before re-binding the new store.
    put_file = partial(getattr(put_file, 'func', put_file), fs)

def image_path(instance, filename):
    basename, ext = os.path.splitext(filename)
    fs = HashFS('photos', depth=4, width=2, algorithm='sha256')
    # Hash the uploaded image's content without writing it to the store yet.
    stream = getattr(instance, 'image').chunks()
    id = fs.computehash(stream)
    # idpath (defined elsewhere) maps the hash id onto the sharded storage path.
    return idpath(fs, id, extension=ext)
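
A hypothetical companion helper (not part of the snippet above) showing how the same store could actually persist the upload; it assumes the Django field file is readable and uses fs.put, which returns a HashAddress namedtuple:

def store_image(instance):
    # Hypothetical sketch: write the uploaded image into the same HashFS store
    # used by image_path, deriving the path from the sha256 digest of its content.
    fs = HashFS('photos', depth=4, width=2, algorithm='sha256')
    image = getattr(instance, 'image')
    ext = os.path.splitext(image.name)[1]
    address = fs.put(image, extension=ext)
    return address.relpath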