class DBWriter(threading.Thread):
    def __init__(self, path, f_len):
        threading.Thread.__init__(self)
        self.path = path
        if os.path.exists(self.path):
            os.remove(self.path)
        open(self.path, 'w').close()
        self.f_len = f_len
        print 'simulating data block with size = %s ...' % f_len
        self.db = None
        self.checksum = hashlib.sha1()

    def run(self):
        self.db = DataBlock(self.path)  # self.f_len)
        f_len = self.f_len
        parts = random.randint(1, 11)
        for i in xrange(parts):
            data = ''.join(random.choice(string.letters) for _ in xrange(f_len / parts))
            self.checksum.update(data)
            self.db.write(data)
            time.sleep(.1)
        rest = f_len - (f_len / parts) * parts
        if rest:
            data = ''.join(random.choice(string.letters) for _ in xrange(rest))
            self.checksum.update(data)
            self.db.write(data)
        self.db.finalize()
        self.db.close()

    def get_checksum(self):
        return self.checksum.hexdigest()
def iter(self, start_record_id=None):
    JLock.lock()
    try:
        j_data = DataBlock(self.__journal_path, actsize=True)
        buf = ''
        while True:
            if len(buf) < self.RECORD_STRUCT_SIZE:
                buf += j_data.read(1024)
            if not buf:
                break
            # record header: item dump length, operation type, record id
            item_dump_len, operation_type, record_id = struct.unpack(
                self.RECORD_STRUCT, buf[:self.RECORD_STRUCT_SIZE])
            if operation_type not in (self.OT_APPEND, self.OT_UPDATE, self.OT_REMOVE):
                raise RuntimeError('Invalid journal! Unknown operation type: %s' % operation_type)
            # keep reading until the whole item dump is buffered
            # (a single read(1024) is not enough for large dumps)
            while len(buf) < (self.RECORD_STRUCT_SIZE + item_dump_len):
                chunk = j_data.read(1024)
                if not chunk:
                    break
                buf += chunk
            item_dump = buf[self.RECORD_STRUCT_SIZE:self.RECORD_STRUCT_SIZE + item_dump_len]
            # records are padded to a BLOCK_SIZE boundary; skip the padding too
            remaining_len = BLOCK_SIZE - self.RECORD_STRUCT_SIZE - item_dump_len
            to_pad_len = remaining_len % BLOCK_SIZE
            buf = buf[self.RECORD_STRUCT_SIZE + item_dump_len + to_pad_len:]
            self.__last_record_id = record_id
            if (start_record_id is None) or (record_id > start_record_id):
                if operation_type == self.OT_REMOVE:
                    # remove records store only the item id
                    item_md = struct.unpack('<I', item_dump)[0]
                else:
                    item_md = AbstractMetadataObject.load_md(item_dump)
                logger.debug('J_ITER: record_id=%s, operation_type=%s, item_md=%s'
                             % (record_id, operation_type, item_md))
                yield record_id, operation_type, item_md
    finally:
        JLock.unlock()
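# Illustrative counterpart to iter() above: a sketch of how a single journal
# record could be packed so the parser's header/padding arithmetic works out.
# Not part of the original source -- it assumes BLOCK_SIZE is the same constant
# used by iter(), and the name pack_journal_record is hypothetical.
def pack_journal_record(record_id, operation_type, item_dump):
    # header layout <IBQ: item_dump_len (uint32), operation_type (uint8), record_id (uint64)
    header = struct.pack(Journal.RECORD_STRUCT, len(item_dump), operation_type, record_id)
    record = header + item_dump
    # pad so that header + dump + padding is a multiple of BLOCK_SIZE,
    # matching the to_pad_len skip logic in iter()
    to_pad_len = (BLOCK_SIZE - len(record) % BLOCK_SIZE) % BLOCK_SIZE
    return record + '\x00' * to_pad_len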
def __clear_dyn_cache(self, del_size):
    removed_size = 0
    self.__lock.acquire()
    try:
        del_lst = []
        for item in os.listdir(self.__dyn_cache):
            path = os.path.join(self.__dyn_cache, item)
            if DataBlock.is_locked(path):
                logger.debug('can not remove data block at %s because it is locked!' % path)
                continue
            del_lst.append((path, os.stat(path)))
        # evict least recently accessed data blocks first
        del_lst = sorted(del_lst, key=lambda i: i[1].st_atime)
        for path, stat in del_lst:
            logger.debug('clearing data block at %s' % path)
            DataBlock.remove_on_unlock(path)
            removed_size += stat.st_size
            if removed_size >= del_size:
                break
        self.__ph_free_size += removed_size
    finally:
        self.__lock.release()
    if removed_size < del_size:
        return False
    return True
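# Hypothetical caller sketch (not from the original source; the size and the
# error handling are illustrative): try to free ~1MB of dynamic cache and
# fail loudly when not enough unlocked data blocks could be evicted.
#
#   if not self.__clear_dyn_cache(1024 * 1024):
#       raise IOError('can not free enough space in the dynamic cache')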
def new_data_block(self, item_id, seek, size=None):
    path = self.__db_cache.get_cache_path('%s.%s' % (item_id, seek))
    if size is None:
        # open data block for write
        # FIXME: data block seek should be implemented in future
        open(path, 'w').close()
    data_block = DataBlock(path, size, force_create=True)
    return data_block
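# Hypothetical usage sketch (not from the original source; 'cache' stands for
# the owning cache manager instance, the item id and seek are illustrative):
#
#   db = cache.new_data_block('0123abcd', 0)  # size=None -> opened for write
#   db.write(chunk)
#   db.finalize()
#   db.close()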
class DBReader(threading.Thread):
    def __init__(self, path, f_len):
        threading.Thread.__init__(self)
        self.path = path
        self.f_len = f_len
        self.checksum = hashlib.sha1()

    def run(self):
        try:
            self.db = DataBlock(self.path, self.f_len)
            while True:
                data = self.db.read(1000)
                if not data:
                    break
                self.checksum.update(data)
            self.db.close()
            os.remove(self.path)
        except Exception, err:
            print 'ERROR: %s' % err
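# Minimal round-trip sketch tying DBWriter and DBReader together, assuming
# DataBlock supports one concurrent writer and one reader on the same path
# (the pattern the two threads above exercise). The function name below is
# illustrative, not from the original source.
def _roundtrip_example(path, f_len):
    writer = DBWriter(path, f_len)
    reader = DBReader(path, f_len)
    writer.start()
    reader.start()
    writer.join()
    reader.join()
    # the reader should observe exactly the bytes the writer produced
    assert writer.get_checksum() == reader.checksum.hexdigest()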
def remove_data_block(self, db_hash):
    path = os.path.join(self.__dyn_cache, db_hash)
    if not os.path.exists(path):
        return
    logger.debug('removing data block at %s' % path)
    DataBlock.remove_on_unlock(path)
def test_data_block(self):
    ks = FileBasedSecurityManager(CLIENT_KS_1024_PATH, PASSWD)
    DataBlock.SECURITY_MANAGER = ks
    DB_PATH = tmp('test_data_block.kst')
    DATA_LEN = 10
    if os.path.exists(DB_PATH):
        os.remove(DB_PATH)

    db = DataBlock(DB_PATH, DATA_LEN, force_create=True)
    checksum = hashlib.sha1()
    for i in xrange(DATA_LEN / 10):
        data = ''.join(random.choice(string.letters)
                       for _ in xrange(DATA_LEN / (DATA_LEN / 10)))
        checksum.update(data)
        db.write(data)
    db.close()
    db.close()  # should be OK
    or_checksum = checksum.hexdigest()
    enc_checksum = db.checksum()

    db = DataBlock(DB_PATH, DATA_LEN)
    ret_data = ''
    checksum = hashlib.sha1()
    while True:
        data = db.read(100)
        if not data:
            break
        ret_data += data
        checksum.update(data)
    self.assertEqual(or_checksum, checksum.hexdigest())
    self.assertEqual(db.checksum(), enc_checksum)

    db = DataBlock(DB_PATH, DATA_LEN)
    raw = db.read_raw()
    self.assertEqual(db.checksum(), enc_checksum)

    db = DataBlock(DB_PATH, DATA_LEN)
    raw = db.read()
    self.assertEqual(ret_data, raw)

    app_db = DataBlock(DB_PATH)
    app_db.write('The end!')
    app_db.finalize()
    app_db.close()

    db = DataBlock(DB_PATH, actsize=True)
    raw = db.read()
    self.assertEqual(ret_data + 'The end!', raw)
    db.close()
class Journal:
    # journal statuses
    JS_SYNC = 0
    JS_NOT_SYNC = 1
    JS_NOT_INIT = 2
    JS_SYNC_FAILED = 3

    # types of operations for journaling
    OT_APPEND = 1
    OT_UPDATE = 2
    OT_REMOVE = 3

    RECORD_STRUCT = '<IBQ'
    RECORD_STRUCT_SIZE = struct.calcsize(RECORD_STRUCT)

    def __init__(self, journal_key, journal_path, fabnet_gateway):
        self.__journal_key = journal_key
        self.__journal_path = journal_path
        self.__journal = DataBlock(self.__journal_path, force_create=True)
        self.__fabnet_gateway = fabnet_gateway
        self.__last_record_id = 0
        self.__no_foreign = True
        self.__is_sync = False
        self.__sync_failed = False
        self.__j_sync_thrd = JournalSyncThread(self)
        self.__j_sync_thrd.start()

    def get_journal_key(self):
        return self.__journal_key

    def __recv_journal(self):
        self.__journal.remove()
        self.__journal = DataBlock(self.__journal_path, force_create=True)
        is_recv = self.__fabnet_gateway.get(self.__journal_key, 2, self.__journal)
        if is_recv:
            self.__no_foreign = False
            self.__is_sync = True
            self.__journal.close()  # the next __journal.write() reopens the data block
            events_provider.info("journal", "Journal is received from NimbusFS backend")
        else:
            events_provider.warning("journal", "Can't receive journal from NimbusFS backend")
            self.__no_foreign = True

    def close(self):
        self.__journal.close()
        self.__j_sync_thrd.stop()

    @JLock
    def synchronized(self):
        return self.__is_sync

    @JLock
    def status(self):
        if self.__sync_failed:
            return self.JS_SYNC_FAILED
        if self.__no_foreign:
            return self.JS_NOT_INIT
        if not self.__is_sync:
            return self.JS_NOT_SYNC
        return self.JS_SYNC

    @JLock
    def _synchronize(self):
        try:
            logger.debug('synchronizing journal...')
            self.__journal.flush()
            j_data = DataBlock(self.__journal_path, actsize=True)
            is_send = self.__fabnet_gateway.put(j_data, key=self.__journal_key)
            if is_send:
                self.__is_sync = True
                self.__sync_failed = False
        except Exception:
            self.__sync_failed = True
            raise  # re-raise with the original traceback preserved
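# Usage sketch for the Journal class (hypothetical wiring, not from the
# original source; the gateway object, key and path values are placeholders,
# and apply_record is an illustrative helper):
#
#   journal = Journal(journal_key, '/tmp/nimbus.journal', fabnet_gateway)
#   try:
#       for record_id, op_type, item_md in journal.iter():
#           apply_record(op_type, item_md)
#   finally:
#       journal.close()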
def test01_trans_manager(self):
    db_cache = DataBlockCache(tmp('smart_file_test'), user_id=sha1('test').hexdigest())
    open(db_cache.get_static_cache_path('transactions-share.log'), 'w').close()
    try:
        db_cache.clear_all()
        md = MetadataFile(db_cache.get_static_cache_path('test_md.bin'))
        tr_manager = TransactionsManager(md, db_cache, 2)

        transaction_id = tr_manager.start_upload_transaction('/not_cached_test.file')
        tr_manager.transfer_data_block(transaction_id, 0, 3500,
                DataBlock(db_cache.get_cache_path('fake_for_delete'), force_create=True))
        self.assertTrue(os.path.exists(db_cache.get_cache_path('fake_for_delete')))
        fake_for_delete_name = '%s.%s' % (transaction_id, 0)

        transaction_id = tr_manager.start_upload_transaction('/my_second_test.file')
        tr_manager.transfer_data_block(transaction_id, 0, 1000,
                DataBlock(db_cache.get_cache_path('fake')))
        tr_manager.transfer_data_block(transaction_id, 1000, 2000,
                DataBlock(db_cache.get_cache_path('fake1')))
        tr_manager.update_transaction_state(transaction_id, Transaction.TS_LOCAL_SAVED)
        tr_manager.transfer_data_block(transaction_id, 0, 1000,
                DataBlock(db_cache.get_cache_path('fake2')), '%040x' % 123456)

        transaction = tr_manager.start_download_transaction('/test.file')
        db, _, _ = transaction.get_data_block(0)
        read_block_name = '%s.%s' % (transaction.get_id(), 0)
        self.assertTrue(os.path.exists(db_cache.get_cache_path(read_block_name)))
        db.close()

        md.close()
        tr_manager.close()
        self.assertFalse(DataBlock.is_locked(db_cache.get_cache_path(read_block_name)))

        md = MetadataFile(db_cache.get_static_cache_path('test_md.bin'))
        tr_manager = TransactionsManager(md, db_cache, 2)
        up_queue = tr_manager.get_upload_queue()
        self.assertEqual(up_queue.qsize(), 1)
        self.assertFalse(os.path.exists(db_cache.get_cache_path(fake_for_delete_name)))
        self.assertFalse(os.path.exists(db_cache.get_cache_path(read_block_name)), read_block_name)
        md.close()
        tr_manager.close()

        open(db_cache.get_static_cache_path('transactions-share.log'), 'w').close()
        md = MetadataFile(db_cache.get_static_cache_path('test_md.bin'))
        tr_manager = TransactionsManager(md, db_cache, 5)
        transactions = []
        for i in xrange(7):
            transaction_id = tr_manager.start_upload_transaction('/%s_test.file' % i)
            transactions.append(transaction_id)
        cnt = 0
        for i, (is_up, path, stat, size, progress) in enumerate(tr_manager.iterate_transactions()):
            self.assertEqual(path, '/%s_test.file' % i)
            cnt += 1
        self.assertEqual(cnt, 7)

        for tr_id in transactions:
            tr_manager.update_transaction_state(tr_id, Transaction.TS_FAILED)
        cnt = 0
        tr_manager.start_upload_transaction('/7_test.file')
        for i, (is_up, path, stat, size, progress) in enumerate(tr_manager.iterate_transactions()):
            self.assertEqual(path, '/%s_test.file' % (i + 3))
            cnt += 1
        self.assertEqual(cnt, 5)

        for i in xrange(5):
            tr_manager.start_upload_transaction('/%s_2_test.file' % i)
    finally:
        db_cache.stop()
def test00_base(self):
    ks = FileBasedSecurityManager(CLIENT_KS_PATH, PASSWD)
    DataBlock.SECURITY_MANAGER = ks
    DataBlock.LOCK_MANAGER = DBLocksManager()
    remove_dir(tmp('smart_file_test'))
    os.makedirs(tmp('smart_file_test/dynamic_cache'))
    os.makedirs(tmp('smart_file_test/static_cache'))
    db_cache = DataBlockCache(tmp('smart_file_test'), user_id=sha1('test').hexdigest())
    md = MetadataFile(db_cache.get_static_cache_path('test_md.bin'))
    tr_manager = None
    try:
        tr_manager = TransactionsManager(md, db_cache)
        p_queue = tr_manager.get_upload_queue()
        g_queue = tr_manager.get_download_queue()
        SmartFileObject.setup_transaction_manager(tr_manager)

        e_file = SmartFileObject('/empty_file', for_write=True)
        e_file.close()
        e_file = SmartFileObject('/empty_file')
        data = e_file.read()
        self.assertEqual(data, '')
        e_file.close()

        test_file = SmartFileObject('/test.file', for_write=True)
        test_file.write('this is test message for one data block!')
        test_file.close()

        put_obj = p_queue.get(False)
        transaction, seek = put_obj
        self.assertEqual(seek, 0)
        data_block, next_seek, _ = transaction.get_data_block(seek, noclone=True)
        self.assertNotEqual(data_block, None)
        self.assertEqual(next_seek, None)
        data_block.close()
        tr_manager.update_transaction(transaction.get_id(), seek,
                                      is_failed=False, foreign_name='%040x' % 123456)
        self.assertEqual(transaction.get_status(), Transaction.TS_FINISHED)
        db_path = '%s.%s' % (transaction.get_id(), seek)
        self.assertFalse(DataBlock.is_locked(db_cache.get_cache_path(db_path)))

        test_file = SmartFileObject('/test.file')
        data = test_file.read(4)
        self.assertEqual(data, 'this')
        self.assertTrue(DataBlock.is_locked(db_cache.get_cache_path(db_path)))
        data = test_file.read()
        self.assertEqual(data, ' is test message for one data block!')
        test_file.close()
        self.assertFalse(DataBlock.is_locked(db_cache.get_cache_path(db_path)))

        with self.assertRaises(Empty):
            get_obj = g_queue.get(False)

        mgt = MockedGetThread(g_queue)
        mgt.start()

        open(db_cache.get_cache_path(db_path), 'w').write('invalid data')  # failed local data block
        test_file = SmartFileObject('/test.file')
        data = test_file.read()
        self.assertEqual(data, 'this is test message for one data block!')
        test_file.close()

        db_cache.clear_all()
        test_file = SmartFileObject('/test.file')
        data = test_file.read()
        self.assertEqual(data, 'this is test message for one data block!')
        test_file.close()
        self.assertFalse(DataBlock.is_locked(db_cache.get_cache_path('%040x' % 123456)))
    finally:
        if tr_manager:
            g_queue.put((None, None))
            tr_manager.close()
        md.close()
        db_cache.stop()