class ClientTxnLog(HasFileStorage):
    """
    An immutable, file-backed log of transactions made by the client,
    keyed by (identifier + request id).
    """

    def __init__(self, name, baseDir=None):
        self.dataDir = "data/clients"
        self.name = name
        HasFileStorage.__init__(self, name, baseDir=baseDir,
                                dataDir=self.dataDir)
        self.clientDataLocation = self.dataLocation
        if not os.path.exists(self.clientDataLocation):
            os.makedirs(self.clientDataLocation)
        self.transactionLog = TextFileStore(self.clientDataLocation,
                                            "transactions")
        self.serializer = CompactSerializer(fields=self.txnFieldOrdering)

    @property
    def txnFieldOrdering(self):
        # Transaction field order, extended with the sequence-number field.
        fields = getTxnOrderedFields()
        return updateFieldsWithSeqNo(fields)

    def append(self, identifier: str, reqId, txn):
        """Persist `txn` under the composite key identifier + reqId."""
        key = '{}{}'.format(identifier, reqId)
        self.transactionLog.put(key=key,
                                value=self.serializer.serialize(
                                    txn,
                                    fields=self.txnFieldOrdering,
                                    toBytes=False))

    def hasTxn(self, identifier, reqId) -> bool:
        """Return True iff a txn for (identifier, reqId) was logged."""
        # BUG FIX: the original shadowed the composed key with the loop
        # variable and compared it to str(reqId) only, so it could never
        # match the identifier+reqId keys that `append` writes.
        key = '{}{}'.format(identifier, reqId)
        for storedKey in self.transactionLog.iterator(includeKey=True,
                                                      includeValue=False):
            if storedKey == key:
                return True
        return False
class ClientTxnLog(HasFileStorage):
    """An immutable, file-backed log of transactions made by the client."""

    def __init__(self, name, baseDir=None):
        self.dataDir = "data/clients"
        self.name = name
        HasFileStorage.__init__(self, name, baseDir=baseDir,
                                dataDir=self.dataDir)
        self.clientDataLocation = self.dataLocation
        if not os.path.exists(self.clientDataLocation):
            os.makedirs(self.clientDataLocation)
        self.transactionLog = TextFileStore(self.clientDataLocation,
                                            "transactions")
        self.serializer = CompactSerializer(fields=self.txnFieldOrdering)

    @property
    def txnFieldOrdering(self):
        # Ordered txn fields, extended with the sequence-number field.
        return updateFieldsWithSeqNo(getTxnOrderedFields())

    def append(self, reqId, txn):
        """Persist `txn` in compact text form, keyed by the request id."""
        serialized = self.serializer.serialize(txn,
                                               fields=self.txnFieldOrdering,
                                               toBytes=False)
        self.transactionLog.put(key=str(reqId), value=serialized)

    def hasTxnWithReqId(self, reqId) -> bool:
        """Return True iff a txn for `reqId` has been logged."""
        wanted = str(reqId)
        # Scan keys only; values are not needed for the membership test.
        return any(storedKey == wanted
                   for storedKey in self.transactionLog.iterator(
                       includeKey=True, includeValue=False))
class EntityFileStore(EntityStore):
    """Entity store persisted in a text file on disk."""

    def __init__(self, name: str, dataDir: str):
        # Underlying key/value file named `name`, living under `dataDir`.
        self._db = TextFileStore(dbDir=dataDir, dbName=name)

    def add(self, name: str, entity):
        """Store `entity` under the key `name`."""
        self._db.put(name, entity)

    def get(self, name: str):
        """Return whatever is stored under `name`."""
        return self._db.get(name)
def __init__(self, name, baseDir=None):
    """Set up file storage, the transaction log and the txn serializer."""
    self.dataDir = "data/clients"
    self.name = name
    HasFileStorage.__init__(self, name, baseDir=baseDir,
                            dataDir=self.dataDir)
    self.clientDataLocation = self.dataLocation
    # Idempotent directory creation (same effect as exists()+makedirs()).
    os.makedirs(self.clientDataLocation, exist_ok=True)
    self.transactionLog = TextFileStore(self.clientDataLocation,
                                        "transactions")
    self.serializer = CompactSerializer(fields=self.txnFieldOrdering)
def start(self, loop=None, ensureDurability=True):
    """(Re)open the transaction log file; no-op if it is already open."""
    if self._transactionLog and not self._transactionLog.closed:
        logging.debug("Ledger already started.")
        return
    logging.debug("Starting ledger...")
    # Durability is forced on if either the call or the instance asks for it.
    durable = ensureDurability or self.ensureDurability
    self._transactionLog = TextFileStore(
        self.dataDir, self._transactionLogName,
        isLineNoKey=True,
        storeContentHash=False,
        ensureDurability=durable)
def testMeasureWriteTime(tempdir):
    """Compare write throughput with and without fsync-per-write."""
    hashes = [hexlify(h).decode() for h in generateHashes(1000)]

    def timedWrite(storeName, **storeKwargs):
        # Write every hash to a fresh store; return the elapsed seconds.
        store = TextFileStore(tempdir, storeName, isLineNoKey=True,
                              storeContentHash=False, **storeKwargs)
        begin = time.time()
        for h in hashes:
            store.put(value=h)
        return time.time() - begin

    timeTakenWithSync = timedWrite('benchWithSync')
    timeTakenWithoutSync = timedWrite('benchWithoutSync',
                                      ensureDurability=False)

    print("Time taken to write {} entries to file with fsync is {} "
          "seconds".format(len(hashes), timeTakenWithSync))
    print("Time taken to write {} entries to file without fsync is {} "
          "seconds".format(len(hashes), timeTakenWithoutSync))
    print("So the difference is {} seconds".format(timeTakenWithSync -
                                                   timeTakenWithoutSync))

    # On most platforms the ratio between write time with fsync and
    # write time without fsync typically must be greater than 100.
    # But on Windows Server 2012 this ratio may be less - down to 30.
    assert timeTakenWithoutSync * 10 < timeTakenWithSync, "ratio is {}".\
        format(timeTakenWithSync / timeTakenWithoutSync)
def _defaultStore(dataDir, logName, ensureDurability, defaultFile) -> FileStore:
    """Build the standard line-numbered text store for a transaction log."""
    storeOptions = dict(isLineNoKey=True,
                        storeContentHash=False,
                        ensureDurability=ensureDurability,
                        defaultFile=defaultFile)
    return TextFileStore(dataDir, logName, **storeOptions)
def testMeasureWriteTime(tempdir):
    """Benchmark TextFileStore writes with fsync enabled vs disabled."""
    hashes = [hexlify(h).decode() for h in generateHashes(1000)]

    syncStore = TextFileStore(tempdir, 'benchWithSync', isLineNoKey=True,
                              storeContentHash=False)
    begin = time.time()
    for h in hashes:
        syncStore.put(value=h)
    timeTakenWithSync = time.time() - begin

    noSyncStore = TextFileStore(tempdir, 'benchWithoutSync',
                                isLineNoKey=True,
                                storeContentHash=False,
                                ensureDurability=False)
    begin = time.time()
    for h in hashes:
        noSyncStore.put(value=h)
    timeTakenWithoutSync = time.time() - begin

    print("Time taken to write {} entries to file with fsync is {} "
          "seconds".format(len(hashes), timeTakenWithSync))
    print("Time taken to write {} entries to file without fsync is {} "
          "seconds".format(len(hashes), timeTakenWithoutSync))
    print("So the difference is {} seconds".
          format(timeTakenWithSync - timeTakenWithoutSync))

    # On most platforms the ratio between write time with fsync and
    # write time without fsync typically must be greater than 100.
    # But on Windows Server 2012 this ratio may be less - down to 30.
    assert timeTakenWithoutSync * 10 < timeTakenWithSync, "ratio is {}".\
        format(timeTakenWithSync / timeTakenWithoutSync)
def initStorage(storageType, name, dataDir=None, config=None):
    """
    Create a storage backend of the requested type.

    :raises DataDirectoryNotFound: file storage requested without a dataDir
    :raises DBConfigNotFound: OrientDB storage requested without a config
    """
    if storageType == StorageType.File:
        if dataDir is None:
            raise DataDirectoryNotFound
        return TextFileStore(dataDir, name)

    if storageType == StorageType.OrientDB:
        if config is None:
            raise DBConfigNotFound
        orientConf = config.OrientDB
        return OrientDbStore(user=orientConf["user"],
                             password=orientConf["password"],
                             host=orientConf["host"],
                             port=orientConf["port"],
                             dbName=name)
def test_equality_to_text_file_store(tmpdir):
    """Verify that ChunkedFileStore behaves exactly like TextFileStore."""
    dbDir = str(tmpdir)

    # Seed both stores from the same pre-populated template file.
    templatePath = os.path.join(dbDir, "template")
    templateLines = [
        "FirstLine\n",
        "OneMoreLine\n",
        "AnotherLine\n",
        "LastDefaultLine\n"
    ]
    with open(templatePath, "w") as template:
        template.writelines(templateLines)

    chunkSize = len(templateLines)
    sharedOptions = dict(isLineNoKey=True,
                         storeContentHash=False,
                         ensureDurability=True,
                         defaultFile=templatePath)
    chunkedStore = ChunkedFileStore(dbDir=dbDir,
                                    dbName="chunked_data",
                                    chunkSize=chunkSize,
                                    chunkStoreConstructor=TextFileStore,
                                    **sharedOptions)
    textStore = TextFileStore(dbDir=dbDir,
                              dbName="text_data",
                              **sharedOptions)

    # Write enough entries to span several chunks and compare lookups.
    for i in range(1, 5 * chunkSize):
        value = str(i)
        chunkedStore.put(value)
        textStore.put(value)
        assert textStore.get(value) == chunkedStore.get(value)

    assert list(chunkedStore.iterator()) == list(textStore.iterator())
def __init__(self, name: str, dataDir: str):
    """Open (or create) the backing text file store named `name`."""
    self._db = TextFileStore(dbDir=dataDir, dbName=name)
def getLedger(baseDir, dbName, storeHash=True, isLineNoKey: bool = False):
    """Return a TextFileStore configured as a ledger transaction log."""
    store = TextFileStore(dbDir=baseDir,
                          dbName=dbName,
                          isLineNoKey=isLineNoKey,
                          storeContentHash=storeHash)
    return store
class Ledger(ImmutableStore):
    """
    An append-only transaction log whose integrity is tracked by a Merkle
    tree: each appended entry is serialized, stored in a line-numbered text
    file and hashed into the tree, yielding per-entry merkle proofs.
    """

    def __init__(self, tree: MerkleTree, dataDir: str,
                 serializer: MappingSerializer = None,
                 fileName: str = None,
                 ensureDurability: bool = True):
        """
        :param tree: an implementation of MerkleTree
        :param dataDir: the directory where the transaction log is stored
        :param serializer: an object that can serialize the data before
            hashing it and storing it in the MerkleTree
        :param fileName: the name of the transaction log file
        :param ensureDurability: fsync the log file on every write
        """
        self.dataDir = dataDir
        self.tree = tree
        # Leaves default to JSON serialization when none is supplied.
        self.leafSerializer = serializer or \
            JsonSerializer()  # type: MappingSerializer
        self.hasher = TreeHasher()
        self._transactionLog = None  # type: FileStore
        self._transactionLogName = fileName or "transactions"
        self.ensureDurability = ensureDurability
        self.start(ensureDurability=ensureDurability)
        self.seqNo = 0
        # Rebuild the in-memory Merkle tree from the persisted log.
        self.recoverTree()

    def recoverTree(self):
        """Rebuild the Merkle tree state after a restart."""
        # TODO: Should probably have 2 classes of hash store,
        # persistent and non persistent
        # TODO: this definitely should be done in a more generic way:
        if not isinstance(self.tree, CompactMerkleTree):
            logging.error("Do not know how to recover {}".format(self.tree))
            raise TypeError("Merkle tree type {} is not supported".format(
                type(self.tree)))

        # ATTENTION!
        # This functionality is disabled until better consistency verification
        # implemented - always using recovery from transaction log
        # if not self.tree.hashStore \
        #         or isinstance(self.tree.hashStore, MemoryHashStore) \
        #         or self.tree.leafCount == 0:
        #     logging.debug("Recovering tree from transaction log")
        #     self.recoverTreeFromTxnLog()
        # else:
        #     try:
        #         logging.debug("Recovering tree from hash store of size {}".
        #                       format(self.tree.leafCount))
        #         self.recoverTreeFromHashStore()
        #     except ConsistencyVerificationFailed:
        #         logging.error("Consistency verification of merkle tree "
        #                       "from hash store failed, "
        #                       "falling back to transaction log")
        #         self.recoverTreeFromTxnLog()
        logging.debug("Recovering tree from transaction log")
        start = time.perf_counter()
        self.recoverTreeFromTxnLog()
        end = time.perf_counter()
        t = end - start
        logging.debug(
            "Recovered tree from transaction log in {} seconds".format(t))

    def recoverTreeFromTxnLog(self):
        """Replay every log entry into a freshly reset tree/hash store."""
        self.tree.hashStore.reset()
        for key, entry in self._transactionLog.iterator():
            record = self.leafSerializer.deserialize(entry)
            self._addToTree(record)

    def recoverTreeFromHashStore(self):
        """
        Restore the tree from the persisted hash store and verify it is
        consistent with the transaction log size.
        NOTE(review): currently unused - recoverTree always replays the
        transaction log instead (see the disabled branch above).
        """
        treeSize = self.tree.leafCount
        self.seqNo = treeSize
        hashes = list(
            reversed(self.tree.inclusion_proof(treeSize, treeSize + 1)))
        self.tree._update(self.tree.leafCount, hashes)
        self.tree.verifyConsistency(self._transactionLog.numKeys)

    def add(self, leaf):
        """Persist `leaf`, hash it into the tree, return its merkle info."""
        self._addToStore(leaf)
        merkleInfo = self._addToTree(leaf)
        return merkleInfo

    def _addToTree(self, leafData):
        """Append the serialized leaf to the tree and build the merkle info
        (sequence number, base64 root hash, base64 audit path)."""
        serializedLeafData = self.serializeLeaf(leafData)
        auditPath = self.tree.append(serializedLeafData)
        self.seqNo += 1
        merkleInfo = {
            F.seqNo.name: self.seqNo,
            F.rootHash.name: base64.b64encode(self.tree.root_hash).decode(),
            F.auditPath.name: [base64.b64encode(h).decode()
                               for h in auditPath]
        }
        return merkleInfo

    def _addToStore(self, data):
        """Write `data` to the transaction log under the next seq number."""
        key = str(self.seqNo + 1)
        self._transactionLog.put(key=key,
                                 value=self.leafSerializer.serialize(
                                     data, toBytes=False))

    def append(self, txn):
        """Append a transaction; thin alias for `add`."""
        merkleInfo = self.add(txn)
        return merkleInfo

    def get(self, **kwargs):
        """Return the first stored txn matching all `kwargs` (with its seq
        number attached), or None if nothing matches.
        NOTE(review): the match compares *sets* of values, so kwargs with
        swapped values could match incorrectly - confirm intent."""
        for seqNo, value in self._transactionLog.iterator():
            data = self.leafSerializer.deserialize(value)
            # If `kwargs` is a subset of `data`
            if set(kwargs.values()) == {data.get(k) for k in kwargs.keys()}:
                data[F.seqNo.name] = int(seqNo)
                return data

    def getBySeqNo(self, seqNo):
        """Return the deserialized txn at `seqNo`, or the raw falsy value
        (e.g. None) when the log has no such key."""
        key = str(seqNo)
        value = self._transactionLog.get(key)
        if value:
            return self.leafSerializer.deserialize(value)
        else:
            return value

    def __getitem__(self, seqNo):
        return self.getBySeqNo(seqNo)

    def lastCount(self):
        """Sequence number of the last entry, or 0 if the log is empty."""
        key = self._transactionLog.lastKey
        return 0 if key is None else int(key)

    def serializeLeaf(self, leafData):
        """Serialize leaf data exactly as it is hashed into the tree."""
        return self.leafSerializer.serialize(leafData)

    @property
    def size(self) -> int:
        """Number of leaves currently in the Merkle tree."""
        return self.tree.tree_size

    def __len__(self):
        return self.size

    @property
    def root_hash(self) -> str:
        """Base64-encoded root hash of the current tree."""
        return base64.b64encode(self.tree.root_hash).decode()

    def merkleInfo(self, seqNo):
        """Root hash and audit path proving inclusion of entry `seqNo`
        (1-based; must be positive)."""
        seqNo = int(seqNo)
        assert seqNo > 0
        rootHash = self.tree.merkle_tree_hash(0, seqNo)
        auditPath = self.tree.inclusion_proof(seqNo - 1, seqNo)
        return {
            F.rootHash.name: base64.b64encode(rootHash).decode(),
            F.auditPath.name: [base64.b64encode(h).decode()
                               for h in auditPath]
        }

    def start(self, loop=None, ensureDurability=True):
        """Open the transaction log file; no-op if it is already open."""
        if self._transactionLog and not self._transactionLog.closed:
            logging.debug("Ledger already started.")
        else:
            logging.debug("Starting ledger...")
            # Durability is on if either the call or the instance asks.
            ensureDurability = ensureDurability or self.ensureDurability
            self._transactionLog = TextFileStore(
                self.dataDir, self._transactionLogName,
                isLineNoKey=True,
                storeContentHash=False,
                ensureDurability=ensureDurability)

    def stop(self):
        """Close the transaction log file."""
        self._transactionLog.close()

    def reset(self):
        """Wipe the transaction log (the tree itself is not reset here)."""
        self._transactionLog.reset()

    def getAllTxn(self, frm: int = None, to: int = None):
        """Return an OrderedDict of seqNo -> txn for seq numbers in the
        inclusive range [frm, to]; either bound may be None (unbounded)."""
        result = OrderedDict()
        for seqNo, txn in self._transactionLog.iterator():
            seqNo = int(seqNo)
            if (frm is None or seqNo >= frm) and \
                    (to is None or seqNo <= to):
                result[seqNo] = self.leafSerializer.deserialize(txn)
            if to is not None and seqNo > to:
                break
        return result
def initStorage(storageType, name, dataDir=None, config=None):
    """
    Create a storage backend; this variant only supports file storage.

    :raises DataDirectoryNotFound: file storage requested without a dataDir
    """
    if storageType != StorageType.File:
        return None
    if dataDir is None:
        raise DataDirectoryNotFound
    return TextFileStore(dataDir, name)