def test_hits(self):
    for size in SIZES:
        l = LRU(size)
        for i in range(size):
            l[i] = str(i)

        val = l[0]
        self.assertTrue(l.get_hits() == 1)
        self.assertTrue(l.get_misses() == 0)

        val = l.get(0, None)
        self.assertTrue(l.get_hits() == 2)
        self.assertTrue(l.get_misses() == 0)

        val = l.get(-1, None)
        self.assertTrue(l.get_hits() == 2)
        self.assertTrue(l.get_misses() == 1)

        try:
            val = l[-1]
        except KeyError:
            pass
        self.assertTrue(l.get_hits() == 2)
        self.assertTrue(l.get_misses() == 2)

        l.clear()
        self.assertTrue(len(l) == 0)
        self.assertTrue(l.get_hits() == 0)
        self.assertTrue(l.get_misses() == 0)
def test_stats(self):
    for size in SIZES:
        l = LRU(size)
        for i in range(size):
            l[i] = str(i)

        self.assertTrue(l.get_stats() == (0, 0))

        val = l[0]
        self.assertTrue(l.get_stats() == (1, 0))

        val = l.get(0, None)
        self.assertTrue(l.get_stats() == (2, 0))

        val = l.get(-1, None)
        self.assertTrue(l.get_stats() == (2, 1))

        try:
            val = l[-1]
        except KeyError:
            pass
        self.assertTrue(l.get_stats() == (2, 2))

        l.clear()
        self.assertTrue(len(l) == 0)
        self.assertTrue(l.get_stats() == (0, 0))
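# A minimal sketch of the hit/miss statistics API exercised by the two tests
# above, assuming the LRU class from the lru-dict package:
from lru import LRU

cache = LRU(2)
cache["a"] = 1
cache["a"]                     # indexing a present key counts as a hit
cache.get("missing", None)     # get() on an absent key counts as a miss
print(cache.get_hits(), cache.get_misses())   # 1 1
print(cache.get_stats())       # (1, 1)
cache.clear()                  # clear() also resets the counters
print(cache.get_stats())       # (0, 0)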
class CacheMixin(object):
    MAX_CACHE_SPACE = 40000000
    CACHE_LENGTH = 10000

    def __init__(self, *args, **kwargs):
        super(CacheMixin, self).__init__(*args, **kwargs)
        self._caches = None
        self.clear_cache()
        self._raw_history_bars = self.raw_history_bars
        self.raw_history_bars = self.decorator_raw_history_bars(self.raw_history_bars)

    @classmethod
    def set_cache_length(cls, value):
        cls.CACHE_LENGTH = value

    @classmethod
    def set_max_cache_space(cls, value):
        cls.MAX_CACHE_SPACE = value

    def clear_cache(self):
        if self._caches is None:
            self._caches = LRU(self.MAX_CACHE_SPACE // self.CACHE_LENGTH)
        else:
            self._caches.clear()

    def update_cache(self, cache, dt):
        if len(cache):
            last = cache.last_dt + timedelta(seconds=1)
        else:
            bar_data = self._raw_history_bars(cache.instrument, cache.frequency,
                                              end_dt=dt - timedelta(seconds=1),
                                              length=cache.chunk)
            if bar_data is not None and len(bar_data):
                cache.update_bars(bar_data, len(bar_data))
            last = dt
        bar_data = self._raw_history_bars(cache.instrument, cache.frequency,
                                          start_dt=last, length=cache.chunk)
        if bar_data is not None and len(bar_data):
            cache.update_bars(bar_data, cache.chunk)
        else:
            cache.close()

    def decorator_raw_history_bars(self, func):
        @functools.wraps(func)
        def wrapped(instrument, frequency, start_dt=None, end_dt=None, length=None):
            key = (instrument.order_book_id, frequency)
            if key not in self._caches:
                self._caches[key] = Cache(self, self.CACHE_LENGTH, instrument, frequency)
            data = self._caches[key].raw_history_bars(start_dt, end_dt, length)
            if data is not None:
                return data
            else:
                system_log.debug("Cache miss: instrument [{}] frequency [{}] from {} to {}, length {}".format(
                    instrument.order_book_id, frequency, start_dt, end_dt, length
                ))
                return func(instrument, frequency, start_dt=start_dt, end_dt=end_dt, length=length)
        return wrapped

    def raw_history_bars(self, instrument, frequency, start_dt=None, end_dt=None, length=None):
        raise NotImplementedError
def test_clear(self):
    for size in SIZES:
        l = LRU(size)
        for i in range(size + 5):
            l[i] = str(i)
        l.clear()
        for i in range(size):
            l[i] = str(i)
        for i in range(size):
            _ = l[random.randint(0, size - 1)]
        l.clear()
        self.assertTrue(len(l) == 0)
def test_setdefault(self):
    l = LRU(2)
    l[1] = '1'
    val = l.setdefault(1)
    self.assertEqual('1', val)
    self.assertEqual((1, 0), l.get_stats())

    val = l.setdefault(2, '2')
    self.assertEqual('2', val)
    self.assertEqual((1, 1), l.get_stats())
    self.assertEqual(val, l[2])

    l.clear()
    val = 'long string' * 512
    l.setdefault(1, val)
    l[2] = '2'
    l[3] = '3'
    self.assertTrue(val)
class CacheRecord(BaseDB):
    def __init__(self, db: BaseDB, size=1024):
        self._db = db
        self._size = size
        self._cached = LRU(self._size)

    def cache_clear(self):
        self._cached.clear()

    def __setitem__(self, key, value):
        self._cached[key] = value
        self._db[key] = value

    def __getitem__(self, key):
        if key not in self._cached:
            self._cached[key] = self._db[key]
        return self._cached[key]

    def __contains__(self, key):
        return key in self._db
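# A self-contained usage sketch of the read-through/write-through pattern used
# by CacheRecord above, with a plain dict standing in for the BaseDB backend
# (the names below are illustrative, not part of any project API):
from lru import LRU

class CachedStore:
    def __init__(self, backend, size=1024):
        self._backend = backend          # any mapping-like store
        self._cached = LRU(size)

    def __setitem__(self, key, value):
        self._cached[key] = value        # write-through: cache and backend
        self._backend[key] = value

    def __getitem__(self, key):
        if key not in self._cached:      # read-through: fill cache on a miss
            self._cached[key] = self._backend[key]
        return self._cached[key]

store = CachedStore({"k": 1})
store["j"] = 2
print(store["k"], store["j"])            # 1 2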
# Would print [(3, '3'), (5, '5'), (2, '2')]

print(l.get_size())
# Would print 3

print(l.has_key(5))
# Would print True

print(2 in l)
# Would print True

print(l.get_stats())
# Would print (1, 0)

l.update({5: '0'})   # Update an item
print(l.items())
# Would print [(5, '0'), (3, '3'), (2, '2')]

l.clear()
print(l.items())
# Would print []

def evicted(key, value):
    print("removing: %s, %s" % (key, value))

l = LRU(1, callback=evicted)
l[1] = '1'
l[2] = '2'           # callback would print: removing: 1, 1
l[2] = '3'
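# A short companion sketch of the size-related calls (get_size, set_size,
# peek_first_item, peek_last_item), assuming the same lru-dict API as above:
from lru import LRU

l = LRU(3)
for i in range(3):
    l[i] = str(i)
print(l.peek_first_item())   # (2, '2') -- most recently used entry
print(l.peek_last_item())    # (0, '0') -- least recently used entry
l.set_size(1)                # shrinking evicts least-recently-used entries
print(l.items())             # [(2, '2')]
print(l.get_size())          # 1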
class AccountDB(AccountDatabaseAPI): logger = get_extended_debug_logger('eth.db.account.AccountDB') def __init__(self, db: AtomicDatabaseAPI, state_root: Hash32 = BLANK_ROOT_HASH) -> None: r""" Internal implementation details (subject to rapid change): Database entries go through several pipes, like so... .. code:: db > _batchdb ---------------------------> _journaldb ----------------> code lookups \ -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups Journaling sequesters writes at the _journal* attrs ^, until persist is called. _batchtrie enables us to prune all trie changes while building state, without deleting old trie roots. _batchdb and _batchtrie together enable us to make the state root, without saving everything to the database. _journaldb is a journaling of the keys and values used to store code and account storage. _trie is a hash-trie, used to generate the state root _trie_cache is a cache tied to the state root of the trie. It is important that this cache is checked *after* looking for the key in _journaltrie, because the cache is only invalidated after a state root change. _journaltrie is a journaling of the accounts (an address->rlp mapping, rather than the nodes stored by the trie). This enables a squashing of all account changes before pushing them into the trie. .. NOTE:: StorageDB works similarly AccountDB synchronizes the snapshot/revert/persist of both of the journals. """ self._raw_store_db = KeyAccessLoggerAtomicDB(db, log_missing_keys=False) self._batchdb = BatchDB(self._raw_store_db) self._batchtrie = BatchDB(self._raw_store_db, read_through_deletes=True) self._journaldb = JournalDB(self._batchdb) self._trie = HashTrie( HexaryTrie(self._batchtrie, state_root, prune=True)) self._trie_logger = KeyAccessLoggerDB(self._trie, log_missing_keys=False) self._trie_cache = CacheDB(self._trie_logger) self._journaltrie = JournalDB(self._trie_cache) self._account_cache = LRU(2048) self._account_stores: Dict[Address, AccountStorageDatabaseAPI] = {} self._dirty_accounts: Set[Address] = set() self._root_hash_at_last_persist = state_root self._accessed_accounts: Set[Address] = set() self._accessed_bytecodes: Set[Address] = set() @property def state_root(self) -> Hash32: return self._trie.root_hash @state_root.setter def state_root(self, value: Hash32) -> None: if self._trie.root_hash != value: self._trie_cache.reset_cache() self._trie.root_hash = value def has_root(self, state_root: bytes) -> bool: return state_root in self._batchtrie # # Storage # def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int: validate_canonical_address(address, title="Storage Address") validate_uint256(slot, title="Storage Slot") account_store = self._get_address_store(address) return account_store.get(slot, from_journal) def set_storage(self, address: Address, slot: int, value: int) -> None: validate_uint256(value, title="Storage Value") validate_uint256(slot, title="Storage Slot") validate_canonical_address(address, title="Storage Address") account_store = self._get_address_store(address) self._dirty_accounts.add(address) account_store.set(slot, value) def delete_storage(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") self._set_storage_root(address, BLANK_ROOT_HASH) self._wipe_storage(address) def _wipe_storage(self, address: Address) -> None: """ Wipe out the storage, without explicitly handling the storage root update """ account_store = self._get_address_store(address) 
self._dirty_accounts.add(address) account_store.delete() def _get_address_store(self, address: Address) -> AccountStorageDatabaseAPI: if address in self._account_stores: store = self._account_stores[address] else: storage_root = self._get_storage_root(address) store = AccountStorageDB(self._raw_store_db, storage_root, address) self._account_stores[address] = store return store def _dirty_account_stores( self) -> Iterable[Tuple[Address, AccountStorageDatabaseAPI]]: for address in self._dirty_accounts: store = self._account_stores[address] yield address, store @to_tuple def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]: # list all the accounts that were changed, and their new storage roots for address, store in self._dirty_account_stores(): if store.has_changed_root: yield address, store.get_changed_root() def _get_storage_root(self, address: Address) -> Hash32: account = self._get_account(address) return account.storage_root def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None: account = self._get_account(address) self._set_account(address, account.copy(storage_root=new_storage_root)) def _validate_flushed_storage(self, address: Address, store: AccountStorageDatabaseAPI) -> None: if store.has_changed_root: actual_storage_root = self._get_storage_root(address) expected_storage_root = store.get_changed_root() if expected_storage_root != actual_storage_root: raise ValidationError( "Storage root was not saved to account before trying to persist roots. " f"Account {address!r} had storage {actual_storage_root!r}, " f"but should be {expected_storage_root!r}.") # # Balance # def get_balance(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.balance def set_balance(self, address: Address, balance: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(balance, title="Account Balance") account = self._get_account(address) self._set_account(address, account.copy(balance=balance)) # # Nonce # def get_nonce(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.nonce def set_nonce(self, address: Address, nonce: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(nonce, title="Nonce") account = self._get_account(address) self._set_account(address, account.copy(nonce=nonce)) def increment_nonce(self, address: Address) -> None: current_nonce = self.get_nonce(address) self.set_nonce(address, current_nonce + 1) # # Code # def get_code(self, address: Address) -> bytes: validate_canonical_address(address, title="Storage Address") code_hash = self.get_code_hash(address) if code_hash == EMPTY_SHA3: return b'' else: try: return self._journaldb[code_hash] except KeyError: raise MissingBytecode(code_hash) #from KeyError finally: if code_hash in self._get_accessed_node_hashes(): self._accessed_bytecodes.add(address) def set_code(self, address: Address, code: bytes) -> None: validate_canonical_address(address, title="Storage Address") validate_is_bytes(code, title="Code") account = self._get_account(address) code_hash = keccak(code) self._journaldb[code_hash] = code self._set_account(address, account.copy(code_hash=code_hash)) def get_code_hash(self, address: Address) -> Hash32: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.code_hash def 
delete_code(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account.copy(code_hash=EMPTY_SHA3)) # # Account Methods # def account_has_code_or_nonce(self, address: Address) -> bool: return self.get_nonce(address) != 0 or self.get_code_hash( address) != EMPTY_SHA3 def delete_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") # We must wipe the storage first, because if it's the first time we load it, # then we want to load it with the original storage root hash, not the # empty one. (in case of a later revert, we don't want to poison the storage cache) self._wipe_storage(address) if address in self._account_cache: del self._account_cache[address] del self._journaltrie[address] def account_exists(self, address: Address) -> bool: validate_canonical_address(address, title="Storage Address") account_rlp = self._get_encoded_account(address, from_journal=True) return account_rlp != b'' def touch_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account) def account_is_empty(self, address: Address) -> bool: return not self.account_has_code_or_nonce( address) and self.get_balance(address) == 0 # # Internal # def _get_encoded_account(self, address: Address, from_journal: bool = True) -> bytes: self._accessed_accounts.add(address) lookup_trie = self._journaltrie if from_journal else self._trie_cache try: return lookup_trie[address] except trie_exceptions.MissingTrieNode as exc: raise MissingAccountTrieNode(*exc.args) from exc except KeyError: # In case the account is deleted in the JournalDB return b'' def _get_account(self, address: Address, from_journal: bool = True) -> Account: if from_journal and address in self._account_cache.keys(): return self._account_cache[address] rlp_account = self._get_encoded_account(address, from_journal) if rlp_account: account = rlp.decode(rlp_account, sedes=Account) else: account = Account() if from_journal: self._account_cache[address] = account return account def _set_account(self, address: Address, account: Account) -> None: self._account_cache[address] = account rlp_account = rlp.encode(account, sedes=Account) self._journaltrie[address] = rlp_account # # Record and discard API # def record(self) -> JournalDBCheckpoint: checkpoint = self._journaldb.record() self._journaltrie.record(checkpoint) for _, store in self._dirty_account_stores(): store.record(checkpoint) return checkpoint def discard(self, checkpoint: JournalDBCheckpoint) -> None: self._journaldb.discard(checkpoint) self._journaltrie.discard(checkpoint) self._account_cache.clear() for _, store in self._dirty_account_stores(): store.discard(checkpoint) def commit(self, checkpoint: JournalDBCheckpoint) -> None: self._journaldb.commit(checkpoint) self._journaltrie.commit(checkpoint) for _, store in self._dirty_account_stores(): store.commit(checkpoint) def lock_changes(self) -> None: for _, store in self._dirty_account_stores(): store.lock_changes() def make_state_root(self) -> Hash32: for _, store in self._dirty_account_stores(): store.make_storage_root() for address, storage_root in self._get_changed_roots(): if self.account_exists(address) or storage_root != BLANK_ROOT_HASH: self._set_storage_root(address, storage_root) self._journaldb.persist() diff = self._journaltrie.diff() if diff.deleted_keys() or diff.pending_items(): # In 
addition to squashing (which is redundant here), this context manager causes # an atomic commit of the changes, so exceptions will revert the trie with self._trie.squash_changes() as memory_trie: self._apply_account_diff_without_proof(diff, memory_trie) self._journaltrie.reset() self._trie_cache.reset_cache() return self.state_root def persist(self) -> MetaWitnessAPI: self.make_state_root() # persist storage with self._raw_store_db.atomic_batch() as write_batch: for address, store in self._dirty_account_stores(): self._validate_flushed_storage(address, store) store.persist(write_batch) for address, new_root in self._get_changed_roots(): if new_root is None: raise ValidationError( f"Cannot validate new root of account 0x{address.hex()} " f"which has a new root hash of None") elif new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH: raise ValidationError( "After persisting storage trie, a root node was not found. " f"State root for account 0x{address.hex()} " f"is missing for hash 0x{new_root.hex()}.") # generate witness (copy) before clearing the underlying data meta_witness = self._get_meta_witness() # reset local storage trackers self._account_stores = {} self._dirty_accounts = set() self._accessed_accounts = set() self._accessed_bytecodes = set() # We have to clear the account cache here so that future account accesses # will get added to _accessed_accounts correctly. Account accesses that # are cached do not add the address to the list of accessed accounts. self._account_cache.clear() # persist accounts self._validate_generated_root() new_root_hash = self.state_root with self._raw_store_db.atomic_batch() as write_batch: self._batchtrie.commit_to(write_batch, apply_deletes=False) self._batchdb.commit_to(write_batch, apply_deletes=False) self._root_hash_at_last_persist = new_root_hash return meta_witness def _get_accessed_node_hashes(self) -> Set[Hash32]: return cast(Set[Hash32], self._raw_store_db.keys_read) @to_dict def _get_access_list( self) -> Iterable[Tuple[Address, AccountQueryTracker]]: """ Get the list of addresses that were accessed, whether the bytecode was accessed, and which storage slots were accessed. """ for address in self._accessed_accounts: did_access_bytecode = address in self._accessed_bytecodes if address in self._account_stores: accessed_storage_slots = self._account_stores[ address].get_accessed_slots() else: accessed_storage_slots = frozenset() yield address, AccountQueryTracker(did_access_bytecode, accessed_storage_slots) def _get_meta_witness(self) -> MetaWitness: """ Get a variety of metadata about the state witness needed to execute the block. This creates a copy, so that underlying changes do not affect the returned MetaWitness. """ return MetaWitness(self._get_accessed_node_hashes(), self._get_access_list()) def _validate_generated_root(self) -> None: db_diff = self._journaldb.diff() if len(db_diff): raise ValidationError( f"AccountDB had a dirty db when it needed to be clean: {db_diff!r}" ) trie_diff = self._journaltrie.diff() if len(trie_diff): raise ValidationError( f"AccountDB had a dirty trie when it needed to be clean: {trie_diff!r}" ) def _apply_account_diff_without_proof(self, diff: DBDiff, trie: DatabaseAPI) -> None: """ Apply diff of trie updates, when original nodes might be missing. Note that doing this naively will raise exceptions about missing nodes from *intermediate* trie roots. This captures exceptions and uses the previous trie root hash that will be recognized by other nodes. 
""" # It's fairly common that when an account is deleted, we need to retrieve nodes # for accounts that were not needed during normal execution. We only need these # nodes to refactor the trie. for delete_key in diff.deleted_keys(): try: del trie[delete_key] except trie_exceptions.MissingTrieNode as exc: raise MissingAccountTrieNode( exc.missing_node_hash, self._root_hash_at_last_persist, exc.requested_key, ) from exc # It's fairly unusual, but possible, that setting an account will need unknown # nodes during a trie refactor. Here is an example that seems to cause it: # # Setup: # - Root node is a branch, with 0 pointing to a leaf # - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node # - We know the leaf node hash but not the leaf node body # Refactor that triggers missing node: # - Add value with key (0, 3, 4) # - We need to replace the current leaf node with a branch that points leaves at 1 and 3 # - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt # - We need the full body of the old (1, 2) leaf node, to rebuild for key, val in diff.pending_items(): try: trie[key] = val except trie_exceptions.MissingTrieNode as exc: raise MissingAccountTrieNode( exc.missing_node_hash, self._root_hash_at_last_persist, exc.requested_key, ) from exc
class AccountDB(BaseAccountDB): logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB')) def __init__(self, db: BaseAtomicDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None: r""" Internal implementation details (subject to rapid change): Database entries go through several pipes, like so... .. code:: db > _batchdb ---------------------------> _journaldb ----------------> code lookups \ -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups Journaling sequesters writes at the _journal* attrs ^, until persist is called. _batchtrie enables us to prune all trie changes while building state, without deleting old trie roots. _batchdb and _batchtrie together enable us to make the state root, without saving everything to the database. _journaldb is a journaling of the keys and values used to store code and account storage. _trie is a hash-trie, used to generate the state root _trie_cache is a cache tied to the state root of the trie. It is important that this cache is checked *after* looking for the key in _journaltrie, because the cache is only invalidated after a state root change. _journaltrie is a journaling of the accounts (an address->rlp mapping, rather than the nodes stored by the trie). This enables a squashing of all account changes before pushing them into the trie. .. NOTE:: StorageDB works similarly AccountDB synchronizes the snapshot/revert/persist of both of the journals. """ self._raw_store_db = db self._batchdb = BatchDB(db) self._batchtrie = BatchDB(db, read_through_deletes=True) self._journaldb = JournalDB(self._batchdb) self._trie = HashTrie( HexaryTrie(self._batchtrie, state_root, prune=True)) self._trie_cache = CacheDB(self._trie) self._journaltrie = JournalDB(self._trie_cache) self._account_cache = LRU(2048) self._account_stores = {} # type: Dict[Address, AccountStorageDB] self._dirty_accounts = set() # type: Set[Address] self._root_hash_at_last_persist = state_root @property def state_root(self) -> Hash32: return self._trie.root_hash @state_root.setter def state_root(self, value: Hash32) -> None: if self._trie.root_hash != value: self._trie_cache.reset_cache() self._trie.root_hash = value def has_root(self, state_root: bytes) -> bool: return state_root in self._batchtrie # # Storage # def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int: validate_canonical_address(address, title="Storage Address") validate_uint256(slot, title="Storage Slot") account_store = self._get_address_store(address) return account_store.get(slot, from_journal) def set_storage(self, address: Address, slot: int, value: int) -> None: validate_uint256(value, title="Storage Value") validate_uint256(slot, title="Storage Slot") validate_canonical_address(address, title="Storage Address") account_store = self._get_address_store(address) self._dirty_accounts.add(address) account_store.set(slot, value) def delete_storage(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") self._set_storage_root(address, BLANK_ROOT_HASH) self._wipe_storage(address) def _wipe_storage(self, address: Address) -> None: """ Wipe out the storage, without explicitly handling the storage root update """ account_store = self._get_address_store(address) self._dirty_accounts.add(address) account_store.delete() def _get_address_store(self, address: Address) -> AccountStorageDB: if address in self._account_stores: store = self._account_stores[address] else: storage_root = self._get_storage_root(address) store = 
AccountStorageDB(self._raw_store_db, storage_root, address) self._account_stores[address] = store return store def _dirty_account_stores( self) -> Iterable[Tuple[Address, AccountStorageDB]]: for address in self._dirty_accounts: store = self._account_stores[address] yield address, store @to_tuple def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]: # list all the accounts that were changed, and their new storage roots for address, store in self._dirty_account_stores(): if store.has_changed_root: yield address, store.get_changed_root() def _get_storage_root(self, address: Address) -> Hash32: account = self._get_account(address) return account.storage_root def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None: account = self._get_account(address) self._set_account(address, account.copy(storage_root=new_storage_root)) def _validate_flushed_storage(self, address: Address, store: AccountStorageDB) -> None: if store.has_changed_root: actual_storage_root = self._get_storage_root(address) expected_storage_root = store.get_changed_root() if expected_storage_root != actual_storage_root: raise ValidationError( "Storage root was not saved to account before trying to persist roots. " "Account %r had storage %r, but should be %r." % ( address, actual_storage_root, expected_storage_root, )) # # Balance # def get_balance(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.balance def set_balance(self, address: Address, balance: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(balance, title="Account Balance") account = self._get_account(address) self._set_account(address, account.copy(balance=balance)) # # Nonce # def get_nonce(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.nonce def set_nonce(self, address: Address, nonce: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(nonce, title="Nonce") account = self._get_account(address) self._set_account(address, account.copy(nonce=nonce)) def increment_nonce(self, address: Address) -> None: current_nonce = self.get_nonce(address) self.set_nonce(address, current_nonce + 1) # # Code # def get_code(self, address: Address) -> bytes: validate_canonical_address(address, title="Storage Address") code_hash = self.get_code_hash(address) if code_hash == EMPTY_SHA3: return b'' else: try: return self._journaldb[code_hash] except KeyError: raise MissingBytecode(code_hash) from KeyError def set_code(self, address: Address, code: bytes) -> None: validate_canonical_address(address, title="Storage Address") validate_is_bytes(code, title="Code") account = self._get_account(address) code_hash = keccak(code) self._journaldb[code_hash] = code self._set_account(address, account.copy(code_hash=code_hash)) def get_code_hash(self, address: Address) -> Hash32: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.code_hash def delete_code(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account.copy(code_hash=EMPTY_SHA3)) # # Account Methods # def account_has_code_or_nonce(self, address: Address) -> bool: return self.get_nonce(address) != 0 or self.get_code_hash( address) != EMPTY_SHA3 def delete_account(self, 
address: Address) -> None: validate_canonical_address(address, title="Storage Address") if address in self._account_cache: del self._account_cache[address] del self._journaltrie[address] self._wipe_storage(address) def account_exists(self, address: Address) -> bool: validate_canonical_address(address, title="Storage Address") account_rlp = self._get_encoded_account(address, from_journal=True) return account_rlp != b'' def touch_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account) def account_is_empty(self, address: Address) -> bool: return not self.account_has_code_or_nonce( address) and self.get_balance(address) == 0 # # Internal # def _get_encoded_account(self, address: Address, from_journal: bool = True) -> bytes: lookup_trie = self._journaltrie if from_journal else self._trie_cache try: return lookup_trie[address] except trie_exceptions.MissingTrieNode as exc: raise MissingAccountTrieNode(*exc.args) from exc except KeyError: # In case the account is deleted in the JournalDB return b'' def _get_account(self, address: Address, from_journal: bool = True) -> Account: if from_journal and address in self._account_cache: return self._account_cache[address] rlp_account = self._get_encoded_account(address, from_journal) if rlp_account: account = rlp.decode(rlp_account, sedes=Account) else: account = Account() if from_journal: self._account_cache[address] = account return account def _set_account(self, address: Address, account: Account) -> None: self._account_cache[address] = account rlp_account = rlp.encode(account, sedes=Account) self._journaltrie[address] = rlp_account # # Record and discard API # def record(self) -> JournalDBCheckpoint: checkpoint = self._journaldb.record() self._journaltrie.record(checkpoint) for _, store in self._dirty_account_stores(): store.record(checkpoint) return checkpoint def discard(self, checkpoint: JournalDBCheckpoint) -> None: self._journaldb.discard(checkpoint) self._journaltrie.discard(checkpoint) self._account_cache.clear() for _, store in self._dirty_account_stores(): store.discard(checkpoint) def commit(self, checkpoint: JournalDBCheckpoint) -> None: self._journaldb.commit(checkpoint) self._journaltrie.commit(checkpoint) for _, store in self._dirty_account_stores(): store.commit(checkpoint) def make_state_root(self) -> Hash32: for _, store in self._dirty_account_stores(): store.make_storage_root() for address, storage_root in self._get_changed_roots(): self.logger.debug2( "Updating account 0x%s to storage root 0x%s", address.hex(), storage_root.hex(), ) self._set_storage_root(address, storage_root) self._journaldb.persist() diff = self._journaltrie.diff() # In addition to squashing (which is redundant here), this context manager causes # an atomic commit of the changes, so exceptions will revert the trie with self._trie.squash_changes() as memory_trie: self._apply_account_diff_without_proof(diff, memory_trie) self._journaltrie.reset() self._trie_cache.reset_cache() return self.state_root def persist(self) -> None: self.make_state_root() # persist storage with self._raw_store_db.atomic_batch() as write_batch: for address, store in self._dirty_account_stores(): self._validate_flushed_storage(address, store) store.persist(write_batch) for address, new_root in self._get_changed_roots(): if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH: raise ValidationError( "After persisting storage trie, a root node was not found. 
" "State root for account 0x%s is missing for hash 0x%s." % ( address.hex(), new_root.hex(), )) # reset local storage trackers self._account_stores = {} self._dirty_accounts = set() # persist accounts self._validate_generated_root() new_root_hash = self.state_root self.logger.debug2("Persisting new state root: 0x%s", new_root_hash.hex()) with self._raw_store_db.atomic_batch() as write_batch: self._batchtrie.commit_to(write_batch, apply_deletes=False) self._batchdb.commit_to(write_batch, apply_deletes=False) self._root_hash_at_last_persist = new_root_hash def _validate_generated_root(self) -> None: db_diff = self._journaldb.diff() if len(db_diff): raise ValidationError( "AccountDB had a dirty db when it needed to be clean: %r" % db_diff) trie_diff = self._journaltrie.diff() if len(trie_diff): raise ValidationError( "AccountDB had a dirty trie when it needed to be clean: %r" % trie_diff) def _log_pending_accounts(self) -> None: diff = self._journaltrie.diff() for address in sorted(diff.pending_keys()): account = self._get_account(Address(address)) self.logger.debug2( "Pending Account %s: balance %d, nonce %d, storage root %s, code hash %s", to_checksum_address(address), account.balance, account.nonce, encode_hex(account.storage_root), encode_hex(account.code_hash), ) for deleted_address in sorted(diff.deleted_keys()): cast_deleted_address = Address(deleted_address) self.logger.debug2( "Deleted Account %s, empty? %s, exists? %s", to_checksum_address(deleted_address), self.account_is_empty(cast_deleted_address), self.account_exists(cast_deleted_address), ) def _apply_account_diff_without_proof(self, diff: DBDiff, trie: BaseDB) -> None: """ Apply diff of trie updates, when original nodes might be missing. Note that doing this naively will raise exceptions about missing nodes from *intermediate* trie roots. This captures exceptions and uses the previous trie root hash that will be recognized by other nodes. """ # It's fairly common that when an account is deleted, we need to retrieve nodes # for accounts that were not needed during normal execution. We only need these # nodes to refactor the trie. for delete_key in diff.deleted_keys(): try: del trie[delete_key] except trie_exceptions.MissingTrieNode as exc: self.logger.debug( "Missing node while deleting account with key %s: %s", encode_hex(delete_key), exc, ) raise MissingAccountTrieNode( exc.missing_node_hash, self._root_hash_at_last_persist, exc.requested_key, ) from exc # It's fairly unusual, but possible, that setting an account will need unknown # nodes during a trie refactor. Here is an example that seems to cause it: # # Setup: # - Root node is a branch, with 0 pointing to a leaf # - The complete leaf key is (0, 1, 2), so (1, 2) is in the leaf node # - We know the leaf node hash but not the leaf node body # Refactor that triggers missing node: # - Add value with key (0, 3, 4) # - We need to replace the current leaf node with a branch that points leaves at 1 and 3 # - The leaf for key (0, 1, 2) now contains only the (2) part, so needs to be rebuilt # - We need the full body of the old (1, 2) leaf node, to rebuild for key, val in diff.pending_items(): try: trie[key] = val except trie_exceptions.MissingTrieNode as exc: self.logger.debug( "Missing node on account update key %s to %s: %s", encode_hex(key), encode_hex(val), exc, ) raise MissingAccountTrieNode( exc.missing_node_hash, self._root_hash_at_last_persist, exc.requested_key, ) from exc
class AlchemyModel(t.Generic[T], QtCore.QAbstractTableModel, metaclass=_SqlAlchemyTableModelMeta): _columns: t.Sequence[ConvertibleColumn] def __init__( self, model_type: t.Type[T], order_by: QueryableAttribute, *, columns: t.Optional[t.Sequence[ConvertibleColumn]] = None, page_size: int = 64, auto_commit: bool = True, ): super().__init__() self._model_type = model_type self._order_by_column = order_by self._page_size = page_size self._auto_commit = auto_commit if columns is not None: self._columns = columns if not self._columns: raise ValueError('Specify at least one column') self._header_names = tuple(' '.join(v.capitalize() for v in c.column.name.split('_')) for c in self._columns) self._cache = LRU(int(page_size * 2)) self._cached_size = None def filter_query(self, query: Query) -> Query: return query def get_query(self) -> Query: return self.filter_query(EDB.Session.query(self._model_type)) def clear_cache(self) -> None: self._cache.clear() self._cached_size = None def _load_page(self, offset: int, limit: int) -> None: for idx, model in enumerate(self.get_query().order_by( self._order_by_column).limit(limit).offset(offset)): self._cache[idx + offset] = model def get_item_at_index(self, index: int) -> t.Optional[T]: if index < 0: return None try: return self._cache[index] except KeyError: self._load_page(index, self._page_size) return self._cache.get(index, None) def rowCount(self, parent: QModelIndex = ...) -> int: if self._cached_size is None: self._cached_size = self.get_query().count() return self._cached_size def columnCount(self, parent: QModelIndex = ...) -> int: return len(self._columns) def data(self, index: QModelIndex, role: int = ...) -> t.Any: if not role in (Qt.DisplayRole, Qt.EditRole): return None row = self.get_item_at_index(index.row()) if not row: return None column = self._columns[index.column()] return column.to_primitive(getattr(row, column.column.name)) def setData(self, index: QModelIndex, value: t.Any, role: int = ...) -> bool: if role != Qt.EditRole: return False row = self.get_item_at_index(index.row()) if not row: return False column = self._columns[index.column()] setattr(row, column.column.name, column.from_primitive(value)) if self._auto_commit: EDB.Session.commit() if self._columns[index.column()] == self._order_by_column: self.clear_cache() return True def flags(self, index: QModelIndex) -> Qt.ItemFlags: return Qt.ItemIsSelectable | Qt.ItemIsEditable | Qt.ItemIsEnabled def headerData(self, section: int, orientation: Qt.Orientation, role: int = ...) -> t.Any: if role != Qt.DisplayRole: return None if orientation == Qt.Vertical: return str(section + 1) return self._header_names[section] def removeRows(self, row: int, count: int, parent: QModelIndex = ...) 
-> bool: self.beginRemoveRows(parent, row, row - 1 + count) pk_column = self._model_type.__mapper__.primary_key[0] items = list( filter(lambda i: i is not None, (getattr(self.get_item_at_index(idx), pk_column.name) for idx in range(row, row + count)))) if not items: return False EDB.Session.query(self._model_type).filter( pk_column.in_(items)).delete(syncronize_session='fetch', ) if self._auto_commit: EDB.Session.commit() self.clear_cache() self.endRemoveRows() return True def pop(self, row: int) -> t.Optional[T]: model = self.get_item_at_index(row) if not model: return self.removeRows(row, 1) return model def moveRows( self, sourceParent: QModelIndex, sourceRow: int, count: int, destinationParent: QModelIndex, destinationChild: int, ) -> bool: self.beginMoveRows(QModelIndex(), sourceRow, sourceRow + count - 1, QModelIndex(), destinationChild) floor = min(sourceRow, destinationChild) items = [ _item for _item in ( self.get_item_at_index(idx) for idx in range(floor, max(sourceRow, destinationChild) + count)) if _item is not None ] old_values = [ getattr(_item, self._order_by_column.name) for _item in items ] for _ in range(count): items.insert(destinationChild - floor, items.pop(sourceRow - floor)) for item, new_value in zip(items, old_values): setattr(item, self._order_by_column.name, new_value) if self._auto_commit: EDB.Session.commit() self.clear_cache() self.endMoveRows() return True def reset(self) -> None: self.beginResetModel() self.clear_cache() self.endResetModel()
class SpotifyCache:
    def __init__(self) -> None:
        self._tracks = LRU(50)
        self._artists = LRU(50)
        self._audio_features = LRU(50)
        self._top_tracks = LRU(50)
        self._albums = LRU(50)
        self._queries: typing.Dict[str, LRU] = {
            i: LRU(50) for i in ("artist", "track", "album")
        }
        self._task: asyncio.Task[None] = asyncio.create_task(self.start_clear_loop())

    def __del__(self) -> None:
        self._task.cancel()

    # pylint: disable=redefined-builtin
    def get_container(self, type: str) -> LRU:
        return getattr(self, f"{type}{'s' * (not type.endswith('s'))}")

    def update_items(self, items: typing.Sequence[BaseSpotify]) -> None:
        if not items:
            return
        self.get_container(items[0].type).update({
            item.id: item
            for item in items
            if not item.__class__.__name__.startswith("Partial")
        })

    def set_item(self, item: T) -> T:
        if item.__class__.__name__.startswith("Partial"):
            return item
        self.get_container(item.type)[item.id] = item
        return item

    @property
    def albums(self) -> LRU:
        return self._albums

    @property
    def tracks(self) -> LRU:
        return self._tracks

    @property
    def artists(self) -> LRU:
        return self._artists

    @property
    def audio_features(self) -> LRU:
        return self._audio_features

    @property
    def top_tracks(self) -> LRU:
        return self._top_tracks

    @property
    def queries(self) -> typing.Dict[str, LRU]:
        return self._queries

    def get_queries(self, type_name: str) -> LRU:
        return self._queries[type_name]

    async def start_clear_loop(self) -> None:
        while 1:
            await asyncio.sleep(86400)
            self._tracks.clear()
            self._artists.clear()
            self._audio_features.clear()
            self._top_tracks.clear()
            self._albums.clear()
            _ = [i.clear() for i in self._queries.values()]
class GelbooruViewer: API_URL = "https://gelbooru.com/index.php?page=dapi&s=post&q=index" MAX_ID = 1 MAX_ID_LOCK = Lock() MAX_CACHE_SIZE = 32 MAX_CACHE_TIME = 24 * 60 # minutes PICTURES_PER_TAG = 200 def __init__(self): self.session = requests.Session() self.session.headers.update( { 'Accept': 'application/json, application/xml', 'Accept-Language': 'en-US', 'User-Agent': 'Mozilla/5.0 GelbooruViewer/1.0 (+https://github.com/ArchieMeng/GelbooruViewer)' } ) # only cache for get_all with tags while pid is 0!!! if importlib.find_loader('lru'): from lru import LRU self.cache = LRU(GelbooruViewer.MAX_CACHE_SIZE) else: self.cache = dict() self.cache_lock = Lock() # occasionally update cache self.last_cache_used = time() self.update_cache_thread = Thread(target=self._update_cache_loop, daemon=True) self.update_cache_thread.start() # get latest image to update MAX_ID self.get(limit=0) def _update_cache(self, tags, num=None): """ Do the update cache task :param tags: tags of picture to update to cache :param num: amount of pictures :return: """ if tags: result = [*self.get_all_generator(tags, 0, num, thread_limit=1, limit=100)] if result: key = '+'.join(tags) with self.cache_lock: self.cache[key] = result def _update_cache_loop(self): """ Occasionally refresh cache. Clear cache if unused for a long time. :return: """ minutes = 2 * 60 while True: sleep(60 * minutes) if time() - self.last_cache_used > self.MAX_CACHE_TIME * 60: self.cache.clear() gc.collect() continue with self.cache_lock: keys = self.cache.keys() with ThreadPoolExecutor(max_workers=2) as executor: futures = [executor.submit(self._update_cache, key.split('+'), GelbooruViewer.PICTURES_PER_TAG) for key in keys] for future in as_completed(futures): try: result = future.result() print(result) except Exception as e: print("Exception happened in GelbooruViewer._update_cache_loop", type(e), e) def get_raw_content(self, **kwargs): content = None with self.session as session: response = session.get(GelbooruViewer.API_URL, params=kwargs) try: content = response.content except Exception as e: logging.error(str(e)) pass return content def get(self, **kwargs)->list: """ use Gelbooru api to fetch picture info. :param kwargs: allowed args includes limit: How many posts you want to retrieve. There is a hard limit of 100 posts per request. pid: The page number. cid: Change ID of the post. This is in Unix time so there are likely others with the same value if updated at the same time. tags: The tags to search for. Any tag combination that works on the web site will work here. This includes all the meta-tags. See cheatsheet for more information. 
:return: a list of type GelbooruPicture, if sth wrong happened, a empty list will be return """ attempt = 0 content = None while attempt < 3 and content is None: attempt += 1 content = self.get_raw_content(**kwargs) if content is None: return [] if isinstance(content, bytes): xml_str = content.decode('utf-8') else: xml_str = content root = ElementTree.fromstring(xml_str) posts = root.findall('post') picture_list = [] if posts: cur_max_id = int(posts[0].attrib['id']) with GelbooruViewer.MAX_ID_LOCK: GelbooruViewer.MAX_ID = max(GelbooruViewer.MAX_ID, cur_max_id) else: return None for post in posts: info = post.attrib picture_list.append( GelbooruPicture( info['width'], info['height'], info['score'], info['source'], "https:"+info['preview_url'], "https:"+info['sample_url'], "https:"+info['file_url'], info['created_at'], info['creator_id'], [tag for tag in info['tags'].split(' ') if tag and not tag.isspace()], info['id'], info['rating'] ) ) return picture_list def get_all(self, tags: list, pid=0, num=None, thread_limit=5, use_cache=True, limit=25): """ regardless of official request limit amount, use threading to request amount you want When pictures is found in cache, list is returned. When pictures is found but not in cache, generator is returned. Else, None is returned :param limit: number of pictures in per request :param use_cache: whether prefer internal cache :param thread_limit: amount of threads running at the same time :param tags: tags must be provided :param pid: beginning page id , index from 0 :param num: num of picture you want. This function might return less pictures than u want only if Gelbooru hasn't got enough picture :return: a generator of gelboorupicture or list or None """ tags.sort() if use_cache and pid == 0: with self.cache_lock: key = '+'.join(tags) if key in self.cache and isinstance(self.cache[key], list): self.last_cache_used = time() if not num: return self.cache[key] else: return self.cache[key][:num] elif key not in self.cache or isinstance(self.cache[key], str): self.last_cache_used = time() # only one thread is executed during update. When update executed, a str is put into cache self.cache[key] = "executing" # currently cache size is limited in cate of Memory leak. thread = Thread( target=self._update_cache, args=(tags, GelbooruViewer.PICTURES_PER_TAG), daemon=True ) thread.start() content = self.get_raw_content(tags=tags, limit=0) xml_str = content.decode('utf-8') root = ElementTree.fromstring(xml_str) try: total = int(root.attrib['count']) except: return None if total > 0: return self.get_all_generator(tags, pid, num, thread_limit, total, limit) else: return None def get_all_generator( self, tags: list, pid=0, num=None, thread_limit=5, total=None, limit=25 ): """ True function of get all. Generator is returned :param thread_limit: max threads to fetch pictures at one time :param tags: tags of pictures :param pid: beginning page id , index from 0 :param num: num of picture you want.num of picture you want. This function might return less pictures than u want only if Gelbooru hasn't got enough picture :param total: total amount of picture, just set None if u don't know it. This is used by internal function :param limit: picture number per request. Generally, limit=10 cost 1.2s per request, while 25 cost 1.4s, 50 cost 2.2s, 100 cost 2.6s. The Larger limit , the faster speed in per request, but larger in total get_all timing. 
:return: """ if limit < 0 or limit > 100: limit = 10 def _get(tags, pid): content = self.get_raw_content(tags=tags, limit=limit, pid=pid) xml_string = content.decode() posts = ElementTree.fromstring(xml_string).findall('post') return posts if total is None: content = self.get_raw_content(tags=tags, limit=0) xml_str = content.decode('utf-8') root = ElementTree.fromstring(xml_str) total = int(root.attrib['count']) if isinstance(num, int): if num > 0: # if total amount is too large, use num instead. total = min(total, num) if tags and total > 0: with ThreadPoolExecutor(max_workers=thread_limit) as executor: final_pid = int(total / limit) start = pid tasks = [] while start < final_pid + 1: futures2idx = { executor.submit(_get, tags, i): i for i in tasks + [j for j in range(start, min(start + thread_limit, final_pid + 1))] } tasks = [] for future in as_completed(futures2idx): idx = futures2idx[future] try: posts = future.result() for post in posts: info = post.attrib yield GelbooruPicture( info['width'], info['height'], info['score'], info['source'], "https:" + info['preview_url'], "https:" + info['sample_url'], "https:" + info['file_url'], info['created_at'], info['creator_id'], [tag for tag in info['tags'].split(' ') if tag and not tag.isspace()], info['id'], info['rating'] ) except Exception as e: print("GelbooruViewer.get_all_generators raise", type(e), e) tasks.append(idx) start += thread_limit
class FCP(BaseTask): def __init__(self, circle, src, dest, treewalk=None, totalsize=0, hostcnt=0, prune=False, verify=False, resume=False, workq=None): BaseTask.__init__(self, circle) self.circle = circle self.treewalk = treewalk self.totalsize = totalsize self.prune = prune self.workq = workq self.resume = resume self.checkpoint_file = None self.src = src self.dest = os.path.abspath(dest) # cache, keep the size conservative # TODO: we need a more portable LRU size if hostcnt != 0: max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE) procs_per_host = self.circle.size / hostcnt self._read_cache_limit = ((max_ofile - 64) / procs_per_host) / 3 self._write_cache_limit = ((max_ofile - 64) / procs_per_host) * 2 / 3 if self._read_cache_limit <= 0 or self._write_cache_limit <= 0: self._read_cache_limit = 1 self._write_cache_limit = 8 self.rfd_cache = LRU(self._read_cache_limit) self.wfd_cache = LRU(self._write_cache_limit) self.cnt_filesize_prior = 0 self.cnt_filesize = 0 self.blocksize = 1024 * 1024 self.chunksize = 1024 * 1024 # debug self.d = {"rank": "rank %s" % circle.rank} self.wtime_started = MPI.Wtime() self.wtime_ended = None self.workcnt = 0 # this is the cnt for the enqued items self.reduce_items = 0 # this is the cnt for processed items if self.treewalk: log.debug("treewalk files = %s" % treewalk.flist, extra=self.d) # fini_check self.fini_cnt = Counter() # verify self.verify = verify self.chunksums = [] # checkpointing self.checkpoint_interval = sys.maxsize self.checkpoint_last = MPI.Wtime() if self.circle.rank == 0: print("Start copying process ...") def rw_cache_limit(self): return (self._read_cache_limit, self._write_cache_limit) def set_fixed_chunksize(self, sz): self.chunksize = sz def set_adaptive_chunksize(self, totalsz): self.chunksize = utils.calc_chunksize(totalsz) if self.circle.rank == 0: print("Adaptive chunksize: %s" % bytes_fmt(self.chunksize)) def cleanup(self): for f in self.rfd_cache.values(): try: os.close(f) except OSError as e: pass for f in self.wfd_cache.values(): try: os.close(f) except OSError as e: pass # remove checkpoint file if self.checkpoint_file and os.path.exists(self.checkpoint_file): os.remove(self.checkpoint_file) # we need to do this because if last job didn't finish cleanly # the fwalk files can be found as leftovers # and if fcp cleanup has a chance, it should clean up that fwalk = "%s/fwalk.%s" % (self.circle.tempdir, self.circle.rank) if os.path.exists(fwalk): os.remove(fwalk) def new_fchunk(self, fitem): fchunk = FileChunk() # default cmd = copy fchunk.src = fitem.path fchunk.dest = destpath(fitem, self.dest) return fchunk def enq_file(self, fi): """ Process a single file, represented by "fi" - FileItem It involves chunking this file and equeue all chunks. 
""" chunks = fi.st_size / self.chunksize remaining = fi.st_size % self.chunksize workcnt = 0 if fi.st_size == 0: # empty file fchunk = self.new_fchunk(fi) fchunk.offset = 0 fchunk.length = 0 self.enq(fchunk) workcnt += 1 else: for i in range(chunks): fchunk = self.new_fchunk(fi) fchunk.offset = i * self.chunksize fchunk.length = self.chunksize self.enq(fchunk) workcnt += chunks if remaining > 0: # send remainder fchunk = self.new_fchunk(fi) fchunk.offset = chunks * self.chunksize fchunk.length = remaining self.enq(fchunk) workcnt += 1 # save work cnt self.workcnt += workcnt log.debug("enq_file(): %s, size = %s, workcnt = %s" % (fi.path, fi.st_size, workcnt), extra=self.d) def handle_fitem(self, fi): if os.path.islink(fi.path): dest = destpath(fi, self.dest) linkto = os.readlink(fi.path) try: os.symlink(linkto, dest) except Exception as e: log.debug("%s, skipping sym link %s" % (e, fi.path), extra=self.d) elif stat.S_ISREG(fi.st_mode): self.enq_file(fi) # where chunking takes place def create(self): """ Each task has one create(), which is invoked by circle ONCE. For FCP, each task will handle_fitem() -> enq_file() to process each file gathered during the treewalk stage. """ if not G.use_store and self.workq: # restart self.setq(self.workq) return if self.resume: return # construct and enable all copy operations # we batch operation hard-coded log.info("create() starts, flist length = %s" % len(self.treewalk.flist), extra=self.d) if G.use_store: while self.treewalk.flist.qsize > 0: fitems, _ = self.treewalk.flist.mget(G.DB_BUFSIZE) for fi in fitems: self.handle_fitem(fi) self.treewalk.flist.mdel(G.DB_BUFSIZE) # store checkpoint log.debug("dbname = %s" % self.circle.dbname) dirname = os.path.dirname(self.circle.dbname) basename = os.path.basename(self.circle.dbname) chkpointname = basename + ".CHECK_OK" self.checkpoint_file = os.path.join(dirname, chkpointname) with open(self.checkpoint_file, "w") as f: f.write("%s" % self.totalsize) else: # use memory for fi in self.treewalk.flist: self.handle_fitem(fi) # memory-checkpoint if self.checkpoint_file: self.do_no_interrupt_checkpoint() self.checkpoint_last = MPI.Wtime() def do_open(self, k, d, flag, limit): """ @param k: the file path @param d: dictionary of <path, file descriptor> @return: file descriptor """ if d.has_key(k): return d[k] if len(d.keys()) >= limit: # over the limit # clean up the least used old_k, old_v = d.items()[-1] try: os.close(old_v) except OSError as e: log.warn("FD for %s not valid when closing" % old_k, extra=self.d) fd = -1 try: fd = os.open(k, flag) except OSError as e: if e.errno == 28: # no space left log.error("Critical error: %s, exit!" 
% e, extra=self.d) self.circle.exit(0) # should abort else: log.error("OSError({0}):{1}, skipping {2}".format(e.errno, e.strerror, k), extra=self.d) else: if fd > 0: d[k] = fd finally: return fd @staticmethod def do_mkdir(work): src = work.src dest = work.dest if not os.path.exists(dest): os.makedirs(dest) def do_copy(self, work): src = work.src dest = work.dest basedir = os.path.dirname(dest) if not os.path.exists(basedir): os.makedirs(basedir) rfd = self.do_open(src, self.rfd_cache, os.O_RDONLY, self._read_cache_limit) if rfd < 0: return False wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY | os.O_CREAT, self._write_cache_limit) if wfd < 0: if args.force: try: os.unlink(dest) except OSError as e: log.error("Failed to unlink %s, %s " % (dest, e), extra=self.d) return False else: wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY, self._write_cache_limit) else: log.error("Failed to create output file %s" % dest, extra=self.d) return False # do the actual copy self.write_bytes(rfd, wfd, work) # update tally self.cnt_filesize += work.length if G.verbosity > 2: log.debug("Transferred %s bytes from:\n\t [%s] to [%s]" % (self.cnt_filesize, src, dest), extra=self.d) return True def do_no_interrupt_checkpoint(self): a = Thread(target=self.do_checkpoint) a.start() a.join() log.debug("checkpoint: %s" % self.checkpoint_file, extra=self.d) def do_checkpoint(self): for k in self.wfd_cache.keys(): os.close(self.wfd_cache[k]) # clear the cache self.wfd_cache.clear() tmp_file = self.checkpoint_file + ".part" with open(tmp_file, "wb") as f: cobj = Checkpoint(self.src, self.dest, self.get_workq(), self.totalsize) pickle.dump(cobj, f, pickle.HIGHEST_PROTOCOL) # POSIX requires rename to be atomic os.rename(tmp_file, self.checkpoint_file) def process(self): """ The only work is "copy" TODO: clean up other actions such as mkdir/fini_check """ if not G.use_store: curtime = MPI.Wtime() if curtime - self.checkpoint_last > self.checkpoint_interval: self.do_no_interrupt_checkpoint() log.info("Checkpointing done ...", extra=self.d) self.checkpoint_last = curtime work = self.deq() self.reduce_items += 1 if isinstance(work, FileChunk): self.do_copy(work) else: log.warn("Unknown work object: %s" % work, extra=self.d) def reduce_init(self, buf): buf['cnt_filesize'] = self.cnt_filesize def reduce(self, buf1, buf2): buf1['cnt_filesize'] += buf2['cnt_filesize'] return buf1 def reduce_report(self, buf): out = "" if self.totalsize != 0: out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / self.totalsize) out += "%s copied" % bytes_fmt(buf['cnt_filesize']) if self.circle.reduce_time_interval != 0: rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval self.cnt_filesize_prior = buf['cnt_filesize'] out += ", estimated transfer rate: %s/s" % bytes_fmt(rate) print(out) def reduce_finish(self, buf): # self.reduce_report(buf) pass def epilogue(self): global taskloads self.wtime_ended = MPI.Wtime() taskloads = self.circle.comm.gather(self.reduce_items) if self.circle.rank == 0: if self.totalsize == 0: print("\nZero filesize detected, done.\n") return tlapse = self.wtime_ended - self.wtime_started rate = float(self.totalsize) / tlapse print("\nFCP Epilogue:\n") print("\t{:<20}{:<20}".format("Ending at:", utils.current_time())) print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse))) print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate))) print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads)) def read_then_write(self, rfd, wfd, work, 
num_of_bytes, m): """ core entry point for copy action: first read then write. @param num_of_bytes: the exact amount of bytes we will copy @return: False if unsuccessful. """ buf = None try: buf = readn(rfd, num_of_bytes) except IOError: self.logger.error("Failed to read %s", work.src, extra=self.d) return False try: writen(wfd, buf) except IOError: self.logger.error("Failed to write %s", work.dest, extra=self.d) return False if m: m.update(buf) return True def write_bytes(self, rfd, wfd, work): os.lseek(rfd, work.offset, os.SEEK_SET) os.lseek(wfd, work.offset, os.SEEK_SET) m = None if self.verify: m = hashlib.sha1() remaining = work.length while remaining != 0: if remaining >= self.blocksize: self.read_then_write(rfd, wfd, work, self.blocksize, m) remaining -= self.blocksize else: self.read_then_write(rfd, wfd, work, remaining, m) remaining = 0 if self.verify: # use src path here ck = ChunkSum(work.src, offset=work.offset, length=work.length, digest=m.hexdigest()) self.chunksums.append(ck)
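# FCP above sizes its two LRU descriptor caches from RLIMIT_NOFILE and closes
# file descriptors by hand in do_open()/cleanup(). An alternative sketch (not
# what FCP itself does) lets the LRU eviction callback close descriptors
# automatically when the cache overflows:
import os
from lru import LRU

def _close_evicted(path, fd):
    try:
        os.close(fd)                 # runs when an entry is evicted to make room
    except OSError:
        pass

rfd_cache = LRU(16, callback=_close_evicted)   # keep at most 16 read fds open

def open_cached(path, flags=os.O_RDONLY):
    if path not in rfd_cache:
        rfd_cache[path] = os.open(path, flags)
    return rfd_cache[path]

# Descriptors still cached at shutdown must be closed explicitly, e.g.:
# for fd in rfd_cache.values(): os.close(fd)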
class Cache: """Class representing D3N.""" # Replacement policies LRU = "LRU" LFU = "LFU" LRU_S = "LRU_S" FIFO = "FIFO" RAND = "RAND" # Write policies WRITE_BACK = "WB" WRITE_THROUGH = "WT" # Layer L1 = "L1" L2 = "L2" consistent = "consistent" rendezvous = "rendezvous" rr = "rr" def __init__(self, layer, size, replace_pol, write_pol, hash_ring, hash_type, obj_size, full_size, logger): self._replace_pol = replace_pol # Replacement policy self._write_pol = write_pol # Write policy self._layer = layer # Layer info self._size = size # Cache size self.spaceLeft = size # Cache size self._logger = logger self.hashmap = {} # Mapping self.hash_ring = hash_ring self._hash_type = hash_type self._obj_size = obj_size if (self._size == 0): self.zerosize = True self._size = 1 else: self.zerosize = False if (self._replace_pol == Cache.LRU): self.cache = LRU(self._size) elif (self._replace_pol == Cache.FIFO): self.cache = deque() elif (self._replace_pol == Cache.LRU_S): self.cache = LRU(self._size) self.shadow = LRU(full_size) self.hist = [] for i in range(full_size): self.hist.append(0) # Statistics self._hit_count = 0 self._miss_count = 0 self._backend_bw = 0 self._crossrack_bw = 0 self._intrarack_bw = 0 self.miss_lat = 0 self.lat_count = 0 def _insert1(self, key, size): # No eviction if not self.zerosize: if (self._replace_pol == Cache.LRU_S): self.shadow[key] = 1 if (int(size) <= self.spaceLeft): if (self._replace_pol == Cache.LRU): self.cache[key] = int(size) elif (self._replace_pol == Cache.LRU_S): self.cache[key] = int(size) elif (self._replace_pol == Cache.FIFO): self.cache.append(key) self.hashmap[key] = int(size) self.spaceLeft -= int(size) else: while (int(size) > self.spaceLeft): self._evict() if (self._replace_pol == Cache.LRU): self.cache[key] = int(size) elif (self._replace_pol == Cache.LRU_S): self.cache[key] = int(size) elif (self._replace_pol == Cache.FIFO): self.cache.append(key) self.hashmap[key] = int(size) self.spaceLeft -= int(size) def _insert(self, key, size): # No eviction if not self.zerosize: if (self._replace_pol == Cache.LRU_S): self.cache[key] = int(size) self.shadow[key] = int(size) elif (self._replace_pol == Cache.LRU): self.cache[key] = int(size) else: if (int(size) <= self.spaceLeft): if (self._replace_pol == Cache.LRU): self.cache[key] = int(size) elif (self._replace_pol == Cache.LRU_S): self.cache[key] = int(size) elif (self._replace_pol == Cache.FIFO): self.cache.append(key) self.hashmap[key] = int(size) self.spaceLeft -= int(size) else: while (int(size) > self.spaceLeft): self._evict() if (self._replace_pol == Cache.LRU): self.cache[key] = int(size) elif (self._replace_pol == Cache.LRU_S): self.cache[key] = int(size) elif (self._replace_pol == Cache.FIFO): self.cache.append(key) self.hashmap[key] = int(size) self.spaceLeft -= int(size) def read1(self, key, size): if self._layer == "BE": return 1 if self.zerosize == True: return None """Read a object from the cache.""" r = None if (self._replace_pol == Cache.LRU_S): if self.shadow.has_key(key): count = 0 for i in self.shadow.keys(): if i == key: self.hist[count] += 1 break count += 1 self.shadow[key] = 1 if key in self.hashmap: if (self._replace_pol == Cache.LRU): self._update_use(key) elif (self._replace_pol == Cache.LRU_S): self._update_use(key) self._hit_count += 1 r = 1 else: self._miss_count += 1 return r def read(self, key, size): if self._layer == "BE": return 1 if self.zerosize == True: return None """Read a object from the cache.""" r = None if (self._replace_pol == Cache.LRU_S): if self.cache.has_key(key): 
self._hit_count += 1 self.cache[key] = self.cache[key] r = 1 else: self._miss_count += 1 if self.shadow.has_key(key): count = 0 for i in self.shadow.keys(): if i == key: self.hist[count] += 1 break count += 1 self.shadow[key] = 1 else: if key in self.hashmap: if (self._replace_pol == Cache.LRU): self._update_use(key) elif (self._replace_pol == Cache.LRU_S): self._update_use(key) self._hit_count += 1 r = 1 else: self._miss_count += 1 return r def checkKey(self, key): if self._layer == "BE": return 1 if self.zerosize == True: return 0 """Check whether an object is in the cache.""" r = 0 if (self._replace_pol == Cache.LRU_S) or (self._replace_pol == Cache.LRU): if self.cache.has_key(key): r = 1 else: r = 0 return r def _evict(self): if (self._replace_pol == Cache.LRU): id = self.cache.peek_last_item()[0] del self.cache[id] elif (self._replace_pol == Cache.LRU_S): id = self.cache.peek_last_item()[0] del self.cache[id] elif (self._replace_pol == Cache.FIFO): id = self.cache.popleft() self.spaceLeft += int(self.hashmap[id]) del self.hashmap[id] def _update_use(self, key): """Update the use of a cache.""" if (self._replace_pol == Cache.LRU): self.cache[key] = self.hashmap[key] if (self._replace_pol == Cache.LRU_S): self.cache[key] = self.hashmap[key] def set_cache_size(self, size): new_size = self.cache.get_size() + int(size) self.cache.set_size(int(new_size)) def set_backend_bw(self, value): self._backend_bw += value def set_crossrack_bw(self, value): self._crossrack_bw += value def set_intrarack_bw(self, value): self._intrarack_bw += value def get_backend_bw(self): return self._backend_bw def get_crossrack_bw(self): return self._crossrack_bw def get_intrarack_bw(self): return self._intrarack_bw def get_replace_pol(self): return self._replace_pol def get_hit_count(self): return self._hit_count def get_miss_count(self): return self._miss_count def get_available_space(self): return self.spaceLeft def get_replace_poll(self): return self._replace_pol def reset_shadow_cache(self): self.shadow.clear() def print_cache(self): print(self.cache) def get_l2_address(self, key): if (self._hash_type == Cache.consistent): return self.hash_ring.get_node(key) elif (self._hash_type == Cache.rendezvous): return self.hash_ring.find_node(key) elif (self._hash_type == Cache.rr): val = key.split("_")[1] res = int(val) % int(self.hash_ring) return res
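# --- Illustrative sketch (not part of the original sources) ---
# Hypothetical sketch of the LRU_S ("shadow") idea used by Cache above: a small real
# cache is paired with a larger shadow LRU, and each reference tallies the key's
# current stack depth in the shadow into a histogram, approximating the hit ratio the
# cache would achieve at other sizes. Sizes and function names here are illustrative.
from lru import LRU

CACHE_SIZE = 4
SHADOW_SIZE = 16

cache = LRU(CACHE_SIZE)
shadow = LRU(SHADOW_SIZE)
hist = [0] * SHADOW_SIZE

def reference(key):
    # Record the key's depth in the shadow stack (0 == most recently used).
    for depth, k in enumerate(shadow.keys()):   # keys() are ordered MRU -> LRU
        if k == key:
            hist[depth] += 1
            break
    shadow[key] = 1          # promote / insert in the shadow
    hit = key in cache
    cache[key] = 1           # promote / insert in the real cache
    return hit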
class PolygonIndex(object): include_only_properties = None simplify_tolerance = 0.0001 preserve_topology = True persistent_polygons = False cache_size = 0 fix_invalid_polygons = False INDEX_FILENAME = None POLYGONS_DB_DIR = 'polygons' def __init__(self, index=None, polygons=None, polygons_db=None, save_dir=None, index_filename=None, polygons_db_path=None, include_only_properties=None): if save_dir: self.save_dir = save_dir else: self.save_dir = None if not index_filename: index_filename = self.INDEX_FILENAME self.index_path = os.path.join(save_dir or '.', index_filename) if not index: self.create_index(overwrite=True) else: self.index = index if include_only_properties and hasattr(include_only_properties, '__contains__'): self.include_only_properties = include_only_properties if not polygons and not self.persistent_polygons: self.polygons = {} elif polygons and not self.persistent_polygons: self.polygons = polygons elif self.persistent_polygons and self.cache_size > 0: self.polygons = LRU(self.cache_size) if polygons: for key, value in six.iteritems(polygons): self.polygons[key] = value self.cache_hits = 0 self.cache_misses = 0 self.get_polygon = self.get_polygon_cached if not polygons_db_path: polygons_db_path = os.path.join(save_dir or '.', self.POLYGONS_DB_DIR) if not polygons_db: self.polygons_db = LevelDB(polygons_db_path) else: self.polygons_db = polygons_db self.setup() self.i = 0 def create_index(self, overwrite=False): raise NotImplementedError('Children must implement') def index_polygon(self, polygon): raise NotImplementedError('Children must implement') def setup(self): pass def clear_cache(self, garbage_collect=True): if self.persistent_polygons and self.cache_size > 0: self.polygons.clear() if garbage_collect: gc.collect() def simplify_polygon(self, poly, simplify_tolerance=None, preserve_topology=None): if simplify_tolerance is None: simplify_tolerance = self.simplify_tolerance if preserve_topology is None: preserve_topology = self.preserve_topology return poly.simplify(simplify_tolerance, preserve_topology=preserve_topology) def index_polygon_properties(self, properties): pass def polygon_geojson(self, poly, properties): return { 'type': 'Feature', 'geometry': mapping(poly), } def add_polygon(self, poly, properties, cache=False, include_only_properties=None): if include_only_properties is not None: properties = {k: v for k, v in properties.iteritems() if k in include_only_properties} if not self.persistent_polygons or cache: self.polygons[self.i] = prep(poly) if self.persistent_polygons: self.polygons_db.Put(self.polygon_key(self.i), json.dumps(self.polygon_geojson(poly, properties))) self.polygons_db.Put(self.properties_key(self.i), json.dumps(properties)) self.index_polygon_properties(properties) self.i += 1 @classmethod def create_from_shapefiles(cls, inputs, output_dir, index_filename=None, include_only_properties=None): index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME) for input_file in inputs: if include_only_properties is not None: include_props = include_only_properties.get(input_file, cls.include_only_properties) else: include_props = cls.include_only_properties f = fiona.open(input_file) index.add_geojson_like_file(f) return index @classmethod def fix_polygon(cls, poly): ''' Coerce to valid polygon ''' if not poly.is_valid: poly = poly.buffer(0) if not poly.is_valid: return None return poly @classmethod def to_polygon(cls, coords, holes=None, test_point=None): ''' Create shapely polygon from list of coordinate tuples if valid 
''' if not coords or len(coords) < 3: return None # Fix for polygons crossing the 180th meridian lons = [lon for lon, lat in coords] if (max(lons) - min(lons) > 180): coords = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in coords] if holes: holes = [(lon + 360.0 if lon < 0 else lon, lat) for lon, lat in holes] poly = Polygon(coords, holes) try: if test_point is None: test_point = poly.representative_point() invalid = cls.fix_invalid_polygons and not poly.is_valid and not poly.contains(test_point) except Exception: invalid = True if invalid: try: poly_fix = cls.fix_polygon(poly) if poly_fix is not None and poly_fix.bounds and len(poly_fix.bounds) == 4 and poly_fix.is_valid and poly_fix.type == poly.type: if test_point is None: test_point = poly_fix.representative_point() if poly_fix.contains(test_point): poly = poly_fix except Exception: pass return poly def add_geojson_like_record(self, rec, include_only_properties=None): if not rec or not rec.get('geometry') or 'type' not in rec['geometry']: return poly_type = rec['geometry']['type'] if poly_type == 'Polygon': coords = rec['geometry']['coordinates'][0] poly = self.to_polygon(coords) if poly is None or not poly.bounds or len(poly.bounds) != 4: return self.index_polygon(poly) self.add_polygon(poly, rec['properties'], include_only_properties=include_only_properties) elif poly_type == 'MultiPolygon': polys = [] poly_coords = rec['geometry']['coordinates'] for coords in poly_coords: poly = self.to_polygon(coords[0]) if poly is None or not poly.bounds or len(poly.bounds) != 4: continue polys.append(poly) self.index_polygon(poly) self.add_polygon(MultiPolygon(polys), rec['properties'], include_only_properties=include_only_properties) else: return def add_geojson_like_file(self, f, include_only_properties=None): ''' Add either GeoJSON or a shapefile record to the index ''' for rec in f: self.add_geojson_like_record(rec, include_only_properties=include_only_properties) @classmethod def create_from_geojson_files(cls, inputs, output_dir, index_filename=None, polys_filename=DEFAULT_POLYS_FILENAME, include_only_properties=None): index = cls(save_dir=output_dir, index_filename=index_filename or cls.INDEX_FILENAME) for input_file in inputs: if include_only_properties is not None: include_props = include_only_properties.get(input_file, cls.include_only_properties) else: include_props = cls.include_only_properties f = json.load(open(input_file)) index.add_geojson_like_file(f['features'], include_only_properties=include_props) return index def compact_polygons_db(self): self.polygons_db.CompactRange('\x00', '\xff') def save(self): self.save_index() self.save_properties(os.path.join(self.save_dir, DEFAULT_PROPS_FILENAME)) if not self.persistent_polygons: self.save_polygons(os.path.join(self.save_dir, DEFAULT_POLYS_FILENAME)) self.compact_polygons_db() self.save_polygon_properties(self.save_dir) def load_properties(self, filename): properties = json.load(open(filename)) self.i = int(properties.get('num_polygons', self.i)) def save_properties(self, out_filename): out = open(out_filename, 'w') json.dump({'num_polygons': str(self.i)}, out) def save_polygons(self, out_filename): out = open(out_filename, 'w') for i in xrange(self.i): poly = self.polygons[i] feature = { 'type': 'Feature', 'geometry': mapping(poly.context), } out.write(json.dumps(feature) + u'\n') def save_index(self): raise NotImplementedError('Children must implement') def load_polygon_properties(self, d): pass def save_polygon_properties(self, d): pass @classmethod def 
polygon_from_geojson(cls, feature): poly_type = feature['geometry']['type'] if poly_type == 'Polygon': coords = feature['geometry']['coordinates'] poly = cls.to_polygon(coords[0], holes=coords[1:] or None) return poly elif poly_type == 'MultiPolygon': polys = [] for coords in feature['geometry']['coordinates']: poly = cls.to_polygon(coords[0], holes=coords[1:] or None) polys.append(poly) return MultiPolygon(polys) @classmethod def load_polygons(cls, filename): f = open(filename) polygons = {} cls.i = 0 for line in f: feature = json.loads(line.rstrip()) polygons[cls.i] = prep(cls.polygon_from_geojson(feature)) cls.i += 1 return polygons @classmethod def load_index(cls, d, index_name=None): raise NotImplementedError('Children must implement') @classmethod def load(cls, d, index_name=None, polys_filename=DEFAULT_POLYS_FILENAME, properties_filename=DEFAULT_PROPS_FILENAME, polys_db_dir=POLYGONS_DB_DIR): index = cls.load_index(d, index_name=index_name or cls.INDEX_FILENAME) if not cls.persistent_polygons: polys = cls.load_polygons(os.path.join(d, polys_filename)) else: polys = None polygons_db = LevelDB(os.path.join(d, polys_db_dir)) polygon_index = cls(index=index, polygons=polys, polygons_db=polygons_db, save_dir=d) polygon_index.load_properties(os.path.join(d, properties_filename)) polygon_index.load_polygon_properties(d) return polygon_index def get_candidate_polygons(self, lat, lon): raise NotImplementedError('Children must implement') def get_properties(self, i): return json.loads(self.polygons_db.Get(self.properties_key(i))) def get_polygon(self, i): return self.polygons[i] def get_polygon_cached(self, i): poly = self.polygons.get(i, None) if poly is None: data = json.loads(self.polygons_db.Get(self.polygon_key(i))) poly = prep(self.polygon_from_geojson(data)) self.polygons[i] = poly self.cache_misses += 1 else: self.cache_hits += 1 return poly def __iter__(self): for i in xrange(self.i): yield self.get_properties(i), self.get_polygon(i) def __len__(self): return self.i def polygons_contain(self, candidates, point, return_all=False): containing = None if return_all: containing = [] for i in candidates: poly = self.get_polygon(i) contains = poly.contains(point) if contains: properties = self.get_properties(i) if not return_all: return properties else: containing.append(properties) return containing def polygon_key(self, i): return 'poly:{}'.format(i) def properties_key(self, i): return 'props:{}'.format(i) def point_in_poly(self, lat, lon, return_all=False): candidates = self.get_candidate_polygons(lat, lon) point = Point(lon, lat) return self.polygons_contain(candidates, point, return_all=return_all)
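# --- Illustrative sketch (not part of the original sources) ---
# A generic, hypothetical sketch of the read-through pattern behind
# PolygonIndex.get_polygon_cached(): a bounded LRU fronts a persistent store, and
# hits/misses are counted explicitly. A plain dict stands in for the LevelDB backing
# store so the sketch stays self-contained; `load_record` is an illustrative name.
import json
from lru import LRU

backing_store = {i: json.dumps({"id": i}) for i in range(1000)}  # stand-in for LevelDB

class ReadThroughCache(object):
    def __init__(self, size=100):
        self.cache = LRU(size)
        self.hits = 0
        self.misses = 0

    def load_record(self, i):
        rec = self.cache.get(i, None)
        if rec is None:
            rec = json.loads(backing_store[i])   # expensive deserialization on miss
            self.cache[i] = rec
            self.misses += 1
        else:
            self.hits += 1
        return rec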
class SecurityPolicy: def __init__(self, principal: IPrincipal): self.principal = principal self._cache = LRU(100) def invalidate_cache(self): self._cache.clear() @profilable def check_permission(self, permission, obj): # Always allow public attributes if permission is Public: return True if IView.providedBy(obj): obj = obj.__parent__ # Iterate through participations ('principals') # and check permissions they give if self.principal is not None: # System user always has access if self.principal is SystemUser: return True # Check the permission groups = getattr(self.principal, "groups", None) or [] if self.cached_decision(obj, self.principal.id, groups, permission): return True return False def cache(self, parent, level=""): serial = getattr(parent, "__serial__", "") oid = getattr(parent, "__uuid__", "") cache_key = f"{id(parent)}-{oid}-{serial}-{level}" cache = self._cache.get(cache_key) if cache is None: cache = CacheEntry() self._cache[cache_key] = cache return cache @profilable def cached_decision(self, parent, principal, groups, permission): # Return the decision for a principal and permission cache = self.cache(parent) try: cache_decision = cache.decision except AttributeError: cache_decision = cache.decision = {} cache_decision_prin = cache_decision.get(principal) if not cache_decision_prin: cache_decision_prin = cache_decision[principal] = {} try: return cache_decision_prin[permission] except KeyError: pass # cache_decision_prin[permission] is the cached decision for a # principal and permission. # Check direct permissions # First recursive function to get the permissions of a principal decision = self.cached_principal_permission(parent, principal, groups, permission, "o") if decision is not None: cache_decision_prin[permission] = decision return decision # Check Roles permission # First get the Roles needed roles = cached_roles(parent, permission, "o") if roles: # Get the roles from the user prin_roles = self.cached_principal_roles(parent, principal, groups, "o") for role, setting in prin_roles.items(): if setting and (role in roles): cache_decision_prin[permission] = decision = True return decision cache_decision_prin[permission] = decision = False return decision @profilable def cached_principal_permission(self, parent, principal, groups, permission, level): # Compute the permission, if any, for the principal. 
cache = self.cache(parent, level) try: # We need to caches for first level and Allow Single if level == "o": cache_prin = cache.prino else: cache_prin = cache.prin except AttributeError: if level == "o": cache_prin = cache.prino = {} else: cache_prin = cache.prin = {} cache_prin_per = cache_prin.get(principal) if not cache_prin_per: cache_prin_per = cache_prin[principal] = {} try: return cache_prin_per[permission] except KeyError: pass # We reached the end of the recursive we check global / local if parent is None: # We check the global configuration of the user and groups prinper = self._global_permissions_for(principal, permission) if prinper is not None: cache_prin_per[permission] = prinper return prinper # If we did not found the permission for the user look at code prinper = SettingAsBoolean[code_principal_permission_setting(permission, principal, None)] # Now look for the group ids if prinper is None: for group in groups: prinper = SettingAsBoolean[code_principal_permission_setting(permission, group, None)] if prinper is not None: continue cache_prin_per[permission] = prinper return prinper # Get the local map of the permissions # As we want to quit as soon as possible we check first locally prinper_map = IPrincipalPermissionMap(parent, None) if prinper_map is not None: prinper = level_setting_as_boolean(level, prinper_map.get_setting(permission, principal, None)) if prinper is None: for group in groups: prinper = level_setting_as_boolean( level, prinper_map.get_setting(permission, group, None) ) if prinper is not None: # Once we conclude we exit # May happen that first group Deny and second # allows which will result on Deny for the first break if prinper is not None: return prinper # Find the permission recursivelly set to a user parent = getattr(parent, "__parent__", None) prinper = self.cached_principal_permission(parent, principal, groups, permission, "p") cache_prin_per[permission] = prinper return prinper def global_principal_roles(self, principal, groups): roles = dict( [(role, SettingAsBoolean[setting]) for (role, setting) in code_roles_for_principal(principal)] ) for group in groups: for role, settings in code_roles_for_principal(group): roles[role] = SettingAsBoolean[settings] roles["guillotina.Anonymous"] = True # Everybody has Anonymous # First the global roles from user + group groles = self._global_roles_for(principal) roles.update(groles) return roles def cached_principal_roles(self, parent, principal, groups, level): # Redefine it to get global roles cache = self.cache(parent, level) try: cache_principal_roles = cache.principal_roles except AttributeError: cache_principal_roles = cache.principal_roles = {} try: return cache_principal_roles[principal] except KeyError: pass # We reached the end so we go to see the global ones if parent is None: # Then the code roles roles = self.global_principal_roles(principal, groups) cache_principal_roles[principal] = roles return roles roles = self.cached_principal_roles(getattr(parent, "__parent__", None), principal, groups, "p") # We check the local map of roles prinrole = IPrincipalRoleMap(parent, None) if prinrole: roles = roles.copy() for role, setting in prinrole.get_roles_for_principal(principal): roles[role] = level_setting_as_boolean(level, setting) for group in groups: for role, setting in prinrole.get_roles_for_principal(group): roles[role] = level_setting_as_boolean(level, setting) cache_principal_roles[principal] = roles return roles def _global_roles_for(self, principal): """On a principal (user/group) get global 
roles.""" roles = {} groups = get_utility(IGroups) if self.principal and principal == self.principal.id: # Its the actual user id # We return all the global roles (including group) roles = self.principal.roles.copy() for group in self.principal.groups: roles.update(groups.get_principal(group, self.principal).roles) return roles # We are asking for group id so only group roles if groups: group = groups.get_principal(principal) return group.roles.copy() def _global_permissions_for(self, principal, permission): """On a principal (user + group) get global permissions.""" groups = get_utility(IGroups) if self.principal and principal == self.principal.id: # Its the actual user permissions = self.principal.permissions.copy() if permission in permissions: return level_setting_as_boolean("p", permissions[permission]) for group in self.principal.groups: permissions = groups.get_principal(group, self.principal).permissions if permission in permissions: return level_setting_as_boolean("p", permissions[permission]) return None
class FCP(BaseTask): def __init__(self, circle, src, dest, treewalk=None, totalsize=0, hostcnt=0, prune=False, verify=False, resume=False, workq=None): BaseTask.__init__(self, circle) self.circle = circle self.treewalk = treewalk self.totalsize = totalsize self.prune = prune self.workq = workq self.resume = resume self.checkpoint_file = None self.checkpoint_db = None self.src = src self.dest = os.path.abspath(dest) # cache, keep the size conservative # TODO: we need a more portable LRU size if hostcnt != 0: max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE) procs_per_host = self.circle.size / hostcnt self._read_cache_limit = ((max_ofile - 64) / procs_per_host) / 3 self._write_cache_limit = ((max_ofile - 64) / procs_per_host) * 2 / 3 if self._read_cache_limit <= 0 or self._write_cache_limit <= 0: self._read_cache_limit = 1 self._write_cache_limit = 8 self.rfd_cache = LRU(self._read_cache_limit) self.wfd_cache = LRU(self._write_cache_limit) self.cnt_filesize_prior = 0 self.cnt_filesize = 0 self.blocksize = 1024 * 1024 self.chunksize = 1024 * 1024 # debug self.d = {"rank": "rank %s" % circle.rank} self.wtime_started = MPI.Wtime() self.wtime_ended = None self.workcnt = 0 # this is the cnt for the enqued items self.reduce_items = 0 # this is the cnt for processed items if self.treewalk: log.debug("treewalk files = %s" % treewalk.flist, extra=self.d) # fini_check self.fini_cnt = Counter() # verify self.verify = verify self.use_store = False if self.verify: self.chunksums_mem = [] self.chunksums_buf = [] # checkpointing self.checkpoint_interval = sys.maxsize self.checkpoint_last = MPI.Wtime() if self.circle.rank == 0: print("Start copying process ...") def rw_cache_limit(self): return (self._read_cache_limit, self._write_cache_limit) def set_fixed_chunksize(self, sz): self.chunksize = sz def set_adaptive_chunksize(self, totalsz): self.chunksize = utils.calc_chunksize(totalsz) if self.circle.rank == 0: print("Adaptive chunksize: %s" % bytes_fmt(self.chunksize)) def cleanup(self): for f in self.rfd_cache.values(): try: os.close(f) except OSError as e: pass for f in self.wfd_cache.values(): try: os.close(f) except OSError as e: pass # remove checkpoint file if self.checkpoint_file and os.path.exists(self.checkpoint_file): os.remove(self.checkpoint_file) if self.checkpoint_db and os.path.exists(self.checkpoint_db): os.remove(self.checkpoint_db) # remove provided checkpoint file if G.resume and G.chk_file and os.path.exists(G.chk_file): os.remove(G.chk_file) if G.resume and G.chk_file_db and os.path.exists(G.chk_file_db): os.remove(G.chk_file_db) # remove chunksums file if self.verify: if hasattr(self, "chunksums_db"): self.chunksums_db.cleanup() # we need to do this because if last job didn't finish cleanly # the fwalk files can be found as leftovers # and if fcp cleanup has a chance, it should clean up that """ fwalk = "%s/fwalk.%s" % (G.tempdir, self.circle.rank) if os.path.exists(fwalk): os.remove(fwalk) """ def new_fchunk(self, fitem): fchunk = FileChunk() # default cmd = copy fchunk.src = fitem.path fchunk.dest = destpath(fitem, self.dest) return fchunk def enq_file(self, fi): """ Process a single file, represented by "fi" - FileItem It involves chunking this file and equeue all chunks. 
""" chunks = fi.st_size / self.chunksize remaining = fi.st_size % self.chunksize workcnt = 0 if fi.st_size == 0: # empty file fchunk = self.new_fchunk(fi) fchunk.offset = 0 fchunk.length = 0 self.enq(fchunk) workcnt += 1 else: for i in range(chunks): fchunk = self.new_fchunk(fi) fchunk.offset = i * self.chunksize fchunk.length = self.chunksize self.enq(fchunk) workcnt += chunks if remaining > 0: # send remainder fchunk = self.new_fchunk(fi) fchunk.offset = chunks * self.chunksize fchunk.length = remaining self.enq(fchunk) workcnt += 1 # save work cnt self.workcnt += workcnt log.debug("enq_file(): %s, size = %s, workcnt = %s" % (fi.path, fi.st_size, workcnt), extra=self.d) def handle_fitem(self, fi): if os.path.islink(fi.path): dest = destpath(fi, self.dest) linkto = os.readlink(fi.path) try: os.symlink(linkto, dest) except Exception as e: log.debug("%s, skipping sym link %s" % (e, fi.path), extra=self.d) elif stat.S_ISREG(fi.st_mode): self.enq_file(fi) # where chunking takes place def create(self): """ Each task has one create(), which is invoked by circle ONCE. For FCP, each task will handle_fitem() -> enq_file() to process each file gathered during the treewalk stage. """ if not G.use_store and self.workq: # restart self.setq(self.workq) return if self.resume: return # construct and enable all copy operations # we batch operation hard-coded log.info("create() starts, flist length = %s" % len(self.treewalk.flist), extra=self.d) # flist in memory if len(self.treewalk.flist) > 0: for fi in self.treewalk.flist: self.handle_fitem(fi) # flist in buf if len(self.treewalk.flist_buf) > 0: for fi in self.treewalk.flist_buf: self.handle_fitem(fi) # flist in database if self.treewalk.use_store: while self.treewalk.flist_db.qsize > 0: fitems, _ = self.treewalk.flist_db.mget(G.DB_BUFSIZE) for fi in fitems: self.handle_fitem(fi) self.treewalk.flist_db.mdel(G.DB_BUFSIZE) # both memory and databse checkpoint if self.checkpoint_file: self.do_no_interrupt_checkpoint() self.checkpoint_last = MPI.Wtime() # gather total_chunks self.circle.comm.barrier() G.total_chunks = self.circle.comm.allreduce(self.workcnt, op=MPI.SUM) #G.total_chunks = self.circle.comm.bcast(G.total_chunks) #print("Total chunks: ",G.total_chunks) def do_open(self, k, d, flag, limit): """ @param k: the file path @param d: dictionary of <path, file descriptor> @return: file descriptor """ if d.has_key(k): return d[k] if len(d.keys()) >= limit: # over the limit # clean up the least used old_k, old_v = d.items()[-1] try: os.close(old_v) except OSError as e: log.warn("FD for %s not valid when closing" % old_k, extra=self.d) fd = -1 try: fd = os.open(k, flag) except OSError as e: if e.errno == 28: # no space left log.error("Critical error: %s, exit!" 
% e, extra=self.d) self.circle.exit(0) # should abort else: log.error("OSError({0}):{1}, skipping {2}".format(e.errno, e.strerror, k), extra=self.d) else: if fd > 0: d[k] = fd finally: return fd @staticmethod def do_mkdir(work): src = work.src dest = work.dest if not os.path.exists(dest): os.makedirs(dest) def do_copy(self, work): src = work.src dest = work.dest basedir = os.path.dirname(dest) if not os.path.exists(basedir): os.makedirs(basedir) rfd = self.do_open(src, self.rfd_cache, os.O_RDONLY, self._read_cache_limit) if rfd < 0: return False wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY | os.O_CREAT, self._write_cache_limit) if wfd < 0: if args.force: try: os.unlink(dest) except OSError as e: log.error("Failed to unlink %s, %s " % (dest, e), extra=self.d) return False else: wfd = self.do_open(dest, self.wfd_cache, os.O_WRONLY, self._write_cache_limit) else: log.error("Failed to create output file %s" % dest, extra=self.d) return False # do the actual copy self.write_bytes(rfd, wfd, work) # update tally self.cnt_filesize += work.length if G.verbosity > 2: log.debug("Transferred %s bytes from:\n\t [%s] to [%s]" % (self.cnt_filesize, src, dest), extra=self.d) return True def do_no_interrupt_checkpoint(self): a = Thread(target=self.do_checkpoint) a.start() a.join() log.debug("checkpoint: %s" % self.checkpoint_file, extra=self.d) print("\nMake checkpoint files: ", self.checkpoint_file) def do_checkpoint(self): # when make checkpoint, first write workq and workq_buf into checkpoint file, then make a copy of workq_db if it exists for k in self.wfd_cache.keys(): os.close(self.wfd_cache[k]) # clear the cache self.wfd_cache.clear() tmp_file = self.checkpoint_file + ".part" with open(tmp_file, "wb") as f: self.circle.workq.extend(self.circle.workq_buf) self.circle.workq_buf.clear() cobj = Checkpoint(self.src, self.dest, self.get_workq(), self.totalsize) pickle.dump(cobj, f, pickle.HIGHEST_PROTOCOL) # POSIX requires rename to be atomic os.rename(tmp_file, self.checkpoint_file) # copy workq_db database file if hasattr(self.circle, "workq_db") and len(self.circle.workq_db) > 0: self.checkpoint_db = self.checkpoint_file + ".db" if not G.resume: shutil.copy2(self.circle.dbname, self.checkpoint_db) else: # in resume mode, make a copy of current workq db file, which is provided checkpoint db file self.workdir = os.getcwd() existingCheckpoint = os.path.join(self.workdir,".pcp_workq.%s.%s.db" % (G.rid, self.circle.rank)) shutil.copy2(existingCheckpoint,self.checkpoint_db) def process(self): """ The only work is "copy" TODO: clean up other actions such as mkdir/fini_check """ if not G.use_store: curtime = MPI.Wtime() if curtime - self.checkpoint_last > self.checkpoint_interval: self.do_no_interrupt_checkpoint() log.info("Checkpointing done ...", extra=self.d) self.checkpoint_last = curtime work = self.deq() self.reduce_items += 1 if isinstance(work, FileChunk): self.do_copy(work) else: log.warn("Unknown work object: %s" % work, extra=self.d) err_and_exit("Not a correct workq format") def reduce_init(self, buf): buf['cnt_filesize'] = self.cnt_filesize if sys.platform == 'darwin': buf['mem_snapshot'] = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss else: buf['mem_snapshot'] = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss * 1024 def reduce(self, buf1, buf2): buf1['cnt_filesize'] += buf2['cnt_filesize'] buf1['mem_snapshot'] += buf2['mem_snapshot'] return buf1 def reduce_report(self, buf): out = "" if self.totalsize != 0: out += "%.2f %% finished, " % (100 * float(buf['cnt_filesize']) / 
self.totalsize) out += "%s copied" % bytes_fmt(buf['cnt_filesize']) if self.circle.reduce_time_interval != 0: rate = float(buf['cnt_filesize'] - self.cnt_filesize_prior) / self.circle.reduce_time_interval self.cnt_filesize_prior = buf['cnt_filesize'] out += ", estimated transfer rate: %s/s" % bytes_fmt(rate) out += ", memory usage: %s" % bytes_fmt(buf['mem_snapshot']) print(out) def reduce_finish(self, buf): # self.reduce_report(buf) pass def epilogue(self): global taskloads self.wtime_ended = MPI.Wtime() taskloads = self.circle.comm.gather(self.reduce_items) if self.circle.rank == 0: if self.totalsize == 0: print("\nZero filesize detected, done.\n") return tlapse = self.wtime_ended - self.wtime_started rate = float(self.totalsize) / tlapse print("\nFCP Epilogue:\n") print("\t{:<20}{:<20}".format("Ending at:", utils.current_time())) print("\t{:<20}{:<20}".format("Completed in:", utils.conv_time(tlapse))) print("\t{:<20}{:<20}".format("Transfer Rate:", "%s/s" % bytes_fmt(rate))) print("\t{:<20}{:<20}".format("Use store chunksums:", "%s" % self.use_store)) print("\t{:<20}{:<20}".format("Use store workq:", "%s" % self.circle.use_store)) print("\t{:<20}{:<20}".format("FCP Loads:", "%s" % taskloads)) def read_then_write(self, rfd, wfd, work, num_of_bytes, m): """ core entry point for copy action: first read then write. @param num_of_bytes: the exact amount of bytes we will copy @return: False if unsuccessful. """ buf = None try: buf = readn(rfd, num_of_bytes) except IOError: self.logger.error("Failed to read %s", work.src, extra=self.d) return False try: writen(wfd, buf) except IOError: self.logger.error("Failed to write %s", work.dest, extra=self.d) return False if m: m.update(buf) return True def write_bytes(self, rfd, wfd, work): os.lseek(rfd, work.offset, os.SEEK_SET) os.lseek(wfd, work.offset, os.SEEK_SET) m = None if self.verify: m = hashlib.sha1() remaining = work.length while remaining != 0: if remaining >= self.blocksize: self.read_then_write(rfd, wfd, work, self.blocksize, m) remaining -= self.blocksize else: self.read_then_write(rfd, wfd, work, remaining, m) remaining = 0 if self.verify: # use src path here ck = ChunkSum(work.dest, offset=work.offset, length=work.length, digest=m.hexdigest()) if len(self.chunksums_mem) < G.memitem_threshold: self.chunksums_mem.append(ck) else: self.chunksums_buf.append(ck) if len(self.chunksums_buf) == G.DB_BUFSIZE: if self.use_store == False: self.workdir = os.getcwd() self.chunksums_dbname = "%s/chunksums.%s" % (G.tempdir, self.circle.rank) self.chunksums_db = DbStore(dbname=self.chunksums_dbname) self.use_store = True self.chunksums_db.mput(self.chunksums_buf) del self.chunksums_buf[:]
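# --- Illustrative sketch (not part of the original sources) ---
# Hypothetical sketch of the crash-safe checkpoint write used by do_checkpoint():
# serialize the state into a ".part" temporary file and then os.rename() it into
# place, relying on POSIX rename atomicity so readers never observe a partial file.
# The state payload and file names are illustrative.
import os
import pickle

def write_checkpoint(checkpoint_file, state):
    tmp_file = checkpoint_file + ".part"
    with open(tmp_file, "wb") as f:
        pickle.dump(state, f, pickle.HIGHEST_PROTOCOL)
    os.rename(tmp_file, checkpoint_file)  # atomic replacement on POSIX filesystems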
class AccountDB(BaseAccountDB): logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB')) def __init__(self, db: BaseDB, state_root: Hash32 = BLANK_ROOT_HASH) -> None: r""" Internal implementation details (subject to rapid change): Database entries go through several pipes, like so... .. code:: -> hash-trie -> storage lookups / db > _batchdb ---------------------------> _journaldb ----------------> code lookups \ -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups Journaling sequesters writes at the _journal* attrs ^, until persist is called. _batchtrie enables us to prune all trie changes while building state, without deleting old trie roots. _batchdb and _batchtrie together enable us to make the state root, without saving everything to the database. _journaldb is a journaling of the keys and values used to store code and account storage. _trie is a hash-trie, used to generate the state root _trie_cache is a cache tied to the state root of the trie. It is important that this cache is checked *after* looking for the key in _journaltrie, because the cache is only invalidated after a state root change. _journaltrie is a journaling of the accounts (an address->rlp mapping, rather than the nodes stored by the trie). This enables a squashing of all account changes before pushing them into the trie. .. NOTE:: There is an opportunity to do something similar for storage AccountDB synchronizes the snapshot/revert/persist of both of the journals. """ self._batchdb = BatchDB(db) self._batchtrie = BatchDB(db) self._journaldb = JournalDB(self._batchdb) self._trie = HashTrie( HexaryTrie(self._batchtrie, state_root, prune=True)) self._trie_cache = CacheDB(self._trie) self._journaltrie = JournalDB(self._trie_cache) self._account_cache = LRU(2048) @property def state_root(self) -> Hash32: return self._trie.root_hash @state_root.setter def state_root(self, value: Hash32) -> None: self._trie_cache.reset_cache() self._trie.root_hash = value def has_root(self, state_root: bytes) -> bool: return state_root in self._batchtrie # # Storage # def get_storage(self, address: Address, slot: int, from_journal: bool = True) -> int: validate_canonical_address(address, title="Storage Address") validate_uint256(slot, title="Storage Slot") account = self._get_account(address, from_journal) storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root)) slot_as_key = pad32(int_to_big_endian(slot)) if slot_as_key in storage: encoded_value = storage[slot_as_key] return rlp.decode(encoded_value, sedes=rlp.sedes.big_endian_int) else: return 0 def set_storage(self, address: Address, slot: int, value: int) -> None: validate_uint256(value, title="Storage Value") validate_uint256(slot, title="Storage Slot") validate_canonical_address(address, title="Storage Address") account = self._get_account(address) storage = HashTrie(HexaryTrie(self._journaldb, account.storage_root)) slot_as_key = pad32(int_to_big_endian(slot)) if value: encoded_value = rlp.encode(value) storage[slot_as_key] = encoded_value else: del storage[slot_as_key] self._set_account(address, account.copy(storage_root=storage.root_hash)) def delete_storage(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account.copy(storage_root=BLANK_ROOT_HASH)) # # Balance # def get_balance(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = 
self._get_account(address) return account.balance def set_balance(self, address: Address, balance: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(balance, title="Account Balance") account = self._get_account(address) self._set_account(address, account.copy(balance=balance)) # # Nonce # def get_nonce(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.nonce def set_nonce(self, address: Address, nonce: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(nonce, title="Nonce") account = self._get_account(address) self._set_account(address, account.copy(nonce=nonce)) def increment_nonce(self, address: Address) -> None: current_nonce = self.get_nonce(address) self.set_nonce(address, current_nonce + 1) # # Code # def get_code(self, address: Address) -> bytes: validate_canonical_address(address, title="Storage Address") try: return self._journaldb[self.get_code_hash(address)] except KeyError: return b"" def set_code(self, address: Address, code: bytes) -> None: validate_canonical_address(address, title="Storage Address") validate_is_bytes(code, title="Code") account = self._get_account(address) code_hash = keccak(code) self._journaldb[code_hash] = code self._set_account(address, account.copy(code_hash=code_hash)) def get_code_hash(self, address: Address) -> Hash32: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.code_hash def delete_code(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account.copy(code_hash=EMPTY_SHA3)) # # Account Methods # def account_has_code_or_nonce(self, address: Address) -> bool: return self.get_nonce(address) != 0 or self.get_code_hash( address) != EMPTY_SHA3 def delete_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") if address in self._account_cache: del self._account_cache[address] del self._journaltrie[address] def account_exists(self, address: Address) -> bool: validate_canonical_address(address, title="Storage Address") return self._journaltrie.get(address, b'') != b'' def touch_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account) def account_is_empty(self, address: Address) -> bool: return not self.account_has_code_or_nonce( address) and self.get_balance(address) == 0 # # Internal # def _get_account(self, address: Address, from_journal: bool = True) -> Account: if from_journal and address in self._account_cache: return self._account_cache[address] rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'') if rlp_account: account = rlp.decode(rlp_account, sedes=Account) else: account = Account() if from_journal: self._account_cache[address] = account return account def _set_account(self, address: Address, account: Account) -> None: self._account_cache[address] = account rlp_account = rlp.encode(account, sedes=Account) self._journaltrie[address] = rlp_account # # Record and discard API # def record(self) -> Tuple[UUID, UUID]: return (self._journaldb.record(), self._journaltrie.record()) def discard(self, changeset: Tuple[UUID, UUID]) -> None: db_changeset, trie_changeset = changeset self._journaldb.discard(db_changeset) 
self._journaltrie.discard(trie_changeset) self._account_cache.clear() def commit(self, changeset: Tuple[UUID, UUID]) -> None: db_changeset, trie_changeset = changeset self._journaldb.commit(db_changeset) self._journaltrie.commit(trie_changeset) def make_state_root(self) -> Hash32: self.logger.debug2("Generating AccountDB trie") self._journaldb.persist() self._journaltrie.persist() return self.state_root def persist(self) -> None: self.make_state_root() self._batchtrie.commit(apply_deletes=False) self._batchdb.commit(apply_deletes=True) def _log_pending_accounts(self) -> None: accounts_displayed = set() # type: Set[bytes] queued_changes = self._journaltrie.journal.journal_data.items() # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078 for _, accounts in reversed(queued_changes): for address in accounts: if address in accounts_displayed: continue else: accounts_displayed.add(address) account = self._get_account(Address(address)) self.logger.debug2( "Account %s: balance %d, nonce %d, storage root %s, code hash %s", encode_hex(address), account.balance, account.nonce, encode_hex(account.storage_root), encode_hex(account.code_hash), )
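# --- Illustrative sketch (not part of the original sources) ---
# Generic, hypothetical sketch of the account-cache pattern in AccountDB._get_account():
# decoded objects are memoized in a bounded LRU keyed by address, and the cache entry
# is dropped whenever the underlying record is deleted. A plain dict stands in for the
# journaled trie, and `Account` is a simplified stand-in type.
import json
from collections import namedtuple
from lru import LRU

Account = namedtuple("Account", ["nonce", "balance"])

account_store = {}           # stand-in for the journaled account trie (address -> json)
account_cache = LRU(2048)

def get_account(address):
    if address in account_cache:
        return account_cache[address]
    raw = account_store.get(address)
    account = Account(**json.loads(raw)) if raw else Account(nonce=0, balance=0)
    account_cache[address] = account
    return account

def delete_account(address):
    if address in account_cache:
        del account_cache[address]
    account_store.pop(address, None)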
class BaseSerializable(collections.abc.Sequence): cache = None def __init__(self, *args, cache=None, **kwargs): arg_names = self._meta.field_names or () validate_args_and_kwargs(args, kwargs, arg_names) field_values = merge_kwargs_to_args(args, kwargs, arg_names) # Ensure that all the fields have been given values in initialization if len(field_values) != len(arg_names): raise TypeError( "Argument count mismatch. expected {0} - got {1} - missing {2}" .format( len(arg_names), len(field_values), ",".join(arg_names[len(field_values):]), )) for value, attr in zip(field_values, self._meta.field_attrs or ()): setattr(self, attr, make_immutable(value)) self.cache = LRU(DEFAULT_CACHE_SIZE) if cache is None else cache def as_dict(self): return dict((field, value) for field, value in zip(self._meta.field_names, self)) def __iter__(self): for attr in self._meta.field_attrs: yield getattr(self, attr) def __getitem__(self, index): if isinstance(index, int): attr = self._meta.field_attrs[index] return getattr(self, attr) elif isinstance(index, slice): field_slice = self._meta.field_attrs[index] return tuple(getattr(self, field) for field in field_slice) elif isinstance(index, str): return getattr(self, index) else: raise IndexError("Unsupported type for __getitem__: {0}".format( type(index))) def __len__(self): return len(self._meta.fields) def __eq__(self, other): satisfies_class_relationship = issubclass( self.__class__, other.__class__) or issubclass( other.__class__, self.__class__) if not satisfies_class_relationship: return False else: return self.hash_tree_root == other.hash_tree_root def __getstate__(self): state = self.__dict__.copy() # The hash() builtin is not stable across processes # (https://docs.python.org/3/reference/datamodel.html#object.__hash__), so we do this here # to ensure pickled instances don't carry the cached hash() as that may cause issues like # https://github.com/ethereum/py-evm/issues/1318 state["_hash_cache"] = None return state _hash_cache = None def __hash__(self): if self._hash_cache is None: self._hash_cache = hash(self.__class__) * int.from_bytes( self.hash_tree_root, "little") return self._hash_cache def copy(self, *args, **kwargs): missing_overrides = (set(self._meta.field_names).difference( kwargs.keys()).difference(self._meta.field_names[:len(args)])) unchanged_kwargs = { key: value if is_immutable_field_value(value) else copy.deepcopy(value) for key, value in self.as_dict().items() if key in missing_overrides } combined_kwargs = dict(**unchanged_kwargs, **kwargs) all_kwargs = merge_args_to_kwargs(args, combined_kwargs, self._meta.field_names) result = type(self)(**all_kwargs) result.cache = self.cache return result def reset_cache(self): self.cache.clear() self._fixed_size_section_length_cache = None self._serialize_cache = None def __copy__(self): return self.copy() def __deepcopy__(self, memodict=None): if memodict is None: memodict = {} cls = self.__class__ result = cls.__new__(cls) memodict[id(self)] = result for k, v in self.__dict__.items(): if k != "cache": setattr(result, k, copy.deepcopy(v, memodict)) result.cache = self.cache result._fixed_size_section_length_cache = self._fixed_size_section_length_cache return result _fixed_size_section_length_cache = None _serialize_cache = None @property def hash_tree_root(self): return self.__class__.get_hash_tree_root(self, cache=True) @classmethod def get_sedes_id(cls) -> str: # Serializable implementation name should be unique return cls.__name__ def get_key(self) -> bytes: # Serilaize with 
self._meta.container_sedes key = get_base_key(self._meta.container_sedes, self).hex() if len(key) == 0: key = "" return f"{self.__class__.get_sedes_id()}{key}"
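# --- Illustrative sketch (not part of the original sources) ---
# Hypothetical sketch of the per-instance cache idea in BaseSerializable: each object
# carries a bounded LRU that memoizes expensive derived values (e.g. a tree root),
# copies share the same cache object, and reset_cache() simply clears it.
# DEFAULT_CACHE_SIZE and `expensive_root` are illustrative stand-ins.
import hashlib
from lru import LRU

DEFAULT_CACHE_SIZE = 64

class Record(object):
    def __init__(self, payload, cache=None):
        self.payload = payload  # bytes
        self.cache = LRU(DEFAULT_CACHE_SIZE) if cache is None else cache

    def expensive_root(self):
        key = ("root", self.payload)
        if key in self.cache:
            return self.cache[key]
        value = hashlib.sha256(self.payload).digest()   # stand-in for real tree hashing
        self.cache[key] = value
        return value

    def copy(self):
        return Record(self.payload, cache=self.cache)   # copies share the cache object

    def reset_cache(self):
        self.cache.clear()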
class BaseKernel: appid = "841e6cd456e05713213f413e8765648e" user_ids = np.array([ '0112DBCD5299791D5A53287D27F4E18A5', '0480704B8A3471FF360DD22AB5C3D9F8E', '09B78AFBFCF3F97F34F12F945769FBD8B' ]) def __init__(self): # if not self.user_ids or self.user_ids.size == 0: # self.uid = self.register() # else: self.uid = self.user_ids[1] #np.random.choice(self.user_ids, 1)[0] self.cache = LRU(300) # thread = Thread(target=self.schedule) # thread.start() def kernel(self, q): # if q in self.cache: # return self.cache[q] answer = self.chat(q) # self.cache[q] = answer return answer def register(self): register_data = {"cmd": "register", "appid": self.appid} url = "http://idc.emotibot.com/api/ApiKey/openapi.php" r = requests.post(url, params=register_data) response = json.dumps(r.json(), ensure_ascii=False) jsondata = json.loads(response) datas = jsondata.get('data') for data in datas: return data.get('value') def chat(self, q): try: register_data = { "cmd": "chat", "appid": self.appid, "userid": self.uid, "text": q, "location": "南京" } url = "http://idc.emotibot.com/api/ApiKey/openapi.php" r = requests.post(url, params=register_data) response = json.dumps(r.json(), ensure_ascii=False) jsondata = json.loads(response) datas = jsondata.get("data") for data in datas: response = data.get('value') if response: break return response except Exception: return 'base kernel is detached' def clear_cache(self): print('clear') self.cache.clear() def schedule(self): schedule.every().hour.do(self.clear_cache) while True: schedule.run_pending() time.sleep(60)
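# --- Illustrative sketch (not part of the original sources) ---
# Hypothetical sketch of the background cache-expiry loop sketched (but commented out)
# in BaseKernel: a thread runs the `schedule` library and clears the answer cache once
# per hour. The 300-entry size and the function names are illustrative.
import time
from threading import Thread

import schedule
from lru import LRU

answer_cache = LRU(300)

def clear_cache():
    answer_cache.clear()

def run_scheduler():
    schedule.every().hour.do(clear_cache)
    while True:
        schedule.run_pending()
        time.sleep(60)

# Thread(target=run_scheduler, daemon=True).start()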
class AccountDB(BaseAccountDB): logger = cast(ExtendedDebugLogger, logging.getLogger('eth.db.account.AccountDB')) def __init__(self, db: BaseAtomicDB, state_root: Hash32=BLANK_ROOT_HASH) -> None: r""" Internal implementation details (subject to rapid change): Database entries go through several pipes, like so... .. code:: db > _batchdb ---------------------------> _journaldb ----------------> code lookups \ -> _batchtrie -> _trie -> _trie_cache -> _journaltrie --------------> account lookups Journaling sequesters writes at the _journal* attrs ^, until persist is called. _batchtrie enables us to prune all trie changes while building state, without deleting old trie roots. _batchdb and _batchtrie together enable us to make the state root, without saving everything to the database. _journaldb is a journaling of the keys and values used to store code and account storage. _trie is a hash-trie, used to generate the state root _trie_cache is a cache tied to the state root of the trie. It is important that this cache is checked *after* looking for the key in _journaltrie, because the cache is only invalidated after a state root change. _journaltrie is a journaling of the accounts (an address->rlp mapping, rather than the nodes stored by the trie). This enables a squashing of all account changes before pushing them into the trie. .. NOTE:: StorageDB works similarly AccountDB synchronizes the snapshot/revert/persist of both of the journals. """ self._raw_store_db = db self._batchdb = BatchDB(db) self._batchtrie = BatchDB(db) self._journaldb = JournalDB(self._batchdb) self._trie = HashTrie(HexaryTrie(self._batchtrie, state_root, prune=True)) self._trie_cache = CacheDB(self._trie) self._journaltrie = JournalDB(self._trie_cache) self._account_cache = LRU(2048) self._account_stores = {} # type: Dict[Address, AccountStorageDB] self._dirty_accounts = set() # type: Set[Address] @property def state_root(self) -> Hash32: return self._trie.root_hash @state_root.setter def state_root(self, value: Hash32) -> None: if self._trie.root_hash != value: self._trie_cache.reset_cache() self._trie.root_hash = value def has_root(self, state_root: bytes) -> bool: return state_root in self._batchtrie # # Storage # def get_storage(self, address: Address, slot: int, from_journal: bool=True) -> int: validate_canonical_address(address, title="Storage Address") validate_uint256(slot, title="Storage Slot") account_store = self._get_address_store(address) return account_store.get(slot, from_journal) def set_storage(self, address: Address, slot: int, value: int) -> None: validate_uint256(value, title="Storage Value") validate_uint256(slot, title="Storage Slot") validate_canonical_address(address, title="Storage Address") account_store = self._get_address_store(address) self._dirty_accounts.add(address) account_store.set(slot, value) def delete_storage(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") self._set_storage_root(address, BLANK_ROOT_HASH) self._wipe_storage(address) def _wipe_storage(self, address: Address) -> None: """ Wipe out the storage, without explicitly handling the storage root update """ account_store = self._get_address_store(address) self._dirty_accounts.add(address) account_store.delete() def _get_address_store(self, address: Address) -> AccountStorageDB: if address in self._account_stores: store = self._account_stores[address] else: storage_root = self._get_storage_root(address) store = AccountStorageDB(self._raw_store_db, storage_root, address) 
self._account_stores[address] = store return store def _dirty_account_stores(self) -> Iterable[Tuple[Address, AccountStorageDB]]: for address in self._dirty_accounts: store = self._account_stores[address] yield address, store @to_tuple def _get_changed_roots(self) -> Iterable[Tuple[Address, Hash32]]: # list all the accounts that were changed, and their new storage roots for address, store in self._dirty_account_stores(): if store.has_changed_root: yield address, store.get_changed_root() def _get_storage_root(self, address: Address) -> Hash32: account = self._get_account(address) return account.storage_root def _set_storage_root(self, address: Address, new_storage_root: Hash32) -> None: account = self._get_account(address) self._set_account(address, account.copy(storage_root=new_storage_root)) def _validate_flushed_storage(self, address: Address, store: AccountStorageDB) -> None: if store.has_changed_root: actual_storage_root = self._get_storage_root(address) expected_storage_root = store.get_changed_root() if expected_storage_root != actual_storage_root: raise ValidationError( "Storage root was not saved to account before trying to persist roots. " "Account %r had storage %r, but should be %r." % ( address, actual_storage_root, expected_storage_root, ) ) # # Balance # def get_balance(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.balance def set_balance(self, address: Address, balance: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(balance, title="Account Balance") account = self._get_account(address) self._set_account(address, account.copy(balance=balance)) # # Nonce # def get_nonce(self, address: Address) -> int: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.nonce def set_nonce(self, address: Address, nonce: int) -> None: validate_canonical_address(address, title="Storage Address") validate_uint256(nonce, title="Nonce") account = self._get_account(address) self._set_account(address, account.copy(nonce=nonce)) def increment_nonce(self, address: Address) -> None: current_nonce = self.get_nonce(address) self.set_nonce(address, current_nonce + 1) # # Code # def get_code(self, address: Address) -> bytes: validate_canonical_address(address, title="Storage Address") try: return self._journaldb[self.get_code_hash(address)] except KeyError: return b"" def set_code(self, address: Address, code: bytes) -> None: validate_canonical_address(address, title="Storage Address") validate_is_bytes(code, title="Code") account = self._get_account(address) code_hash = keccak(code) self._journaldb[code_hash] = code self._set_account(address, account.copy(code_hash=code_hash)) def get_code_hash(self, address: Address) -> Hash32: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) return account.code_hash def delete_code(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account.copy(code_hash=EMPTY_SHA3)) # # Account Methods # def account_has_code_or_nonce(self, address: Address) -> bool: return self.get_nonce(address) != 0 or self.get_code_hash(address) != EMPTY_SHA3 def delete_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") if address in self._account_cache: del self._account_cache[address] del 
self._journaltrie[address] self._wipe_storage(address) def account_exists(self, address: Address) -> bool: validate_canonical_address(address, title="Storage Address") return self._journaltrie.get(address, b'') != b'' def touch_account(self, address: Address) -> None: validate_canonical_address(address, title="Storage Address") account = self._get_account(address) self._set_account(address, account) def account_is_empty(self, address: Address) -> bool: return not self.account_has_code_or_nonce(address) and self.get_balance(address) == 0 # # Internal # def _get_account(self, address: Address, from_journal: bool=True) -> Account: if from_journal and address in self._account_cache: return self._account_cache[address] rlp_account = (self._journaltrie if from_journal else self._trie_cache).get(address, b'') if rlp_account: account = rlp.decode(rlp_account, sedes=Account) else: account = Account() if from_journal: self._account_cache[address] = account return account def _set_account(self, address: Address, account: Account) -> None: self._account_cache[address] = account rlp_account = rlp.encode(account, sedes=Account) self._journaltrie[address] = rlp_account # # Record and discard API # def record(self) -> UUID: changeset_id = self._journaldb.record() self._journaltrie.record(changeset_id) for _, store in self._dirty_account_stores(): store.record(changeset_id) return changeset_id def discard(self, changeset: UUID) -> None: self._journaldb.discard(changeset) self._journaltrie.discard(changeset) self._account_cache.clear() for _, store in self._dirty_account_stores(): store.discard(changeset) def commit(self, changeset: UUID) -> None: self._journaldb.commit(changeset) self._journaltrie.commit(changeset) for _, store in self._dirty_account_stores(): store.commit(changeset) def make_state_root(self) -> Hash32: for _, store in self._dirty_account_stores(): store.make_storage_root() for address, storage_root in self._get_changed_roots(): self.logger.debug2( "Updating account 0x%s to storage root 0x%s", address.hex(), storage_root.hex(), ) self._set_storage_root(address, storage_root) self._journaldb.persist() self._journaltrie.persist() return self.state_root def persist(self) -> None: self.make_state_root() # persist storage with self._raw_store_db.atomic_batch() as write_batch: for address, store in self._dirty_account_stores(): self._validate_flushed_storage(address, store) store.persist(write_batch) for address, new_root in self._get_changed_roots(): if new_root not in self._raw_store_db and new_root != BLANK_ROOT_HASH: raise ValidationError( "After persisting storage trie, a root node was not found. " "State root for account 0x%s is missing for hash 0x%s." 
% ( address.hex(), new_root.hex(), ) ) # reset local storage trackers self._account_stores = {} self._dirty_accounts = set() # persist accounts self._validate_generated_root() with self._raw_store_db.atomic_batch() as write_batch: self._batchtrie.commit_to(write_batch, apply_deletes=False) self._batchdb.commit_to(write_batch, apply_deletes=False) def _validate_generated_root(self) -> None: db_diff = self._journaldb.diff() if len(db_diff): raise ValidationError( "AccountDB had a dirty db when it needed to be clean: %r" % db_diff ) trie_diff = self._journaltrie.diff() if len(trie_diff): raise ValidationError( "AccountDB had a dirty trie when it needed to be clean: %r" % trie_diff ) def _log_pending_accounts(self) -> None: accounts_displayed = set() # type: Set[bytes] queued_changes = self._journaltrie.journal.journal_data.items() # mypy bug for ordered dict reversibility: https://github.com/python/typeshed/issues/2078 for _, accounts in reversed(queued_changes): for address in accounts: if address in accounts_displayed: continue else: accounts_displayed.add(address) account = self._get_account(Address(address)) self.logger.debug2( "Account %s: balance %d, nonce %d, storage root %s, code hash %s", encode_hex(address), account.balance, account.nonce, encode_hex(account.storage_root), encode_hex(account.code_hash), )
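# --- Illustrative sketch (not part of the original sources) ---
# Hypothetical sketch of the interaction between the LRU account cache and the journal
# in AccountDB.record()/discard(): writes go to an in-memory changeset, cached reads may
# reflect those journaled writes, so discarding a changeset must also clear the cache.
# The journal here is a simplified dict-of-dicts, not the real JournalDB.
from uuid import uuid4
from lru import LRU

committed = {}
journal = {}                 # changeset_id -> {key: value}
cache = LRU(2048)

def record():
    changeset_id = uuid4()
    journal[changeset_id] = {}
    return changeset_id

def put(changeset_id, key, value):
    journal[changeset_id][key] = value
    cache[key] = value

def get(key):
    if key in cache:
        return cache[key]    # may come from an uncommitted changeset
    return committed.get(key)

def discard(changeset_id):
    journal.pop(changeset_id, None)
    cache.clear()            # drop cached reads that may reflect discarded writes

def commit(changeset_id):
    committed.update(journal.pop(changeset_id, {}))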