def gather_many(self, msgs):
    """ Gather the data for many compute messages at once

    Returns
    -------
    good: the input messages for which we have data
    bad: a dict of task keys for which we could not find data
    data: The scope in which to run tasks
    len(remote): the number of new keys we've gathered
    """
    with log_errors():
        who_has = merge(msg['who_has'] for msg in msgs if 'who_has' in msg)
        local = {k: self.data[k] for k in who_has if k in self.data}
        who_has = {k: v for k, v in who_has.items() if k not in local}
        remote, bad_data = yield gather_from_workers(who_has, permissive=True,
                                                     rpc=self.rpc, close=False)
        if remote:
            self.data.update(remote)
            yield self.center.add_keys(address=self.address, keys=list(remote))

        data = merge(local, remote)

        if bad_data:
            missing = {msg['key']: {k for k in msg['who_has'] if k in bad_data}
                       for msg in msgs if 'who_has' in msg}
            bad = {k: v for k, v in missing.items() if v}
            good = [msg for msg in msgs if not missing.get(msg['key'])]
        else:
            good, bad = msgs, {}
        raise Return([good, bad, data, len(remote)])
def pandas_read_csv(self, usecols=None, **kwargs):
    """ Use pandas.read_csv with the right keyword arguments

    In particular we know what dtypes should be, which columns are dates,
    etc...
    """
    dtypes, dates = dshape_to_pandas(self.schema)

    if usecols:
        if builtins.all(isinstance(c, int) for c in usecols):
            usecols = get(usecols, self.columns)
        dates = [name for name in dates if name in usecols]

    result = pd.read_csv(self.path,
                         names=kwargs.pop('names', self.columns),
                         usecols=usecols,
                         compression={'gz': 'gzip',
                                      'bz2': 'bz2'}.get(ext(self.path)),
                         dtype=kwargs.pop('dtype', dtypes),
                         parse_dates=kwargs.pop('parse_dates', dates),
                         encoding=kwargs.pop('encoding', self.encoding),
                         header=0 if self.header else None,
                         **merge(kwargs, clean_dialect(self.dialect)))

    reorder = get(list(usecols)) if usecols and len(usecols) > 1 else identity

    if isinstance(result, (pd.Series, pd.DataFrame)):
        return reorder(result)
    else:
        return map(reorder, result)
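# The kwargs.pop(name, default) calls above let callers override any of the
# schema-derived read_csv arguments while keeping the inferred values as the
# fallback.  A minimal self-contained sketch of the same idea, with toy
# inferred defaults and a hypothetical load_table helper (not part of the
# code above):
import pandas as pd
from io import StringIO

def load_table(csv_text, **kwargs):
    inferred = {'dtype': {'id': 'int64'}, 'parse_dates': ['when']}
    return pd.read_csv(StringIO(csv_text),
                       dtype=kwargs.pop('dtype', inferred['dtype']),
                       parse_dates=kwargs.pop('parse_dates', inferred['parse_dates']),
                       **kwargs)

df = load_table("id,when\n1,2020-01-01\n", parse_dates=[])  # caller override wins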
def gather_many(self, msgs):
    """ Gather the data for many compute messages at once

    Returns
    -------
    good: the input messages for which we have data
    bad: a dict of task keys for which we could not find data
    data: The scope in which to run tasks
    len(remote): the number of new keys we've gathered
    """
    diagnostics = {}
    who_has = merge(msg['who_has'] for msg in msgs if 'who_has' in msg)

    start = time()
    local = {k: self.data[k] for k in who_has if k in self.data}
    stop = time()
    if stop - start > 0.005:
        diagnostics['disk_load_start'] = start
        diagnostics['disk_load_stop'] = stop

    who_has = {k: v for k, v in who_has.items() if k not in local}
    start = time()
    remote, bad_data = yield gather_from_workers(who_has, permissive=True)
    if remote:
        self.data.update(remote)
        yield self.scheduler.add_keys(address=self.address, keys=list(remote))
    stop = time()
    if remote:
        diagnostics['transfer_start'] = start
        diagnostics['transfer_stop'] = stop

    data = merge(local, remote)

    if bad_data:
        missing = {msg['key']: {k for k in msg['who_has'] if k in bad_data}
                   for msg in msgs if 'who_has' in msg}
        bad = {k: v for k, v in missing.items() if v}
        good = [msg for msg in msgs if not missing.get(msg['key'])]
    else:
        good, bad = msgs, {}
    raise Return([good, bad, data, len(remote), diagnostics])
def generate_go_ethereum_fixture(destination_dir): with contextlib.ExitStack() as stack: datadir = stack.enter_context(tempdir()) keystore_dir = os.path.join(datadir, 'keystore') ensure_path_exists(keystore_dir) keyfile_path = os.path.join(keystore_dir, KEYFILE_FILENAME) with open(keyfile_path, 'w') as keyfile: keyfile.write(KEYFILE_DATA) genesis_file_path = os.path.join(datadir, 'genesis.json') with open(genesis_file_path, 'w') as genesis_file: genesis_file.write(json.dumps(GENESIS_DATA)) geth_ipc_path_dir = stack.enter_context(tempdir()) geth_ipc_path = os.path.join(geth_ipc_path_dir, 'geth.ipc') geth_port = get_open_port() geth_binary = get_geth_binary() with get_geth_process( geth_binary=geth_binary, datadir=datadir, genesis_file_path=genesis_file_path, geth_ipc_path=geth_ipc_path, geth_port=geth_port): wait_for_socket(geth_ipc_path) web3 = Web3(Web3.IPCProvider(geth_ipc_path)) chain_data = setup_chain_state(web3) # close geth by exiting context # must be closed before copying data dir verify_chain_state(web3, chain_data) # verify that chain state is still valid after closing # and re-opening geth with get_geth_process( geth_binary=geth_binary, datadir=datadir, genesis_file_path=genesis_file_path, geth_ipc_path=geth_ipc_path, geth_port=geth_port): wait_for_socket(geth_ipc_path) web3 = Web3(Web3.IPCProvider(geth_ipc_path)) verify_chain_state(web3, chain_data) static_data = { 'raw_txn_account': RAW_TXN_ACCOUNT, 'keyfile_pw': KEYFILE_PW, } config = merge(chain_data, static_data) pprint.pprint(config) write_config_json(config, datadir) shutil.copytree(datadir, destination_dir)
def _get_reader(self, header=None, keep_default_na=False,
                na_values=na_values, chunksize=None, **kwargs):
    kwargs.setdefault('skiprows', int(bool(self.header)))

    dialect = merge(keyfilter(read_csv_kwargs.__contains__, self.dialect),
                    kwargs)

    # handle windows
    if dialect['lineterminator'] == '\r\n':
        dialect['lineterminator'] = None

    return partial(pd.read_csv, chunksize=chunksize, na_values=na_values,
                   keep_default_na=keep_default_na, encoding=self.encoding,
                   header=header, **dialect)
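# keyfilter(read_csv_kwargs.__contains__, self.dialect) above whitelists only
# the csv-dialect options that pandas.read_csv actually accepts, and merge()
# then lets explicit keyword arguments win over the dialect defaults.  A small
# self-contained sketch of that filter-then-override pattern (toy data and a
# stand-in read_csv_kwargs set, not the class above):
from cytoolz import keyfilter, merge

read_csv_kwargs = {'sep', 'lineterminator', 'quotechar'}    # allowed keys
dialect = {'sep': ',', 'quotechar': '"', 'strict': True}    # 'strict' is not allowed

kwargs = merge(keyfilter(read_csv_kwargs.__contains__, dialect), {'sep': '\t'})
assert kwargs == {'sep': '\t', 'quotechar': '"'}            # filtered, then overridden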
def add_transaction(self, transaction, computation, block):
    """
    Add a transaction to the given block and
    return `trie_data` to store the transaction data in chaindb in VM layer.

    Update the transaction trie root, receipt trie root, bloom_filter, bloom,
    and gas_used of the block.

    :param transaction: the executed transaction
    :param computation: the Computation object with executed result
    :param block: the Block which the transaction is added in
    :type transaction: Transaction
    :type computation: Computation
    :type block: Block

    :return: the block and the trie_data
    :rtype: (Block, dict[bytes, bytes])
    """
    receipt = self.make_receipt(transaction, computation)
    self.add_receipt(receipt)

    # Create a new Block object
    block_header = block.header.clone()
    transactions = list(block.transactions)
    block = self.block_class(block_header, transactions)

    block.transactions.append(transaction)

    # Get trie roots and changed key-values.
    tx_root_hash, tx_kv_nodes = make_trie_root_and_nodes(
        block.transactions,
        self.trie_class,
    )
    receipt_root_hash, receipt_kv_nodes = make_trie_root_and_nodes(
        self.receipts,
        self.trie_class,
    )
    trie_data = merge(tx_kv_nodes, receipt_kv_nodes)

    block.bloom_filter |= receipt.bloom

    block.header.transaction_root = tx_root_hash
    block.header.receipt_root = receipt_root_hash
    block.header.bloom = int(block.bloom_filter)
    block.header.gas_used = receipt.gas_used

    return block, trie_data
def fill_transaction_defaults(web3, transaction):
    '''
    if web3 is None, fill as much as possible while offline
    '''
    defaults = {}
    for key, default_getter in TRANSACTION_DEFAULTS.items():
        if key not in transaction:
            if callable(default_getter):
                if web3 is not None:
                    default_val = default_getter(web3, transaction)
                else:
                    raise ValueError("You must specify %s in the transaction" % key)
            else:
                default_val = default_getter

            defaults[key] = default_val
    return merge(defaults, transaction)
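# Because merge() gives the right-most mapping precedence, any field already
# present in `transaction` wins over the computed default.  A minimal sketch of
# that behaviour with a toy defaults table (the real TRANSACTION_DEFAULTS in
# web3.py contains callables that query the node):
from cytoolz import merge

TOY_DEFAULTS = {'gas': 21000, 'value': 0}          # hypothetical, for illustration
transaction = {'to': '0xabc...', 'value': 10}

filled = merge(TOY_DEFAULTS, transaction)
assert filled == {'gas': 21000, 'value': 10, 'to': '0xabc...'}   # user value wins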
def test_curried_namespace(): exceptions = import_module('cytoolz.curried.exceptions') namespace = {} def should_curry(func): if not callable(func) or isinstance(func, cytoolz.curry): return False nargs = cytoolz.functoolz.num_required_args(func) if nargs is None or nargs > 1: return True return nargs == 1 and cytoolz.functoolz.has_keywords(func) def curry_namespace(ns): return dict( (name, cytoolz.curry(f) if should_curry(f) else f) for name, f in ns.items() if '__' not in name ) from_cytoolz = curry_namespace(vars(cytoolz)) from_exceptions = curry_namespace(vars(exceptions)) namespace.update(cytoolz.merge(from_cytoolz, from_exceptions)) namespace = cytoolz.valfilter(callable, namespace) curried_namespace = cytoolz.valfilter(callable, cytoolz.curried.__dict__) if namespace != curried_namespace: missing = set(namespace) - set(curried_namespace) if missing: raise AssertionError('There are missing functions in cytoolz.curried:\n %s' % ' \n'.join(sorted(missing))) extra = set(curried_namespace) - set(namespace) if extra: raise AssertionError('There are extra functions in cytoolz.curried:\n %s' % ' \n'.join(sorted(extra))) unequal = cytoolz.merge_with(list, namespace, curried_namespace) unequal = cytoolz.valfilter(lambda x: x[0] != x[1], unequal) messages = [] for name, (orig_func, auto_func) in sorted(unequal.items()): if name in from_exceptions: messages.append('%s should come from cytoolz.curried.exceptions' % name) elif should_curry(getattr(cytoolz, name)): messages.append('%s should be curried from cytoolz' % name) else: messages.append('%s should come from cytoolz and NOT be curried' % name) raise AssertionError('\n'.join(messages))
def reader(self, header=None, keep_default_na=False,
           na_values=na_values, chunksize=None, **kwargs):
    kwargs.setdefault('skiprows', int(bool(self.header)))

    dialect = merge(keyfilter(read_csv_kwargs.__contains__, self.dialect),
                    kwargs)
    filename, ext = os.path.splitext(self.path)
    ext = ext.lstrip('.')

    # handle windows
    if dialect['lineterminator'] == '\r\n':
        dialect['lineterminator'] = None

    reader = pd.read_csv(self.path,
                         compression={'gz': 'gzip',
                                      'bz2': 'bz2'}.get(ext),
                         chunksize=chunksize,
                         na_values=na_values,
                         keep_default_na=keep_default_na,
                         encoding=self.encoding,
                         header=header,
                         **dialect)

    return reader
def requirejson_wrapper(*args, **kwargs):
    # TODO(vishesh): malformed JSON gives 500 error, should give 400,
    # can't seem to catch the ValueError from json.loads
    try:
        # GET/DELETE have no body. PUT/PATCH/POST have bodies.
        r = None
        if (request.method in ['GET', 'DELETE'] or
                (request.method == 'POST' and 'json' not in request.content_type)):
            r = {k: request.params[k] for k in request.params}
        else:
            r = request.json
    except ValueError as e:
        jsonabort(400, ('Request should be parseable json, got error: '
                        '' + str(e.args)))

    if r is None:
        # the only time that r will be None is if the json part fails.
        # request.params being empty will give an empty dictionary instead,
        # so this logic is okay (don't need to change the expected
        # content-type based on the request method).
        jsonabort(400, ('Content-Type should be application/json, got '
                        '' + str(request.content_type)))

    if type(r) is not dict:
        jsonabort(400, 'Request must be a JSON object, not {}'.format(
            typename(r)))

    if not all(k in r for k in keys):
        jsonabort(400, 'Request is missing keys: ' +
                  str(list(set(keys) - set(r.keys()))))

    if strict and not all(p in keys or p in opts for p in r):
        # since we know that all k in keys is present in r,
        # if the lengths are unequal then for sure there are extra keys.
        jsonabort(400, 'Strict mode: request has unrecognized keys: ' +
                  str(list(set(r.keys()) - set(keys))))

    # instead of modifying/using global state, choosing to pass in
    # the updated request as a param means that the handler functions
    # are all pure functions of their input params.
    #
    # This should make testing them easier - it's one less thing to mock.
    return req_fun(t.merge(opts, r), *args, **kwargs)
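# Passing t.merge(opts, r) hands the handler a *new* dict, so neither the
# route's declared defaults nor the parsed request are mutated -- which is what
# keeps the wrapped handlers pure and easy to test.  A small check of that
# property with toy dicts (hypothetical values, not the real routes):
import toolz as t

opts = {'limit': 10}                       # defaults declared for the route
r = {'limit': 50, 'q': 'news'}             # parsed request payload
merged = t.merge(opts, r)

merged['limit'] = 1                        # mutate only the merged copy
assert opts == {'limit': 10} and r == {'limit': 50, 'q': 'news'}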
def get(self, stream=None, name=None, client=None, timeout=None):
    start = time()
    while name not in self.variables:
        if timeout is not None:
            left = timeout - (time() - start)
        else:
            left = None
        if left and left < 0:
            raise gen.TimeoutError()
        yield self.started.wait(timeout=left)
    record = self.variables[name]
    if record['type'] == 'Future':
        key = record['value']
        token = uuid.uuid4().hex
        ts = self.scheduler.tasks.get(key)
        state = ts.state if ts is not None else 'lost'
        msg = {'token': token, 'state': state}
        if state == 'erred':
            msg['exception'] = ts.exception_blame.exception
            msg['traceback'] = ts.exception_blame.traceback
        record = merge(record, msg)
        self.waiting[key, name].add(token)
    raise gen.Return(record)
def generate_go_ethereum_fixture(destination_dir):
    with contextlib.ExitStack() as stack:
        datadir = stack.enter_context(tempdir())

        keystore_dir = os.path.join(datadir, 'keystore')
        ensure_path_exists(keystore_dir)
        keyfile_path = os.path.join(keystore_dir, KEYFILE_FILENAME)
        with open(keyfile_path, 'w') as keyfile:
            keyfile.write(KEYFILE_DATA)

        genesis_file_path = os.path.join(datadir, 'genesis.json')
        with open(genesis_file_path, 'w') as genesis_file:
            genesis_file.write(json.dumps(GENESIS_DATA))

        geth_ipc_path_dir = stack.enter_context(tempdir())
        geth_ipc_path = os.path.join(geth_ipc_path_dir, 'geth.ipc')

        geth_port = get_open_port()
        geth_binary = get_geth_binary()

        geth_proc = stack.enter_context(get_geth_process(  # noqa: F841
            geth_binary=geth_binary,
            datadir=datadir,
            genesis_file_path=genesis_file_path,
            geth_ipc_path=geth_ipc_path,
            geth_port=geth_port,
        ))

        wait_for_socket(geth_ipc_path)
        web3 = Web3(Web3.IPCProvider(geth_ipc_path))
        chain_data = setup_chain_state(web3)
        static_data = {
            'raw_txn_account': RAW_TXN_ACCOUNT,
            'keyfile_pw': KEYFILE_PW,
        }
        pprint.pprint(merge(chain_data, static_data))

        shutil.copytree(datadir, destination_dir)
def compserver(datasets):
    if request.headers['content-type'] != 'application/json':
        return ("Expected JSON data", 404)
    try:
        data = json.loads(request.data)
    except ValueError:
        return ("Bad JSON. Got %s " % request.data, 404)

    tree_ns = dict((name, Symbol(name, discover(datasets[name])))
                   for name in datasets)
    if 'namespace' in data:
        tree_ns = merge(tree_ns, data['namespace'])

    expr = from_tree(data['expr'], namespace=tree_ns)

    compute_ns = dict((Symbol(name, discover(datasets[name])), datasets[name])
                      for name in datasets)
    result = compute(expr, compute_ns)

    if iscollection(expr.dshape):
        result = into(list, result)

    return jsonify({'datashape': str(expr.dshape),
                    'data': result})
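# merge(tree_ns, data['namespace']) lets a client-supplied namespace shadow the
# server-side symbol of the same name, since the right-hand mapping wins.  A
# minimal illustration with plain dicts standing in for the Symbol objects used
# above (hypothetical values, not the blaze types):
from cytoolz import merge

server_ns = {'accounts': '<Symbol accounts>', 'cities': '<Symbol cities>'}
client_ns = {'cities': '<client-provided expr>'}

assert merge(server_ns, client_ns)['cities'] == '<client-provided expr>'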
from cytoolz import (
    merge,
)

from hvm import precompiles
from hvm.utils.address import (
    force_bytes_to_address,
)
from hvm.vm.forks.frontier.computation import FRONTIER_PRECOMPILES
from hvm.vm.forks.spurious_dragon.computation import SpuriousDragonComputation

from .opcodes import BYZANTIUM_OPCODES

BYZANTIUM_PRECOMPILES = merge(
    FRONTIER_PRECOMPILES,
    {
        force_bytes_to_address(b'\x05'): precompiles.modexp,
        force_bytes_to_address(b'\x06'): precompiles.ecadd,
        force_bytes_to_address(b'\x07'): precompiles.ecmul,
        force_bytes_to_address(b'\x08'): precompiles.ecpairing,
    },
)


class ByzantiumComputation(SpuriousDragonComputation):
    """
    A class for all execution computations in the ``Byzantium`` fork.
    Inherits from
    :class:`~hvm.vm.forks.spurious_dragon.computation.SpuriousDragonComputation`
    """
    # Override
    opcodes = BYZANTIUM_OPCODES
    _precompiles = BYZANTIUM_PRECOMPILES
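# BYZANTIUM_PRECOMPILES above illustrates a recurring pattern in this
# collection: build a child fork's registry by merging the parent's table with
# a dict of additions/overrides, leaving the parent mapping untouched.  A
# generic, self-contained sketch of that pattern (toy registries, not the real
# EVM tables):
from cytoolz import merge

PARENT_REGISTRY = {0x01: 'ecrecover', 0x02: 'sha256'}
CHILD_REGISTRY = merge(
    PARENT_REGISTRY,
    {0x05: 'modexp'},                   # new entry introduced by the child fork
)

assert 0x02 in CHILD_REGISTRY and 0x05 in CHILD_REGISTRY
assert 0x05 not in PARENT_REGISTRY      # parent mapping is not mutated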
def generate_parity_fixture(destination_dir): """ The parity fixture generation strategy is to start a geth client with existing fixtures copied into a temp datadir. Then a parity client is started is peered with the geth client. """ with contextlib.ExitStack() as stack: geth_datadir = stack.enter_context(common.tempdir()) geth_port = common.get_open_port() geth_ipc_path_dir = stack.enter_context(common.tempdir()) geth_ipc_path = os.path.join(geth_ipc_path_dir, 'geth.ipc') geth_keystore_dir = os.path.join(geth_datadir, 'keystore') common.ensure_path_exists(geth_keystore_dir) geth_keyfile_path = os.path.join(geth_keystore_dir, common.KEYFILE_FILENAME) with open(geth_keyfile_path, 'w') as keyfile: keyfile.write(common.KEYFILE_DATA) genesis_file_path = os.path.join(geth_datadir, 'genesis.json') with open(genesis_file_path, 'w') as genesis_file: genesis_file.write(json.dumps(common.GENESIS_DATA)) stack.enter_context( common.get_geth_process( common.get_geth_binary(), geth_datadir, genesis_file_path, geth_ipc_path, geth_port, str(CHAIN_CONFIG['params']['networkID'])) ) # set up fixtures common.wait_for_socket(geth_ipc_path) web3_geth = Web3(Web3.IPCProvider(geth_ipc_path)) chain_data = go_ethereum.setup_chain_state(web3_geth) fixture_block_count = web3_geth.eth.blockNumber datadir = stack.enter_context(common.tempdir()) keystore_dir = os.path.join(datadir, 'keys') os.makedirs(keystore_dir, exist_ok=True) parity_keyfile_path = os.path.join(keystore_dir, common.KEYFILE_FILENAME) with open(parity_keyfile_path, 'w') as keyfile: keyfile.write(common.KEYFILE_DATA) chain_config_file_path = os.path.join(datadir, 'chain_config.json') with open(chain_config_file_path, 'w') as chain_file: chain_file.write(json.dumps(CHAIN_CONFIG)) parity_ipc_path_dir = stack.enter_context(common.tempdir()) parity_ipc_path = os.path.join(parity_ipc_path_dir, 'jsonrpc.ipc') parity_port = common.get_open_port() parity_binary = get_parity_binary() parity_proc = stack.enter_context(get_parity_process( # noqa: F841 parity_binary=parity_binary, datadir=datadir, ipc_path=parity_ipc_path, keys_path=keystore_dir, chain_config_file_path=chain_config_file_path, parity_port=parity_port, )) common.wait_for_socket(parity_ipc_path) web3 = Web3(Web3.IPCProvider(parity_ipc_path)) time.sleep(10) connect_nodes(web3, web3_geth) wait_for_chain_sync(web3, fixture_block_count) static_data = { 'raw_txn_account': common.RAW_TXN_ACCOUNT, 'keyfile_pw': common.KEYFILE_PW, } pprint.pprint(merge(chain_data, static_data)) shutil.copytree(datadir, destination_dir) parity_proc = stack.enter_context(parity_export_blocks_process( # noqa: F841 parity_binary=parity_binary, datadir=destination_dir, chain_config_file_path=os.path.join(destination_dir, 'chain_config.json'), parity_port=parity_port, ))
from .constants import (
    GAS_EXP_EIP160,
    GAS_EXPBYTE_EIP160
)


UPDATED_OPCODES = {
    opcode_values.EXP: as_opcode(
        logic_fn=arithmetic.exp(gas_per_byte=GAS_EXPBYTE_EIP160),
        mnemonic=mnemonics.EXP,
        gas_cost=GAS_EXP_EIP160,
    ),
    opcode_values.SELFDESTRUCT: as_opcode(
        logic_fn=system.selfdestruct_eip161,
        mnemonic=mnemonics.SELFDESTRUCT,
        gas_cost=GAS_SELFDESTRUCT_EIP150,
    ),
    opcode_values.CALL: call.CallEIP161.configure(
        name='opcode:CALL',
        mnemonic=mnemonics.CALL,
        gas_cost=GAS_CALL_EIP150,
    )(),
}

SPURIOUS_DRAGON_OPCODES = merge(
    copy.deepcopy(TANGERINE_WHISTLE_OPCODES),
    UPDATED_OPCODES,
)
def apply_gufunc(func, signature, *args, **kwargs): """ Apply a generalized ufunc or similar python function to arrays. ``signature`` determines if the function consumes or produces core dimensions. The remaining dimensions in given input arrays (``*args``) are considered loop dimensions and are required to broadcast naturally against each other. In other terms, this function is like np.vectorize, but for the blocks of dask arrays. If the function itself shall also be vectorized use ``vectorize=True`` for convenience. Parameters ---------- func : callable Function to call like ``func(*args, **kwargs)`` on input arrays (``*args``) that returns an array or tuple of arrays. If multiple arguments with non-matching dimensions are supplied, this function is expected to vectorize (broadcast) over axes of positional arguments in the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, ``output_core_dims`` has to be set as well. signature: string Specifies what core dimensions are consumed and produced by ``func``. According to the specification of numpy.gufunc signature [2]_ *args : numeric Input arrays or scalars to the callable function. axes: List of tuples, optional, keyword only A list of tuples with indices of axes a generalized ufunc should operate on. For instance, for a signature of ``"(i,j),(j,k)->(i,k)"`` appropriate for matrix multiplication, the base elements are two-dimensional matrices and these are taken to be stored in the two last axes of each argument. The corresponding axes keyword would be ``[(-2, -1), (-2, -1), (-2, -1)]``. For simplicity, for generalized ufuncs that operate on 1-dimensional arrays (vectors), a single integer is accepted instead of a single-element tuple, and for generalized ufuncs for which all outputs are scalars, the output tuples can be omitted. axis: int, optional, keyword only A single axis over which a generalized ufunc should operate. This is a short-cut for ufuncs that operate over a single, shared core dimension, equivalent to passing in axes with entries of (axis,) for each single-core-dimension argument and ``()`` for all others. For instance, for a signature ``"(i),(i)->()"``, it is equivalent to passing in ``axes=[(axis,), (axis,), ()]``. keepdims: bool, optional, keyword only If this is set to True, axes which are reduced over will be left in the result as a dimension with size one, so that the result will broadcast correctly against the inputs. This option can only be used for generalized ufuncs that operate on inputs that all have the same number of core dimensions and with outputs that have no core dimensions , i.e., with signatures like ``"(i),(i)->()"`` or ``"(m,m)->()"``. If used, the location of the dimensions in the output can be controlled with axes and axis. output_dtypes : Optional, dtype or list of dtypes, keyword only Valid numpy dtype specification or list thereof. If not given, a call of ``func`` with a small set of data is performed in order to try to automatically determine the output dtypes. output_sizes : dict, optional, keyword only Optional mapping from dimension names to sizes for outputs. Only used if new core dimensions (not found on inputs) appear on outputs. vectorize: bool, keyword only If set to ``True``, ``np.vectorize`` is applied to ``func`` for convenience. Defaults to ``False``. allow_rechunk: Optional, bool, keyword only Allows rechunking, otherwise chunk sizes need to match and core dimensions are to consist only of one chunk. 
Warning: enabling this can increase memory usage significantly. Defaults to ``False``. **kwargs : dict Extra keyword arguments to pass to `func` Returns ------- Single dask.array.Array or tuple of dask.array.Array Examples -------- >>> import dask.array as da >>> import numpy as np >>> def stats(x): ... return np.mean(x, axis=-1), np.std(x, axis=-1) >>> a = da.random.normal(size=(10,20,30), chunks=(5, 10, 30)) >>> mean, std = da.apply_gufunc(stats, "(i)->(),()", a) >>> mean.compute().shape (10, 20) >>> def outer_product(x, y): ... return np.einsum("i,j->ij", x, y) >>> a = da.random.normal(size=( 20,30), chunks=(10, 30)) >>> b = da.random.normal(size=(10, 1,40), chunks=(5, 1, 40)) >>> c = da.apply_gufunc(outer_product, "(i),(j)->(i,j)", a, b, vectorize=True) >>> c.compute().shape (10, 20, 30, 40) References ---------- .. [1] https://docs.scipy.org/doc/numpy/reference/ufuncs.html .. [2] https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html """ axes = kwargs.pop("axes", None) axis = kwargs.pop("axis", None) keepdims = kwargs.pop("keepdims", False) output_dtypes = kwargs.pop("output_dtypes", None) output_sizes = kwargs.pop("output_sizes", None) vectorize = kwargs.pop("vectorize", None) allow_rechunk = kwargs.pop("allow_rechunk", False) # Input processing: ## Signature if not isinstance(signature, str): raise TypeError("`signature` has to be of type string") input_coredimss, output_coredimss = _parse_gufunc_signature(signature) ## Determine nout: nout = None for functions of one direct return; nout = int for return tuples nout = None if not isinstance(output_coredimss, list) else len(output_coredimss) ## Determine and handle output_dtypes if output_dtypes is None: if vectorize: tempfunc = np.vectorize(func, signature=signature) else: tempfunc = func output_dtypes = apply_infer_dtype(tempfunc, args, kwargs, "apply_gufunc", "output_dtypes", nout) if isinstance(output_dtypes, (tuple, list)): if nout is None: if len(output_dtypes) > 1: raise ValueError( ("Must specify single dtype or list of one dtype " "for `output_dtypes` for function with one output")) otypes = output_dtypes output_dtypes = output_dtypes[0] else: otypes = output_dtypes else: if nout is not None: raise ValueError( "Must specify tuple of dtypes for `output_dtypes` for function with multiple outputs" ) otypes = [output_dtypes] ## Vectorize function, if required if vectorize: func = np.vectorize(func, signature=signature, otypes=otypes) ## Miscellaneous if output_sizes is None: output_sizes = {} ## Axes input_axes, output_axes = _validate_normalize_axes(axes, axis, keepdims, input_coredimss, output_coredimss) # Main code: ## Cast all input arrays to dask args = [asarray(a) for a in args] if len(input_coredimss) != len(args): ValueError( "According to `signature`, `func` requires %d arguments, but %s given" % (len(input_coredimss), len(args))) ## Axes: transpose input arguments transposed_args = [] for arg, iax, input_coredims in zip(args, input_axes, input_coredimss): shape = arg.shape iax = tuple(a if a < 0 else a - len(shape) for a in iax) tidc = tuple(i for i in range(-len(shape) + 0, 0) if i not in iax) + iax transposed_arg = arg.transpose(tidc) transposed_args.append(transposed_arg) args = transposed_args ## Assess input args for loop dims input_shapes = [a.shape for a in args] input_chunkss = [a.chunks for a in args] num_loopdims = [ len(s) - len(cd) for s, cd in zip(input_shapes, input_coredimss) ] max_loopdims = max(num_loopdims) if num_loopdims else None core_input_shapes = [ dict(zip(icd, s[n:])) for s, 
n, icd in zip(input_shapes, num_loopdims, input_coredimss) ] core_shapes = merge(*core_input_shapes) core_shapes.update(output_sizes) loop_input_dimss = [ tuple("__loopdim%d__" % d for d in range(max_loopdims - n, max_loopdims)) for n in num_loopdims ] input_dimss = [l + c for l, c in zip(loop_input_dimss, input_coredimss)] loop_output_dims = max(loop_input_dimss, key=len) if loop_input_dimss else tuple() ## Assess input args for same size and chunk sizes ### Collect sizes and chunksizes of all dims in all arrays dimsizess = {} chunksizess = {} for dims, shape, chunksizes in zip(input_dimss, input_shapes, input_chunkss): for dim, size, chunksize in zip(dims, shape, chunksizes): dimsizes = dimsizess.get(dim, []) dimsizes.append(size) dimsizess[dim] = dimsizes chunksizes_ = chunksizess.get(dim, []) chunksizes_.append(chunksize) chunksizess[dim] = chunksizes_ ### Assert correct partitioning, for case: for dim, sizes in dimsizess.items(): #### Check that the arrays have same length for same dimensions or dimension `1` if set(sizes).union({1}) != {1, max(sizes)}: raise ValueError( "Dimension `'{}'` with different lengths in arrays".format( dim)) if not allow_rechunk: chunksizes = chunksizess[dim] #### Check if core dimensions consist of only one chunk if (dim in core_shapes) and (chunksizes[0][0] < core_shapes[dim]): raise ValueError( "Core dimension `'{}'` consists of multiple chunks. To fix, rechunk into a single \ chunk along this dimension or set `allow_rechunk=True`, but beware that this may increase memory usage \ significantly.".format(dim)) #### Check if loop dimensions consist of same chunksizes, when they have sizes > 1 relevant_chunksizes = list( unique(c for s, c in zip(sizes, chunksizes) if s > 1)) if len(relevant_chunksizes) > 1: raise ValueError( "Dimension `'{}'` with different chunksize present".format( dim)) ## Apply function - use blockwise here arginds = list(concat(zip(args, input_dimss))) ### Use existing `blockwise` but only with loopdims to enforce ### concatenation for coredims that appear also at the output ### Modifying `blockwise` could improve things here. 
try: tmp = blockwise( # First try to compute meta func, loop_output_dims, *arginds, concatenate=True, **kwargs) except ValueError: # If computing meta doesn't work, provide it explicitly based on # provided dtypes sample = arginds[0]._meta if isinstance(output_dtypes, tuple): meta = tuple( meta_from_array(sample, dtype=odt) for ocd, odt in zip(output_coredimss, output_dtypes)) else: meta = tuple( meta_from_array(sample, dtype=odt) for ocd, odt in zip((output_coredimss, ), (output_dtypes, ))) tmp = blockwise(func, loop_output_dims, *arginds, concatenate=True, meta=meta, **kwargs) if isinstance(tmp._meta, tuple): metas = tmp._meta else: metas = (tmp._meta, ) ## Prepare output shapes loop_output_shape = tmp.shape loop_output_chunks = tmp.chunks keys = list(flatten(tmp.__dask_keys__())) name, token = keys[0][0].split("-") ### *) Treat direct output if nout is None: output_coredimss = [output_coredimss] output_dtypes = [output_dtypes] ## Split output leaf_arrs = [] for i, (ocd, odt, oax, meta) in enumerate( zip(output_coredimss, output_dtypes, output_axes, metas)): core_output_shape = tuple(core_shapes[d] for d in ocd) core_chunkinds = len(ocd) * (0, ) output_shape = loop_output_shape + core_output_shape output_chunks = loop_output_chunks + core_output_shape leaf_name = "%s_%d-%s" % (name, i, token) leaf_dsk = {(leaf_name, ) + key[1:] + core_chunkinds: ((getitem, key, i) if nout else key) for key in keys} graph = HighLevelGraph.from_collections(leaf_name, leaf_dsk, dependencies=[tmp]) meta = meta_from_array(meta, len(output_shape), dtype=odt) leaf_arr = Array(graph, leaf_name, chunks=output_chunks, shape=output_shape, meta=meta) ### Axes: if keepdims: slices = len( leaf_arr.shape) * (slice(None), ) + len(oax) * (np.newaxis, ) leaf_arr = leaf_arr[slices] tidcs = [None] * len(leaf_arr.shape) for i, oa in zip(range(-len(oax), 0), oax): tidcs[oa] = i j = 0 for i in range(len(tidcs)): if tidcs[i] is None: tidcs[i] = j j += 1 leaf_arr = leaf_arr.transpose(tidcs) leaf_arrs.append(leaf_arr) return leaf_arrs if nout else leaf_arrs[0] # Undo *) from above
def test_apply_transaction(chain_without_block_validation): # noqa: F811 chain = chain_without_block_validation # noqa: F811 # Don't change these variables vm = chain.get_vm() chaindb = copy.deepcopy(vm.chaindb) block0 = copy.deepcopy(vm.block) prev_block_hash = chain.get_canonical_block_by_number(0).hash initial_state_root = vm.state.block_header.state_root # (1) Get VM.apply_transaction(transaction) result for assertion # The first transaction chain1 = copy.deepcopy(chain) vm_example = chain1.get_vm() vm_example._is_stateless = False # Only for testing recipient1 = decode_hex('0x1111111111111111111111111111111111111111') amount = 100 from_ = chain.funded_address tx1 = new_transaction( vm_example, from_, recipient1, amount, private_key=chain.funded_address_private_key, ) computation, result_block = vm_example.apply_transaction(tx1) # The second transaction recipient2 = decode_hex('0x2222222222222222222222222222222222222222') tx2 = new_transaction( vm_example, from_, recipient2, amount, private_key=chain.funded_address_private_key, ) computation, result_block = vm_example.apply_transaction(tx2) assert len(result_block.transactions) == 2 # (2) Test VMState.apply_transaction(...) # Use FrontierVMState to apply transaction chaindb1 = copy.deepcopy(chaindb) block1 = copy.deepcopy(block0) block_header1 = block1.header prev_headers = vm.get_prev_headers( last_block_hash=prev_block_hash, db=vm.chaindb, ) vm_state1 = FrontierVMState( chaindb=chaindb1, block_header=block_header1, prev_headers=prev_headers, receipts=[], ) parent_header = copy.deepcopy(prev_headers[0]) computation, block, _ = vm_state1.apply_transaction( tx1, block1, ) access_logs1 = computation.vm_state.access_logs # Check if prev_headers hasn't been changed assert parent_header.hash == prev_headers[0].hash # Make sure that block1 hasn't been changed assert block1.header.state_root == initial_state_root vm_state1 = FrontierVMState( chaindb=chaindb1, block_header=block.header, prev_headers=prev_headers, receipts=computation.vm_state.receipts, ) computation, block, _ = vm_state1.apply_transaction( tx2, block, ) access_logs2 = computation.vm_state.access_logs post_vm_state = computation.vm_state # Check AccessLogs witness_db = BaseChainDB(MemoryDB(access_logs2.writes)) state_db = witness_db.get_state_db(block.header.state_root, read_only=True) assert state_db.get_balance(recipient2) == amount with pytest.raises(KeyError): _ = state_db.get_balance(recipient1) # Check block data are correct assert block.header.state_root == result_block.header.state_root assert block.header.gas_limit == result_block.header.gas_limit assert block.header.gas_used == result_block.header.gas_used assert block.header.transaction_root == result_block.header.transaction_root assert block.header.receipt_root == result_block.header.receipt_root # Make sure that vm_state1 hasn't been changed assert post_vm_state.block_header.state_root == result_block.header.state_root # (3) Testing using witness as db data # Witness_db block2 = copy.deepcopy(block0) block_header2 = block2.header witness_db = BaseChainDB(MemoryDB(access_logs1.reads)) prev_headers = vm.get_prev_headers( last_block_hash=prev_block_hash, db=vm.chaindb, ) # Apply the first transaction vm_state2 = FrontierVMState( chaindb=witness_db, block_header=block_header2, prev_headers=prev_headers, receipts=[], ) computation, block, _ = vm_state2.apply_transaction( tx1, block2, ) # Update witness_db recent_trie_nodes = merge(access_logs2.reads, access_logs1.writes) witness_db = 
BaseChainDB(MemoryDB(recent_trie_nodes)) # Apply the second transaction vm_state2 = FrontierVMState( chaindb=witness_db, block_header=block.header, prev_headers=prev_headers, receipts=computation.vm_state.receipts, ) computation, block, _ = vm_state2.apply_transaction( tx2, block, ) # After applying assert block.header.state_root == computation.vm_state.block_header.state_root assert block.header.transaction_root == result_block.header.transaction_root assert block.header.receipt_root == result_block.header.receipt_root assert block.hash == result_block.hash # (3) Testing using witness_db and block_header to reconstruct vm_state prev_headers = vm.get_prev_headers( last_block_hash=prev_block_hash, db=vm.chaindb, ) vm_state3 = FrontierVMState( chaindb=witness_db, block_header=block.header, prev_headers=prev_headers, ) assert vm_state3.block_header.state_root == post_vm_state.block_header.state_root assert vm_state3.block_header.state_root == result_block.header.state_root
def rewrite_blockwise(inputs): """ Rewrite a stack of Blockwise expressions into a single blockwise expression Given a set of Blockwise layers, combine them into a single layer. The provided layers are expected to fit well together. That job is handled by ``optimize_blockwise`` Parameters ---------- inputs : List[Blockwise] Returns ------- blockwise: Blockwise See Also -------- optimize_blockwise """ inputs = {inp.output: inp for inp in inputs} dependencies = { inp.output: {d for d, v in inp.indices if v is not None and d in inputs} for inp in inputs.values() } dependents = core.reverse_dict(dependencies) new_index_iter = ( c + (str(d) if d else '') # A, B, ... A1, B1, ... for d in itertools.count() for c in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ') [root] = [k for k, v in dependents.items() if not v] # Our final results. These will change during fusion below indices = list(inputs[root].indices) new_axes = inputs[root].new_axes concatenate = inputs[root].concatenate dsk = dict(inputs[root].dsk) changed = True while changed: changed = False for i, (dep, ind) in enumerate(indices): if ind is None: continue if dep not in inputs: continue changed = True # Replace _n with dep name in existing tasks # (inc, _0) -> (inc, 'b') dsk = { k: subs(v, {blockwise_token(i): dep}) for k, v in dsk.items() } # Remove current input from input indices # [('a', 'i'), ('b', 'i')] -> [('a', 'i')] _, current_dep_indices = indices.pop(i) sub = { blockwise_token(i): blockwise_token(i - 1) for i in range(i + 1, len(indices) + 1) } dsk = subs(dsk, sub) # Change new input_indices to match give index from current computation # [('c', j')] -> [('c', 'i')] new_indices = inputs[dep].indices sub = dict(zip(inputs[dep].output_indices, current_dep_indices)) contracted = { x for _, j in new_indices if j is not None for x in j if x not in inputs[dep].output_indices } extra = dict(zip(contracted, new_index_iter)) sub.update(extra) new_indices = [(x, index_subs(j, sub)) for x, j in new_indices] # Update new_axes for k, v in inputs[dep].new_axes.items(): new_axes[sub[k]] = v # Bump new inputs up in list sub = {} for i, index in enumerate(new_indices): try: contains = index in indices except (ValueError, TypeError): contains = False if contains: # use old inputs if available sub[blockwise_token(i)] = blockwise_token( indices.index(index)) else: sub[blockwise_token(i)] = blockwise_token(len(indices)) indices.append(index) new_dsk = subs(inputs[dep].dsk, sub) # indices.extend(new_indices) dsk.update(new_dsk) indices = [(a, tuple(b) if isinstance(b, list) else b) for a, b in indices] # De-duplicate indices like [(a, ij), (b, i), (a, ij)] -> [(a, ij), (b, i)] # Make sure that we map everything else appropriately as we remove inputs new_indices = [] seen = {} sub = {} # like {_0: _0, _1: _0, _2: _1} for i, x in enumerate(indices): if x[1] is not None and x in seen: sub[i] = seen[x] else: if x[1] is not None: seen[x] = len(new_indices) sub[i] = len(new_indices) new_indices.append(x) sub = {blockwise_token(k): blockwise_token(v) for k, v in sub.items()} dsk = {k: subs(v, sub) for k, v in dsk.items()} indices_check = {k for k, v in indices if v is not None} numblocks = toolz.merge([inp.numblocks for inp in inputs.values()]) numblocks = { k: v for k, v in numblocks.items() if v is None or k in indices_check } out = Blockwise(root, inputs[root].output_indices, dsk, new_indices, numblocks=numblocks, new_axes=new_axes, concatenate=concatenate) return out
def to_task_dask(expr):
    """Normalize a python object and merge all sub-graphs.

    - Replace ``Delayed`` with their keys
    - Convert literals to things the schedulers can handle
    - Extract dask graphs from all enclosed values

    Parameters
    ----------
    expr : object
        The object to be normalized. This function knows how to handle
        ``Delayed``s, as well as most builtin python types.

    Returns
    -------
    task : normalized task to be run
    dask : a merged dask graph that forms the dag for this task

    Examples
    --------
    >>> a = delayed(1, 'a')
    >>> b = delayed(2, 'b')
    >>> task, dask = to_task_dask([a, b, 3])
    >>> task  # doctest: +SKIP
    ['a', 'b', 3]
    >>> dict(dask)  # doctest: +SKIP
    {'a': 1, 'b': 2}

    >>> task, dasks = to_task_dask({a: 1, b: 2})
    >>> task  # doctest: +SKIP
    (dict, [['a', 1], ['b', 2]])
    >>> dict(dask)  # doctest: +SKIP
    {'a': 1, 'b': 2}
    """
    warnings.warn("The dask.delayed.to_dask_dask function has been "
                  "Deprecated in favor of unpack_collections", stacklevel=2)

    if isinstance(expr, Delayed):
        return expr.key, expr.dask

    if is_dask_collection(expr):
        name = 'finalize-' + tokenize(expr, pure=True)
        keys = expr.__dask_keys__()
        opt = getattr(expr, '__dask_optimize__', dont_optimize)
        finalize, args = expr.__dask_postcompute__()
        dsk = {name: (finalize, keys) + args}
        dsk.update(opt(expr.__dask_graph__(), keys))
        return name, dsk

    if isinstance(expr, Iterator):
        expr = list(expr)
    typ = type(expr)

    if typ in (list, tuple, set):
        args, dasks = unzip((to_task_dask(e) for e in expr), 2)
        args = list(args)
        dsk = merge(dasks)
        # Ensure output type matches input type
        return (args, dsk) if typ is list else ((typ, args), dsk)

    if typ is dict:
        args, dsk = to_task_dask([[k, v] for k, v in expr.items()])
        return (dict, args), dsk

    if is_dataclass(expr):
        args, dsk = to_task_dask([[f.name, getattr(expr, f.name)]
                                  for f in dataclass_fields(expr)])
        return (apply, typ, (), (dict, args)), dsk

    if typ is slice:
        args, dsk = to_task_dask([expr.start, expr.stop, expr.step])
        return (slice,) + tuple(args), dsk

    return expr, {}
        mnemonic=mnemonics.SELFDESTRUCT,
        gas_cost=constants.GAS_SELFDESTRUCT_EIP150,
    ),
    opcode_values.CREATE: system.CreateEIP150.configure(
        __name__='opcode:CREATE',
        mnemonic=mnemonics.CREATE,
        gas_cost=GAS_CREATE,
    )(),
    opcode_values.CALL: call.CallEIP150.configure(
        __name__='opcode:CALL',
        mnemonic=mnemonics.CALL,
        gas_cost=constants.GAS_CALL_EIP150,
    )(),
    opcode_values.CALLCODE: call.CallCodeEIP150.configure(
        __name__='opcode:CALLCODE',
        mnemonic=mnemonics.CALLCODE,
        gas_cost=constants.GAS_CALL_EIP150,
    )(),
    opcode_values.DELEGATECALL: call.DelegateCallEIP150.configure(
        __name__='opcode:DELEGATECALL',
        mnemonic=mnemonics.DELEGATECALL,
        gas_cost=constants.GAS_CALL_EIP150,
    )(),
}

TANGERINE_WHISTLE_OPCODES = merge(
    copy.deepcopy(HOMESTEAD_OPCODES),
    UPDATED_OPCODES,
)
def __init__(self, mapping_like=(), **kwargs):
    self._dict = self.proxy = MappingProxyType(merge(dict(mapping_like), kwargs))
    self._hash = None
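# The constructor above builds an immutable view by merging a positional
# mapping-like argument with keyword overrides and wrapping the result in a
# MappingProxyType.  A minimal stand-alone sketch of the same construction,
# using a hypothetical FrozenDict name (not necessarily the class this
# __init__ belongs to):
from types import MappingProxyType
from cytoolz import merge

class FrozenDict:
    def __init__(self, mapping_like=(), **kwargs):
        self._dict = self.proxy = MappingProxyType(merge(dict(mapping_like), kwargs))
        self._hash = None

    def __getitem__(self, key):
        return self._dict[key]

fd = FrozenDict({'a': 1}, b=2)
assert fd['b'] == 2
# fd.proxy['a'] = 3 would raise TypeError: mappingproxy does not support item assignment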
def build_composition( endpoint_protocol: 'EndpointProtocol', components: Dict[str, 'ModelComponent'], connections: List['Connection'], ) -> 'TaskComposition': r"""Build a composed graph. Notes on easy sources to introduce bugs. :: Input Data -------------------- a b c d | | | | \\ \ | / \ | || C_2 C_1 || / | | \ // / | / * RES_2 | | // \ | | // RES_1 \ | // C_2_1 | RES_3 --------------------- Output Data Because there are connections between ``C_1 -> C_2_1`` and ``C_2 -> C_2_1`` we can eliminate the ``serialize <-> deserialize`` tasks for the data transfered between these components. We need to be careful to not eliminate the ``serialize`` or ``deserialize`` tasks entirely though. In the case shown above, it is apparent ``RES_1`` & ``RES_2``. still need the ``serialize`` function, but the same also applies for ``deserialize``. Consider the example below with the same composition & connections as above: :: Input Data -------------------- a b c d | | | | \\ \ | /| \ | \\ C_2 | C_1 || / | | @\ || / | | @ \ // RES_2 | | @ * | | @ // \ \ | @ // RES_1 C_2_1 | RES_3 --------------------- Output Data Though we are using the same composition, the endpoints have been changed so that the previous result of ``C_1``-> ``C_2_1`` is now being provided by input ``c``. However, there is still a connection between ``C_1`` and ``C_2_1`` which is denoted by the ``@`` symbols... Though the first example (shown at the top of this docstring) would be able to eliminate ``C_2_1 deserailize``from ``C_2`` / ``C_1``, we see here that since endpoints define the path through the DAG, we cannot eliminate them entirely either. """ initial_task_dsk = _process_initial(endpoint_protocol, components) dsk_tgt_src_connections = {} for connection in connections: source_dsk = f"{connection.source_component}.outputs.{connection.source_key}" target_dsk = f"{connection.target_component}.inputs.{connection.target_key}" # value of target key is mapped one-to-one from value of source dsk_tgt_src_connections[target_dsk] = (identity, source_dsk) rewrite_ruleset = RuleSet() for dsk_payload_target_serial in initial_task_dsk.payload_tasks_dsk.keys(): dsk_payload_target, _serial_ident = dsk_payload_target_serial.rsplit( ".", maxsplit=1) if _serial_ident != "serial": raise RuntimeError( f"dsk_payload_target_serial={dsk_payload_target_serial}, " f"dsk_payload_target={dsk_payload_target}, _serial_ident={_serial_ident}" ) if dsk_payload_target in dsk_tgt_src_connections: # This rewrite rule ensures that exposed inputs are able to replace inputs # coming from connected components. If the payload keys are mapped in a # connection, replace the connection with the payload deserialize function. 
lhs = dsk_tgt_src_connections[dsk_payload_target] rhs = initial_task_dsk.merged_dsk[dsk_payload_target] rule = RewriteRule(lhs, rhs, vars=()) rewrite_ruleset.add(rule) io_subgraphs_merged = merge( initial_task_dsk.merged_dsk, dsk_tgt_src_connections, initial_task_dsk.result_tasks_dsk, initial_task_dsk.payload_tasks_dsk, ) # apply rewrite rules rewritten_dsk = valmap(rewrite_ruleset.rewrite, io_subgraphs_merged) # We perform a significant optimization here by culling any tasks which # have been made redundant by the rewrite rules, or which don't exist # on a path which is required for computation of the endpoint outputs culled_dsk, culled_deps = cull(rewritten_dsk, initial_task_dsk.output_keys) _verify_no_cycles(culled_dsk, initial_task_dsk.output_keys, endpoint_protocol.name) # as an optimization, we inline the `one_to_one` functions, into the # execution of their dependency. Since they are so cheap, there's no # need to spend time sending off a task to perform them. inlined = inline_functions( culled_dsk, initial_task_dsk.output_keys, fast_functions=[identity], inline_constants=True, dependencies=culled_deps, ) inlined_culled_dsk, inlined_culled_deps = cull( inlined, initial_task_dsk.output_keys) _verify_no_cycles(inlined_culled_dsk, initial_task_dsk.output_keys, endpoint_protocol.name) # pe-run topological sort of tasks so it doesn't have to be # recomputed upon every request. toposort_keys = toposort(inlined_culled_dsk) # construct results res = TaskComposition( dsk=inlined_culled_dsk, sortkeys=toposort_keys, get_keys=initial_task_dsk.output_keys, ep_dsk_input_keys=initial_task_dsk.payload_dsk_map, ep_dsk_output_keys=initial_task_dsk.result_dsk_map, pre_optimization_dsk=initial_task_dsk.merged_dsk, ) return res
def _process_initial(
        endpoint_protocol: 'EndpointProtocol',
        components: Dict[str, 'ModelComponent']) -> UnprocessedTaskDask:
    """Extract task dsk and payload / results keys and return computable form.

    Parameters
    ----------
    endpoint_protocol
        endpoint protocol definition for the variation of the DAG which
        is currently being evaluated.
    components
        Mapping of component name -> component class definitions which
        contain independent subgraph task dsks'.

    Returns
    -------
    UnprocessedTaskDask
    """
    # mapping payload input keys -> serialized keys / tasks
    payload_dsk_key_map = {
        payload_key: f"{input_key}.serial"
        for payload_key, input_key in endpoint_protocol.dsk_input_key_map.items()
    }
    payload_input_tasks_dsk = {
        input_dsk_key: (identity, payload_key)
        for payload_key, input_dsk_key in payload_dsk_key_map.items()
    }

    # mapping result keys -> serialize keys / tasks
    res_dsk_key_map = {
        result_key: f"{output_key}.serial"
        for result_key, output_key in endpoint_protocol.dsk_output_key_map.items()
    }
    result_output_tasks_dsk = {
        result_key: (identity, output_dsk_key)
        for result_key, output_dsk_key in res_dsk_key_map.items()
    }
    output_keys = list(res_dsk_key_map.keys())

    # need check to prevent cycle error
    _payload_keys = set(payload_dsk_key_map.keys())
    _result_keys = set(res_dsk_key_map.keys())
    if not _payload_keys.isdisjoint(_result_keys):
        raise KeyError(
            f"Request payload keys `{_payload_keys}` and response keys `{_result_keys}` "
            f"cannot intersect. Keys `{_payload_keys.intersection(_result_keys)}` "
            f"must be renamed in either `inputs` or `outputs`.")

    component_dsk = merge(
        valmap(attrgetter("_gridserve_meta_.dsk"), components))
    merged_dsk = merge(*(dsk for dsk in component_dsk.values()))

    return UnprocessedTaskDask(
        component_dsk=component_dsk,
        merged_dsk=merged_dsk,
        payload_tasks_dsk=payload_input_tasks_dsk,
        payload_dsk_map=payload_dsk_key_map,
        result_tasks_dsk=result_output_tasks_dsk,
        result_dsk_map=res_dsk_key_map,
        output_keys=output_keys,
    )
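# merged_dsk above is just the union of each component's independent task
# graph; toolz/cytoolz merge works on plain dicts, which is exactly what a
# dask task graph is.  A tiny sketch of merging two hand-written subgraphs and
# evaluating them with the synchronous scheduler (toy keys and functions, not
# the component graphs above):
import dask
from cytoolz import merge

component_a_dsk = {'a.outputs.x': (sum, [1, 2, 3])}
component_b_dsk = {'b.inputs.y': 'a.outputs.x',                       # alias to a's output
                   'b.outputs.z': (lambda y: y * 10, 'b.inputs.y')}

merged_dsk = merge(component_a_dsk, component_b_dsk)
assert dask.get(merged_dsk, 'b.outputs.z') == 60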
def saddleplot( binedges, counts, saddledata, cmap="coolwarm", scale="log", vmin=0.5, vmax=2, color=None, title=None, xlabel=None, ylabel=None, clabel=None, fig=None, fig_kws=None, heatmap_kws=None, margin_kws=None, cbar_kws=None, subplot_spec=None, ): """ Generate a saddle plot. Parameters ---------- binedges : 1D array-like For `n` bins, there should be `n + 1` bin edges counts : 1D array-like Signal track histogram produced by `digitize_track`. It will include 2 flanking elements for outlier values, thus the length should be `n + 2`. saddledata : 2D array-like Saddle matrix produced by `make_saddle`. It will include 2 flanking rows/columns for outlier signal values, thus the shape should be `(n+2, n+2)`. cmap : str or matplotlib colormap Colormap to use for plotting the saddle heatmap scale : str Color scaling to use for plotting the saddle heatmap: log or linear vmin, vmax : float Value limits for coloring the saddle heatmap color : matplotlib color value Face color for margin bar plots fig : matplotlib Figure, optional Specified figure to plot on. A new figure is created if none is provided. fig_kws : dict, optional Passed on to `plt.Figure()` heatmap_kws : dict, optional Passed on to `ax.imshow()` margin_kws : dict, optional Passed on to `ax.bar()` and `ax.barh()` cbar_kws : dict, optional Passed on to `plt.colorbar()` subplot_spec : GridSpec object Specify a subregion of a figure to using a GridSpec. Returns ------- Dictionary of axes objects. """ from matplotlib.gridspec import GridSpec, GridSpecFromSubplotSpec from matplotlib.colors import Normalize, LogNorm from matplotlib import ticker import matplotlib.pyplot as plt class MinOneMaxFormatter(ticker.LogFormatter): def set_locs(self, locs=None): self._sublabels = set([vmin % 10 * 10, vmax % 10, 1]) def __call__(self, x, pos=None): if x not in [vmin, 1, vmax]: return "" else: return "{x:g}".format(x=x) n_edges = len(binedges) n_bins = n_edges - 1 lo, hi = binedges[0], binedges[-1] # Histogram and saddledata are flanked by outlier bins n = saddledata.shape[0] X, Y = np.meshgrid(binedges, binedges) C = saddledata hist = counts if (n - n_bins) == 2: C = C[1:-1, 1:-1] hist = hist[1:-1] # Layout if subplot_spec is not None: GridSpec = partial(GridSpecFromSubplotSpec, subplot_spec=subplot_spec) grid = {} gs = GridSpec( nrows=3, ncols=3, width_ratios=[0.2, 1, 0.1], height_ratios=[0.2, 1, 0.1], wspace=0.05, hspace=0.05, ) # Figure if fig is None: fig_kws_default = dict(figsize=(5, 5)) fig_kws = merge(fig_kws_default, fig_kws if fig_kws is not None else {}) fig = plt.figure(**fig_kws) # Heatmap if scale == "log": norm = LogNorm(vmin=vmin, vmax=vmax) elif scale == "linear": norm = Normalize(vmin=vmin, vmax=vmax) else: raise ValueError("Only linear and log color scaling is supported") grid["ax_heatmap"] = ax = plt.subplot(gs[4]) heatmap_kws_default = dict(cmap="coolwarm", rasterized=True) heatmap_kws = merge(heatmap_kws_default, heatmap_kws if heatmap_kws is not None else {}) img = ax.pcolormesh(X, Y, C, norm=norm, **heatmap_kws) plt.gca().yaxis.set_visible(False) # Margins margin_kws_default = dict(edgecolor="k", facecolor=color, linewidth=1) margin_kws = merge(margin_kws_default, margin_kws if margin_kws is not None else {}) # left margin hist grid["ax_margin_y"] = plt.subplot(gs[3], sharey=grid["ax_heatmap"]) plt.barh(binedges[:-1], height=np.diff(binedges), width=hist, align="edge", **margin_kws) plt.xlim(plt.xlim()[1], plt.xlim()[0]) # fliplr plt.ylim(hi, lo) plt.gca().spines["top"].set_visible(False) 
plt.gca().spines["bottom"].set_visible(False) plt.gca().spines["left"].set_visible(False) plt.gca().xaxis.set_visible(False) # top margin hist grid["ax_margin_x"] = plt.subplot(gs[1], sharex=grid["ax_heatmap"]) plt.bar(binedges[:-1], width=np.diff(binedges), height=hist, align="edge", **margin_kws) plt.xlim(lo, hi) # plt.ylim(plt.ylim()) # correct plt.gca().spines["top"].set_visible(False) plt.gca().spines["right"].set_visible(False) plt.gca().spines["left"].set_visible(False) plt.gca().xaxis.set_visible(False) plt.gca().yaxis.set_visible(False) # Colorbar grid["ax_cbar"] = plt.subplot(gs[5]) cbar_kws_default = dict(fraction=0.8, label=clabel or "") cbar_kws = merge(cbar_kws_default, cbar_kws if cbar_kws is not None else {}) if scale == "linear" and vmin is not None and vmax is not None: grid["cbar"] = cb = plt.colorbar(img, **cbar_kws) # cb.set_ticks(np.arange(vmin, vmax + 0.001, 0.5)) # # do linspace between vmin and vmax of 5 segments and trunc to 1 decimal: decimal = 10 nsegments = 5 cd_ticks = np.trunc( np.linspace(vmin, vmax, nsegments) * decimal) / decimal cb.set_ticks(cd_ticks) else: grid["cbar"] = cb = plt.colorbar(img, format=MinOneMaxFormatter(), **cbar_kws) cb.ax.yaxis.set_minor_formatter(MinOneMaxFormatter()) # extra settings grid["ax_heatmap"].set_xlim(lo, hi) grid["ax_heatmap"].set_ylim(hi, lo) plt.grid(False) plt.axis("off") if title is not None: grid["ax_margin_x"].set_title(title) if xlabel is not None: grid["ax_heatmap"].set_xlabel(xlabel) if ylabel is not None: grid["ax_margin_y"].set_ylabel(ylabel) return grid
def copy(self: T, **kwargs: Any) -> T:
    new_target = self.spoof_target.copy(**kwargs)
    new_overrides = merge(self.overrides, kwargs)
    return type(self)(new_target, **new_overrides)
    LEGACY_TRANSACTION_FORMATTERS,
    LEGACY_TRANSACTION_VALID_VALUES,
    is_int_or_prefixed_hexstr,
    is_rpc_structured_access_list,
)

TYPED_TRANSACTION_FORMATTERS = merge(
    LEGACY_TRANSACTION_FORMATTERS,
    {
        'chainId': hexstr_if_str(to_int),
        'type': hexstr_if_str(to_int),
        'accessList': apply_formatter_to_array(
            apply_formatters_to_dict(
                {
                    "address": apply_one_of_formatters((
                        (is_string, hexstr_if_str(to_bytes)),
                        (is_bytes, identity),
                    )),
                    "storageKeys": apply_formatter_to_array(hexstr_if_str(to_int)),
                }
            ),
        ),
        'maxPriorityFeePerGas': hexstr_if_str(to_int),
        'maxFeePerGas': hexstr_if_str(to_int),
    },
)

# Define typed transaction common sedes.
# [[{20 bytes}, [{32 bytes}...]]...], where ... means
# "zero or more of the thing to the left".
access_list_sede_type = CountableList(
    List([
        Binary.fixed_length(20, allow_empty=False),
def merge(self, *dicts, **kwargs):
    return fdict(cytoolz.merge(*((self,) + dicts), **kwargs))
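# A runnable sketch of how such a wrapper behaves, with a minimal stand-in
# fdict class (hypothetical; the real fdict in the source project may differ):
import cytoolz

class fdict(dict):
    def merge(self, *dicts, **kwargs):
        return fdict(cytoolz.merge(*((self,) + dicts), **kwargs))

d = fdict({'a': 1})
combined = d.merge({'b': 2}, {'a': 10})
assert combined == {'a': 10, 'b': 2}     # right-most mapping wins
assert isinstance(combined, fdict)       # result is re-wrapped as an fdict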
def setup_main_filler(name, environment=None):
    return setup_filler(name, merge(DEFAULT_MAIN_ENVIRONMENT, environment or {}))
import copy

from cytoolz import merge

from evm import constants
from evm import opcode_values
from evm import mnemonics

from evm.logic import (
    call,
)

from evm.vm.forks.frontier.opcodes import FRONTIER_OPCODES


NEW_OPCODES = {
    opcode_values.DELEGATECALL: call.DelegateCall.configure(
        name='opcode:DELEGATECALL',
        mnemonic=mnemonics.DELEGATECALL,
        gas_cost=constants.GAS_CALL,
    )(),
}


HOMESTEAD_OPCODES = merge(
    copy.deepcopy(FRONTIER_OPCODES),
    NEW_OPCODES
)
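# copy.deepcopy(FRONTIER_OPCODES) before the merge ensures the new fork's table
# gets its own opcode instances rather than sharing mutable objects with the
# parent table.  A generic illustration of why that matters, using toy objects
# rather than real opcodes:
import copy
from cytoolz import merge

class Op:
    def __init__(self, gas_cost):
        self.gas_cost = gas_cost

PARENT = {0x01: Op(3)}
CHILD = merge(copy.deepcopy(PARENT), {0x02: Op(5)})

CHILD[0x01].gas_cost = 99                 # tweak the child's copy
assert PARENT[0x01].gas_cost == 3         # the parent's opcode is unaffected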
opcode_values.CREATE: system.CreateEIP150.configure( name='opcode:CREATE', mnemonic=mnemonics.CREATE, gas_cost=GAS_CREATE, )(), opcode_values.CALL: call.CallEIP150.configure( name='opcode:CALL', mnemonic=mnemonics.CALL, gas_cost=constants.GAS_CALL_EIP150, )(), opcode_values.CALLCODE: call.CallCodeEIP150.configure( name='opcode:CALLCODE', mnemonic=mnemonics.CALLCODE, gas_cost=constants.GAS_CALL_EIP150, )(), opcode_values.DELEGATECALL: call.DelegateCallEIP150.configure( name='opcode:DELEGATECALL', mnemonic=mnemonics.DELEGATECALL, gas_cost=constants.GAS_CALL_EIP150, )(), } EIP150_OPCODES = merge( copy.deepcopy(HOMESTEAD_OPCODES), UPDATED_OPCODES, )
def _saddleplot(binedges, digitized, saddledata, contact_type, color, cbar_label=None, fig_kws=None, heatmap_kws=None, margin_kws=None): """ Plot saddle data and signal histograms in the margins. """ n_bins = len(binedges) - 1 lo, hi = 0, n_bins #-0.5, n_bins - 1.5 # Populate kwargs fig_kws = merge(dict(figsize=(5, 5)), fig_kws if fig_kws is not None else {}) heatmap_kws = merge( dict(aspect='auto', cmap='coolwarm', interpolation='none', vmin=-0.5, vmax=0.5), heatmap_kws if heatmap_kws is not None else {}, ) vmin = heatmap_kws['vmin'] vmax = heatmap_kws['vmax'] margin_kws = merge( dict(bins=n_bins, range=(0, len(binedges)), histtype='stepfilled', edgecolor='k', facecolor=color, linewidth=1), margin_kws if margin_kws is not None else {}, ) # layout gs = GridSpec( nrows=3, ncols=3, width_ratios=[0.2, 1, 0.1], height_ratios=[0.2, 1, 0.1], wspace=0.05, hspace=0.05, ) fig = plt.figure(**fig_kws) # heatmap ax = ax1 = plt.subplot(gs[4]) img = ax.imshow(np.log10(saddledata), **heatmap_kws) plt.xticks( np.arange(0, n_bins)[::5], [ '{:0.4f}'.format(t) for t in ((binedges[1:] + binedges[:-1]) / 2)[::5] ], rotation=90, ) plt.yticks([]) plt.xlim(lo, hi) plt.ylim(hi, lo) # left margin plt.subplot(gs[3]) plt.hist(np.concatenate(list(digitized.values())), **merge(margin_kws, {'orientation': 'horizontal'})) plt.xticks([]) plt.yticks( np.arange(0, n_bins)[::5], [ '{:0.4f}'.format(t) for t in ((binedges[1:] + binedges[:-1]) / 2)[::5] ], ) plt.xlim(plt.xlim()[1], plt.xlim()[0]) # fliplr plt.ylim(hi, lo) plt.gca().spines['top'].set_visible(False) plt.gca().spines['bottom'].set_visible(False) plt.gca().spines['left'].set_visible(False) # top margin plt.subplot(gs[1]) plt.hist(np.concatenate(list(digitized.values())), **margin_kws) plt.xticks([]) plt.yticks([]) plt.xlim(lo, hi) plt.ylim(plt.ylim()[0], plt.ylim()[1]) # correct plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) plt.gca().spines['left'].set_visible(False) # colorbar plt.subplot(gs[5]) cb = plt.colorbar(img, fraction=0.8, label=cbar_label) if vmin is not None and vmax is not None: cb.set_ticks(np.arange(vmin, vmax + 0.001, 0.5)) plt.grid(False) plt.axis('off') return fig
gas_cost=constants.GAS_SELFDESTRUCT, ), } HOMESTEAD_UPDATED_OPCODES = { opcode_values.DELEGATECALL: call.DelegateCall.configure( __name__='opcode:DELEGATECALL', mnemonic=mnemonics.DELEGATECALL, gas_cost=constants.GAS_CALL, )(), } HOMESTEAD_OPCODES = merge( copy.deepcopy(FRONTIER_OPCODES), HOMESTEAD_UPDATED_OPCODES ) TANGERINE_WHISTLE_UPDATED_OPCODES = { opcode_values.EXTCODESIZE: as_opcode( logic_fn=context.extcodesize, mnemonic=mnemonics.EXTCODESIZE, gas_cost=GAS_EXTCODE_EIP150, ), opcode_values.EXTCODECOPY: as_opcode( logic_fn=context.extcodecopy, mnemonic=mnemonics.EXTCODECOPY, gas_cost=GAS_EXTCODE_EIP150, ),
def saddleplot(binedges, counts, saddledata, cmap='coolwarm', vmin=-1, vmax=1, color=None, title=None, xlabel=None, ylabel=None, clabel=None, fig=None, fig_kws=None, heatmap_kws=None, margin_kws=None, cbar_kws=None, subplot_spec=None): """ Generate a saddle plot. Parameters ---------- binedges : 1D array-like For `n` bins, there should be `n + 1` bin edges counts : 1D array-like Signal track histogram produced by `digitize_track`. It will include 2 flanking elements for outlier values, thus the length should be `n + 2`. saddledata : 2D array-like Saddle matrix produced by `make_saddle`. It will include 2 flanking rows/columns for outlier signal values, thus the shape should be `(n+2, n+2)`. cmap : str or matplotlib colormap Colormap to use for plotting the saddle heatmap vmin, vmax : float Value limits for coloring the saddle heatmap color : matplotlib color value Face color for margin bar plots fig : matplotlib Figure, optional Specified figure to plot on. A new figure is created if none is provided. fig_kws : dict, optional Passed on to `plt.Figure()` heatmap_kws : dict, optional Passed on to `ax.imshow()` margin_kws : dict, optional Passed on to `ax.bar()` and `ax.barh()` cbar_kws : dict, optional Passed on to `plt.colorbar()` subplot_spec : GridSpec object Specify a subregion of a figure to using a GridSpec. Returns ------- Dictionary of axes objects. """ from matplotlib.gridspec import GridSpec, GridSpecFromSubplotSpec import matplotlib.pyplot as plt from cytoolz import merge n_edges = len(binedges) n_bins = n_edges - 1 lo, hi = binedges[0], binedges[-1] # Histogram and saddledata are flanked by outlier bins n = saddledata.shape[0] X, Y = np.meshgrid(binedges, binedges) C = saddledata hist = counts if (n - n_bins) == 2: C = C[1:-1, 1:-1] hist = hist[1:-1] # Layout if subplot_spec is not None: GridSpec = partial(GridSpecFromSubplotSpec, subplot_spec=subplot_spec) grid = {} gs = GridSpec( nrows=3, ncols=3, width_ratios=[0.2, 1, 0.1], height_ratios=[0.2, 1, 0.1], wspace=0.05, hspace=0.05, ) # Figure if fig is None: fig_kws_default = dict(figsize=(5, 5)) fig_kws = merge( fig_kws_default, fig_kws if fig_kws is not None else {} ) fig = plt.figure(**fig_kws) # Heatmap grid['ax_heatmap'] = ax = plt.subplot(gs[4]) heatmap_kws_default = dict( cmap='coolwarm', rasterized=True, vmin=vmin, vmax=vmax) heatmap_kws = merge( heatmap_kws_default, heatmap_kws if heatmap_kws is not None else {}) img = ax.pcolormesh(X, Y, C, **heatmap_kws) vmin = heatmap_kws['vmin'] vmax = heatmap_kws['vmax'] plt.gca().yaxis.set_visible(False) # Margins margin_kws_default = dict( edgecolor='k', facecolor=color, linewidth=1) margin_kws = merge( margin_kws_default, margin_kws if margin_kws is not None else {}) # left margin hist grid['ax_margin_y'] = plt.subplot(gs[3], sharey=grid['ax_heatmap']) plt.barh(binedges[:-1], height=np.diff(binedges), width=hist, align='edge', **margin_kws) plt.xlim(plt.xlim()[1], plt.xlim()[0]) # fliplr plt.ylim(hi, lo) plt.gca().spines['top'].set_visible(False) plt.gca().spines['bottom'].set_visible(False) plt.gca().spines['left'].set_visible(False) plt.gca().xaxis.set_visible(False) # top margin hist grid['ax_margin_x'] = plt.subplot(gs[1], sharex=grid['ax_heatmap']) plt.bar(binedges[:-1], width=np.diff(binedges), height=hist, align='edge', **margin_kws) plt.xlim(lo, hi) # plt.ylim(plt.ylim()) # correct plt.gca().spines['top'].set_visible(False) plt.gca().spines['right'].set_visible(False) plt.gca().spines['left'].set_visible(False) plt.gca().xaxis.set_visible(False) 
plt.gca().yaxis.set_visible(False) # Colorbar grid['ax_cbar'] = plt.subplot(gs[5]) cbar_kws_default = dict( fraction=0.8, label=clabel or '') cbar_kws = merge( cbar_kws_default, cbar_kws if cbar_kws is not None else {}) grid['cbar'] = cb = plt.colorbar(img, **cbar_kws) if vmin is not None and vmax is not None: # cb.set_ticks(np.arange(vmin, vmax + 0.001, 0.5)) # # do linspace between vmin and vmax of 5 segments and trunc to 1 decimal: decimal = 10 nsegments = 5 cd_ticks = np.trunc(np.linspace(vmin, vmax, nsegments)*decimal)/decimal cb.set_ticks(cd_ticks) # extra settings grid['ax_heatmap'].set_xlim(lo, hi) grid['ax_heatmap'].set_ylim(hi, lo) plt.grid(False) plt.axis('off') if title is not None: grid['ax_margin_x'].set_title(title) if xlabel is not None: grid['ax_heatmap'].set_xlabel(xlabel) if ylabel is not None: grid['ax_margin_y'].set_ylabel(ylabel) return grid
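Both saddleplot variants lean on the same defaults-plus-overrides idiom: library defaults go first so that any caller-supplied keyword wins. A minimal, self-contained sketch of that idiom (the helper name is hypothetical):

from cytoolz import merge

def make_heatmap_kws(user_kws=None, vmin=-1, vmax=1):
    # library defaults come first so that caller-supplied keys win
    defaults = dict(cmap='coolwarm', rasterized=True, vmin=vmin, vmax=vmax)
    return merge(defaults, user_kws if user_kws is not None else {})

assert make_heatmap_kws({'vmax': 2})['vmax'] == 2
assert make_heatmap_kws()['cmap'] == 'coolwarm'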
def extra(self): return merge({"prefix": self.prefix}, template_variables)
mnemonic=mnemonics.PAYGAS, gas_cost=constants.GAS_VERYLOW, ), } REMOVED_OPCODES = [ opcode_values.CREATE, opcode_values.SELFDESTRUCT, ] REPLACED_OPCODES = { opcode_values.CALL: call.CallSharding.configure( __name__='opcode:CALL', mnemonic=mnemonics.CALL, gas_cost=GAS_CALL_EIP150, )(), opcode_values.GASPRICE: as_opcode( logic_fn=context.PAYGAS_gasprice, mnemonic=mnemonics.GASPRICE, gas_cost=constants.GAS_BASE, ), } SHARDING_OPCODES = merge( dissoc(copy.deepcopy(BYZANTIUM_OPCODES), *REMOVED_OPCODES), NEW_OPCODES, REPLACED_OPCODES, )
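Here merge is combined with dissoc: removed opcodes are dropped from a deep copy of the parent table before the new and replaced entries are layered on top. A toy sketch with placeholder opcode values:

import copy
from cytoolz import dissoc, merge

BASE = {0x10: "LT", 0xf0: "CREATE", 0xff: "SELFDESTRUCT"}   # toy table
REMOVED = [0xf0, 0xff]
NEW = {0xf6: "PAYGAS"}
REPLACED = {0x10: "LT_v2"}

# drop removed opcodes first, then merge in new and replaced ones
DERIVED = merge(dissoc(copy.deepcopy(BASE), *REMOVED), NEW, REPLACED)
assert DERIVED == {0x10: "LT_v2", 0xf6: "PAYGAS"}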
def alignment_stats(lable_ind, label_val, pred_ind, pred_val, batch_size, debug=False): """Returns a list of numpy array representing alignemnt stats. First N elements are in aligment_stats_ordering and the last one in identity. The return is like this due to tf.py_func requirements --> this function is made for embedding as tf operation via tf.py_func :param lable_ind: :param label_val: :param pred_ind: :param pred_val: :param batch_size: :param debug: :return: """ prefix = os.environ.get("MINCALL_LOG_DATA", None) if prefix: fname = os.path.abspath(os.path.join(prefix, f"{uuid.uuid4().hex}.npz")) with open(fname, "wb") as f: np.savez( f, **{ "label_val": label_val, "lable_ind": lable_ind, "pred_val": pred_val, "pred_ind": pred_ind, "batch_size": batch_size, }) logger.debug(f"Saves alignment stats input data to {fname}") yt = defaultdict(list) for ind, val in zip(lable_ind, label_val): yt[ind[0]].append(val) yp = defaultdict(list) for ind, val in zip(pred_ind, pred_val): yp[ind[0]].append(val) sol = defaultdict(list) identities = [] for x in range(batch_size): query = decode(np.array(yp[x], dtype=int)) target = decode(np.array(yt[x], dtype=int)) if len(target) == 0: raise ValueError("Empty target sequence") if len(query) == 0: logger.warning(f"Empty query sequence\n" f"Target: {target}") sol[dataset_pb2.MATCH].append(0.0) sol[dataset_pb2.MISMATCH].append(0.0) sol[dataset_pb2.DELETION].append(1.0) sol[dataset_pb2.INSERTION].append(0.0) identities.append(0) continue edlib_res = edlib.align(query, target, task='path') stats = ext_cigar_stats(edlib_res['cigar']) read_len = stats[dataset_pb2.MISMATCH] + stats[ dataset_pb2.MATCH] + stats[dataset_pb2.INSERTION] # https://github.com/isovic/samscripts/blob/master/src/errorrates.py identities.append(stats[dataset_pb2.MATCH] / sum(stats.values())) for op in aligment_stats_ordering: sol[op].append(stats[op] / read_len) if True: msg = "edlib results\n" s_query, s_target, _ = squggle(query, target) exp_cigar = expand_cigar(edlib_res['cigar']) for i in range(0, len(s_query), 80): msg += "query: " + s_query[i:i + 80] + "\n" msg += "target: " + s_target[i:i + 80] + "\n" msg += "cigar : " + exp_cigar[i:i + 80] + "\n" msg += "--------" + 80 * "-" + "\n" msg += "query: " + query + "\n" msg += "target: " + target + "\n" msg += "full cigar: " + edlib_res['cigar'] + "\n" msg += pformat( {dataset_pb2.Cigar.Name(k): v for k, v in stats.items()}) + "\n" msg += "readl: " + str(read_len) + "\n" df = pd.DataFrame({ "query": toolz.merge( toolz.frequencies(query), toolz.keymap( "".join, toolz.frequencies(toolz.sliding_window(2, query))), ), "target": toolz.merge( toolz.frequencies(target), toolz.keymap( "".join, toolz.frequencies(toolz.sliding_window(2, target))), ), }) df["delta"] = 100 * (df['target'] / df['query'] - 1) df = df[['query', 'target', 'delta']] msg += "Stats\n" + str(df) + "\n" msg += "==================\n" logger.info(msg) sol = [ np.array(sol[op], dtype=np.float32) for op in aligment_stats_ordering ] sol_data = { dataset_pb2.Cigar.Name(k): v for k, v in zip(aligment_stats_ordering, sol) } sol_data["IDENTITY"] = identities logger.info(f"sol: \n{pd.DataFrame(sol_data)}") return sol + [np.array(identities, dtype=np.float32)]
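The DataFrame above is built from unigram and bigram frequency tables combined with toolz.merge; the bigram keys are tuples that keymap joins into strings. A small sketch on a toy sequence:

import toolz

seq = "AACGT"
unigrams = toolz.frequencies(seq)
# count 2-character windows, then join the tuple keys into strings
bigrams = toolz.keymap("".join, toolz.frequencies(toolz.sliding_window(2, seq)))
counts = toolz.merge(unigrams, bigrams)
assert counts["A"] == 2 and counts["AA"] == 1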
opcode_values.SHR: as_opcode( logic_fn=arithmetic.shr, mnemonic=mnemonics.SHR, gas_cost=constants.GAS_VERYLOW, ), opcode_values.SAR: as_opcode( logic_fn=arithmetic.sar, mnemonic=mnemonics.SAR, gas_cost=constants.GAS_VERYLOW, ), opcode_values.EXTCODEHASH: as_opcode( logic_fn=context.extcodehash, mnemonic=mnemonics.EXTCODEHASH, gas_cost=GAS_EXTCODEHASH_EIP1052, ), opcode_values.CREATE2: system.Create2.configure( __name__='opcode:CREATE2', mnemonic=mnemonics.CREATE2, gas_cost=constants.GAS_CREATE, )(), } CONSTANTINOPLE_OPCODES = merge( copy.deepcopy(BYZANTIUM_OPCODES), UPDATED_OPCODES, )
def apply_gufunc(func, signature, *args, **kwargs): """ Apply a generalized ufunc or similar python function to arrays. ``signature`` determines if the function consumes or produces core dimensions. The remaining dimensions in given input arrays (``*args``) are considered loop dimensions and are required to broadcast naturally against each other. In other terms, this function is like np.vectorize, but for the blocks of dask arrays. If the function itself shall also be vectorized use ``vectorize=True`` for convenience. Parameters ---------- func : callable Function to call like ``func(*args, **kwargs)`` on input arrays (``*args``) that returns an array or tuple of arrays. If multiple arguments with non-matching dimensions are supplied, this function is expected to vectorize (broadcast) over axes of positional arguments in the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, ``output_core_dims`` has to be set as well. signature: string Specifies what core dimensions are consumed and produced by ``func``. According to the specification of numpy.gufunc signature [2]_ *args : numeric Input arrays or scalars to the callable function. output_dtypes : dtype or list of dtypes, keyword only dtype or list of output dtypes. output_sizes : dict, optional, keyword only Optional mapping from dimension names to sizes for outputs. Only used if new core dimensions (not found on inputs) appear on outputs. vectorize: bool, keyword only If set to ``True``, ``np.vectorize`` is applied to ``func`` for convenience. Defaults to ``False``. allow_rechunk: Optional, bool, keyword only Allows rechunking, otherwise chunk sizes need to match and core dimensions are to consist only of one chunk. Warning: enabling this can increase memory usage significantly. Defaults to ``False``. **kwargs : dict Extra keyword arguments to pass to `func` Returns ------- Single dask.array.Array or tuple of dask.array.Array Examples -------- >>> import dask.array as da >>> import numpy as np >>> def stats(x): ... return np.mean(x, axis=-1), np.std(x, axis=-1) >>> a = da.random.normal(size=(10,20,30), chunks=(5, 10, 30)) >>> mean, std = da.apply_gufunc(stats, "(i)->(),()", a, output_dtypes=2*(a.dtype,)) >>> mean.compute().shape (10, 20) >>> def outer_product(x, y): ... return np.einsum("i,j->ij", x, y) >>> a = da.random.normal(size=( 20,30), chunks=(10, 30)) >>> b = da.random.normal(size=(10, 1,40), chunks=(5, 1, 40)) >>> c = da.apply_gufunc(outer_product, "(i),(j)->(i,j)", a, b, output_dtypes=a.dtype, vectorize=True) >>> c.compute().shape (10, 20, 30, 40) References ---------- .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html .. 
[2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html """ output_dtypes = kwargs.pop("output_dtypes", None) output_sizes = kwargs.pop("output_sizes", None) vectorize = kwargs.pop("vectorize", None) allow_rechunk = kwargs.pop("allow_rechunk", False) # Input processing: ## Signature if not isinstance(signature, str): raise TypeError('`signature` has to be of type string') core_input_dimss, core_output_dimss = _parse_gufunc_signature(signature) ## Determine nout: nout = None for functions of one direct return; nout = int for return tuples nout = None if not isinstance(core_output_dimss, list) else len(core_output_dimss) ## Assert output_dtypes if output_dtypes is None: raise ValueError("Must specify `output_dtypes` of output array(s)") elif isinstance(output_dtypes, str): otypes = list(output_dtypes) output_dtypes = otypes[0] if nout is None else otypes elif isinstance(output_dtypes, (tuple, list)): if nout is None: raise ValueError("Must specify single dtype for `output_dtypes` for function with one output") otypes = output_dtypes else: if nout is not None: raise ValueError("Must specify tuple of dtypes for `output_dtypes` for function with multiple outputs") otypes = [output_dtypes] ## Vectorize function, if required if vectorize: func = np.vectorize(func, signature=signature, otypes=otypes) ## Miscellaneous if output_sizes is None: output_sizes = {} # Main code: ## Cast all input arrays to dask args = [asarray(a) for a in args] if len(core_input_dimss) != len(args): ValueError("According to `signature`, `func` requires %d arguments, but %s given" % (len(core_output_dimss), len(args))) ## Assess input args for loop dims input_shapes = [a.shape for a in args] input_chunkss = [tuple(c[0] for c in a.chunks) for a in args] num_loopdims = [len(s) - len(cd) for s, cd in zip(input_shapes, core_input_dimss)] max_loopdims = max(num_loopdims) if num_loopdims else None _core_input_shapes = [dict(zip(cid, s[n:])) for s, n, cid in zip(input_shapes, num_loopdims, core_input_dimss)] core_shapes = merge(output_sizes, *_core_input_shapes) loop_input_dimss = [tuple("__loopdim%d__" % d for d in range(max_loopdims - n, max_loopdims)) for n in num_loopdims] input_dimss = [l + c for l, c in zip(loop_input_dimss, core_input_dimss)] loop_output_dims = max(loop_input_dimss, key=len) if loop_input_dimss else set() ## Assess input args for same size and chunk sizes ### Collect sizes and chunksizes of all dims in all arrays dimsizess = {} chunksizess = {} for dims, shape, chunksizes in zip(input_dimss, input_shapes, input_chunkss): for dim, size, chunksize in zip(dims, shape, chunksizes): _dimsizes = dimsizess.get(dim, []) _dimsizes.append(size) dimsizess[dim] = _dimsizes _chunksizes = chunksizess.get(dim, []) _chunksizes.append(chunksize) chunksizess[dim] = _chunksizes ### Assert correct partitioning, for case: for dim, sizes in dimsizess.items(): #### Check that the arrays have same length for same dimensions or dimension `1` if set(sizes).union({1}) != {1, max(sizes)}: raise ValueError("Dimension `'{}'` with different lengths in arrays".format(dim)) if not allow_rechunk: chunksizes = chunksizess[dim] #### Check if core dimensions consist of only one chunk if (dim in core_shapes) and (chunksizes[0] < core_shapes[dim]): raise ValueError("Core dimension `'{}'` consists of multiple chunks. 
To fix, rechunk into a single \ chunk along this dimension or set `allow_rechunk=True`, but beware that this may increase memory usage \ significantly.".format(dim)) #### Check if loop dimensions consist of same chunksizes, when they have sizes > 1 relevant_chunksizes = list(unique(c for s, c in zip(sizes, chunksizes) if s > 1)) if len(relevant_chunksizes) > 1: raise ValueError("Dimension `'{}'` with different chunksize present".format(dim)) ## Apply function - use atop here arginds = list(concat(zip(args, input_dimss))) ### Use existing `atop` but only with loopdims to enforce ### concatenation for coredims that appear also at the output ### Modifying `atop` could improve things here. tmp = atop(func, loop_output_dims, *arginds, dtype=int, # Only dummy dtype, anyone will do concatenate=True, **kwargs) ## Prepare output shapes loop_output_shape = tmp.shape loop_output_chunks = tmp.chunks dsk = tmp.__dask_graph__() keys = list(flatten(tmp.__dask_keys__())) _anykey = keys[0] name, token = _anykey[0].split('-') ### *) Treat direct output if nout is None: core_output_dimss = [core_output_dimss] output_dtypes = [output_dtypes] ## Split output leaf_arrs = [] for i, cod, odt in zip(count(0), core_output_dimss, output_dtypes): core_output_shape = tuple(core_shapes[d] for d in cod) core_chunkinds = len(cod) * (0,) output_shape = loop_output_shape + core_output_shape output_chunks = loop_output_chunks + core_output_shape leaf_name = "%s_%d-%s" % (name, i, token) leaf_dsk = {(leaf_name,) + key[1:] + core_chunkinds: ((getitem, key, i) if nout else key) for key in keys} leaf_arr = Array(sharedict.merge((leaf_name, leaf_dsk), dsk), leaf_name, chunks=output_chunks, shape=output_shape, dtype=odt) leaf_arrs.append(leaf_arr) return leaf_arrs if nout else leaf_arrs[0] # Undo *) from above
async def collate(network, shard_id, period, address, smc_handler, node_id): node = network.find_node(node_id) logger.info("Listening for proposals for period {}".format(period)) shard = smc_handler.shards[shard_id] availabilities = {} while True: # stop once it's time to submit (TODO: timing and cancel windback if necessary) if smc_handler.get_current_period() >= period: await asyncio.sleep(PERIOD_TIME / 2) break # get a candidate head whose availability is not known yet candidate_iterator = shard.get_candidate_head_iterator() try: candidate_head = next(header for header in candidate_iterator if header.hash not in availabilities) except StopIteration: await smc_handler.wait_for_next_block() continue # windback checked_availabilities = await windback(network, shard, candidate_head, availabilities) availabilities = merge(availabilities, checked_availabilities) # get the best head that is known to be available candidate_iterator = shard.get_candidate_head_iterator() try: parent_header = next(header for header in candidate_iterator if availabilities.get(header.hash, False)) except StopIteration: logger.warning("Could not find available chain to build on top of") return else: logger.info("Extend chain with head {}".format(parent_header)) # filter received proposals messages = await receive_and_broadcast_message(node) proposals = [ message for message in messages if isinstance(message, CollationHeader) # and message.proposer == 'proposer_1' # censor! and message.shard_id == shard_id and message.period == period and message.parent_hash == parent_header.hash ] # overslept if smc_handler.get_current_period() > period: logger.warning("Missed submitting proposal for period {}".format(period)) return network.remove_peer(node) if proposals: logger.info("Submitting one of {} collected proposals".format( len(proposals))) proposal = random.choice(proposals) smc_handler.add_header(address, proposal) else: logger.warning("No suitable proposals collected for period {}".format(period))
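The availability bookkeeping above folds each windback result into the running map with merge; later results overwrite earlier entries for the same header hash. A minimal sketch with made-up hashes:

from cytoolz import merge

availabilities = {}
for checked in ({"0xaa": True}, {"0xbb": False}, {"0xcc": True}):
    # fold each round's availability results into the running map
    availabilities = merge(availabilities, checked)
assert availabilities == {"0xaa": True, "0xbb": False, "0xcc": True}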
mnemonic=mnemonics.SELFDESTRUCT, gas_cost=constants.GAS_SELFDESTRUCT_EIP150, ), opcode_values.CREATE: system.CreateEIP150.configure( name='opcode:CREATE', mnemonic=mnemonics.CREATE, gas_cost=GAS_CREATE, )(), opcode_values.CALL: call.CallEIP150.configure( name='opcode:CALL', mnemonic=mnemonics.CALL, gas_cost=constants.GAS_CALL_EIP150, )(), opcode_values.CALLCODE: call.CallCodeEIP150.configure( name='opcode:CALLCODE', mnemonic=mnemonics.CALLCODE, gas_cost=constants.GAS_CALL_EIP150, )(), opcode_values.DELEGATECALL: call.DelegateCallEIP150.configure( name='opcode:DELEGATECALL', mnemonic=mnemonics.DELEGATECALL, gas_cost=constants.GAS_CALL_EIP150, )(), } TANGERINE_WHISTLE_OPCODES = merge( copy.deepcopy(HOMESTEAD_OPCODES), UPDATED_OPCODES, )
def normalize_transaction_dict( transaction_dict: Dict[str, str]) -> Dict[str, Any]: normalized_dict = apply_formatters_to_dict(TRANSACTION_NORMALIZER, transaction_dict) return merge(SAFE_TRANSACTION_DEFAULTS, normalized_dict)
def merge(d, *dicts, **kwargs): return cytoolz.merge(d, *dicts, **kwargs)
import copy from cytoolz import merge from evm import constants from evm import opcode_values from evm import mnemonics from evm.logic import ( call, ) from evm.vm.forks.frontier.opcodes import FRONTIER_OPCODES NEW_OPCODES = { opcode_values.DELEGATECALL: call.DelegateCall.configure( name='opcode:DELEGATECALL', mnemonic=mnemonics.DELEGATECALL, gas_cost=constants.GAS_CALL, )(), } HOMESTEAD_OPCODES = merge(copy.deepcopy(FRONTIER_OPCODES), NEW_OPCODES)
cytoolz.partition_all, cytoolz.assoc, cytoolz.mapcat, cytoolz.filter, cytoolz.countby, cytoolz.merge_with, cytoolz.update_in, cytoolz.keyfilter, cytoolz.groupby, ]) def _curry_namespace(ns): return dict( (name, cytoolz.curry(f) if f in _curry_set else f) for name, f in ns.items() if '__' not in name ) locals().update(cytoolz.merge( _curry_namespace(vars(cytoolz)), _curry_namespace(vars(exceptions)), )) # Clean up the namespace. del _curry_set del _curry_namespace del exceptions del cytoolz
def process_ngrams(self, filename, Encoder, save = False): print("\t\tStarting " + filename) #Initialize bigram dictionary ngrams = defaultdict(int) unigrams = defaultdict(int) starting = time.time() total = 0 for line in Encoder.load_stream(filename): total += len(line) #Store unigrams for item in line: unigrams[(1, item[0])] += 1 unigrams[(2, item[1])] += 1 unigrams[(3, item[2])] += 1 try: for bigram in ct.sliding_window(2, line): #Tuples are indexes for (LEX, POS, CAT) #Index types are 1 (LEX), 2 (POS), 3 (CAT) ngrams[((1, bigram[0][0]), (1, bigram[1][0]))] += 1 #lex_lex ngrams[((1, bigram[0][0]), (2, bigram[1][1]))] += 1 #lex_pos ngrams[((1, bigram[0][0]), (3, bigram[1][2]))] += 1 #lex_cat ngrams[((2, bigram[0][1]), (2, bigram[1][1]))] += 1 #pos_pos ngrams[((2, bigram[0][1]), (1, bigram[1][0]))] += 1 #pos_lex ngrams[((2, bigram[0][1]), (3, bigram[1][2]))] += 1 #pos_cat ngrams[((3, bigram[0][2]), (3, bigram[1][2]))] += 1 #cat_cat ngrams[((3, bigram[0][2]), (2, bigram[1][1]))] += 1 #cat_pos ngrams[((3, bigram[0][2]), (1, bigram[1][0]))] += 1 #cat_lex #Catch errors from empty lines coming out of the encoder except Exception as e: error = e #Reduce nonce ngrams size = len(list(ngrams.keys())) keepable = lambda x: x > 1 ngrams = ct.valfilter(keepable, ngrams) #Note: Keep all unigrams, they are already limited by the lexicon #Reduce null indexes ngrams = {key: ngrams[key] for key in list(ngrams.keys()) if 0 not in key[0] and 0 not in key[1]} unigrams = {key: unigrams[key] for key in list(unigrams.keys()) if 0 not in key} ngrams = ct.merge([ngrams, unigrams]) ngrams["TOTAL"] = total del unigrams #Print status print("\tTime: ", end = "") print(time.time() - starting, end = "") print(" Full: " + str(size) + " ", end = "") print(" Reduced: ", end = "") print(len(list(ngrams.keys())), end = "") print(" with " + str(ngrams["TOTAL"]) + " words.") if save == True: self.Loader.save_file(ngrams, filename + ".ngrams.p") return os.path.join(self.Loader.output_dir, filename + ".ngrams.p") else: return ngrams
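The pruning and combination steps above amount to a valfilter followed by a merge; note that cytoolz.merge also accepts a single iterable of dicts, which is the form used here (ct.merge([ngrams, unigrams])). A toy sketch:

from cytoolz import valfilter, merge

ngrams = {("the", "cat"): 3, ("cat", "sat"): 1}
unigrams = {"the": 5, "cat": 4}
ngrams = valfilter(lambda count: count > 1, ngrams)   # drop n-grams seen only once
combined = merge([ngrams, unigrams])                  # merge accepts a list of dicts
assert combined == {("the", "cat"): 3, "the": 5, "cat": 4}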
# opcode_values.CREATE: system.CreateByzantium.configure( name='opcode:CREATE', mnemonic=mnemonics.CREATE, gas_cost=constants.GAS_CREATE, )(), # TODO: CREATE2 # # Storage # opcode_values.SSTORE: as_opcode( logic_fn=ensure_no_static(storage.sstore), mnemonic=mnemonics.SSTORE, gas_cost=constants.GAS_NULL, ), # # Self Destruct # opcode_values.SELFDESTRUCT: as_opcode( logic_fn=ensure_no_static(system.selfdestruct_eip161), mnemonic=mnemonics.SELFDESTRUCT, gas_cost=GAS_SELFDESTRUCT_EIP150, ), } BYZANTIUM_OPCODES = merge( copy.deepcopy(SPURIOUS_DRAGON_OPCODES), UPDATED_OPCODES, )
from cytoolz import merge from hvm.exceptions import ( WriteProtection, ) def ensure_no_static(opcode_fn): @functools.wraps(opcode_fn) def inner(computation): if computation.msg.is_static: raise WriteProtection( "Cannot modify state while inside of a STATICCALL context") return opcode_fn(computation) return inner from hvm.vm.forks.helios_testnet.opcodes import HELIOS_TESTNET_OPCODES BOSON_UPDATED_OPCODES = { # # Call # } BOSON_OPCODES = merge( copy.deepcopy(HELIOS_TESTNET_OPCODES), BOSON_UPDATED_OPCODES, )
def apply_gufunc(func, signature, *args, **kwargs): """ Apply a generalized ufunc or similar python function to arrays. ``signature`` determines if the function consumes or produces core dimensions. The remaining dimensions in given input arrays (``*args``) are considered loop dimensions and are required to broadcast naturally against each other. In other terms, this function is like np.vectorize, but for the blocks of dask arrays. If the function itself shall also be vectorized use ``vectorize=True`` for convenience. Parameters ---------- func : callable Function to call like ``func(*args, **kwargs)`` on input arrays (``*args``) that returns an array or tuple of arrays. If multiple arguments with non-matching dimensions are supplied, this function is expected to vectorize (broadcast) over axes of positional arguments in the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, ``output_core_dims`` has to be set as well. signature: string Specifies what core dimensions are consumed and produced by ``func``. According to the specification of numpy.gufunc signature [2]_ *args : numeric Input arrays or scalars to the callable function. axes: List of tuples, optional, keyword only A list of tuples with indices of axes a generalized ufunc should operate on. For instance, for a signature of ``"(i,j),(j,k)->(i,k)"`` appropriate for matrix multiplication, the base elements are two-dimensional matrices and these are taken to be stored in the two last axes of each argument. The corresponding axes keyword would be ``[(-2, -1), (-2, -1), (-2, -1)]``. For simplicity, for generalized ufuncs that operate on 1-dimensional arrays (vectors), a single integer is accepted instead of a single-element tuple, and for generalized ufuncs for which all outputs are scalars, the output tuples can be omitted. axis: int, optional, keyword only A single axis over which a generalized ufunc should operate. This is a short-cut for ufuncs that operate over a single, shared core dimension, equivalent to passing in axes with entries of (axis,) for each single-core-dimension argument and ``()`` for all others. For instance, for a signature ``"(i),(i)->()"``, it is equivalent to passing in ``axes=[(axis,), (axis,), ()]``. keepdims: bool, optional, keyword only If this is set to True, axes which are reduced over will be left in the result as a dimension with size one, so that the result will broadcast correctly against the inputs. This option can only be used for generalized ufuncs that operate on inputs that all have the same number of core dimensions and with outputs that have no core dimensions , i.e., with signatures like ``"(i),(i)->()"`` or ``"(m,m)->()"``. If used, the location of the dimensions in the output can be controlled with axes and axis. output_dtypes : Optional, dtype or list of dtypes, keyword only Valid numpy dtype specification or list thereof. If not given, a call of ``func`` with a small set of data is performed in order to try to automatically determine the output dtypes. output_sizes : dict, optional, keyword only Optional mapping from dimension names to sizes for outputs. Only used if new core dimensions (not found on inputs) appear on outputs. vectorize: bool, keyword only If set to ``True``, ``np.vectorize`` is applied to ``func`` for convenience. Defaults to ``False``. allow_rechunk: Optional, bool, keyword only Allows rechunking, otherwise chunk sizes need to match and core dimensions are to consist only of one chunk. 
Warning: enabling this can increase memory usage significantly. Defaults to ``False``. **kwargs : dict Extra keyword arguments to pass to `func` Returns ------- Single dask.array.Array or tuple of dask.array.Array Examples -------- >>> import dask.array as da >>> import numpy as np >>> def stats(x): ... return np.mean(x, axis=-1), np.std(x, axis=-1) >>> a = da.random.normal(size=(10,20,30), chunks=(5, 10, 30)) >>> mean, std = da.apply_gufunc(stats, "(i)->(),()", a) >>> mean.compute().shape (10, 20) >>> def outer_product(x, y): ... return np.einsum("i,j->ij", x, y) >>> a = da.random.normal(size=( 20,30), chunks=(10, 30)) >>> b = da.random.normal(size=(10, 1,40), chunks=(5, 1, 40)) >>> c = da.apply_gufunc(outer_product, "(i),(j)->(i,j)", a, b, vectorize=True) >>> c.compute().shape (10, 20, 30, 40) References ---------- .. [1] https://docs.scipy.org/doc/numpy/reference/ufuncs.html .. [2] https://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html """ axes = kwargs.pop("axes", None) axis = kwargs.pop("axis", None) keepdims = kwargs.pop("keepdims", False) output_dtypes = kwargs.pop("output_dtypes", None) output_sizes = kwargs.pop("output_sizes", None) vectorize = kwargs.pop("vectorize", None) allow_rechunk = kwargs.pop("allow_rechunk", False) # Input processing: ## Signature if not isinstance(signature, str): raise TypeError('`signature` has to be of type string') input_coredimss, output_coredimss = _parse_gufunc_signature(signature) ## Determine nout: nout = None for functions of one direct return; nout = int for return tuples nout = None if not isinstance(output_coredimss, list) else len(output_coredimss) ## Determine and handle output_dtypes if output_dtypes is None: if vectorize: tempfunc = np.vectorize(func, signature=signature) else: tempfunc = func output_dtypes = apply_infer_dtype(tempfunc, args, kwargs, "apply_gufunc", "output_dtypes", nout) if isinstance(output_dtypes, (tuple, list)): if nout is None: if len(output_dtypes) > 1: raise ValueError(("Must specify single dtype or list of one dtype " "for `output_dtypes` for function with one output")) otypes = output_dtypes output_dtypes = output_dtypes[0] else: otypes = output_dtypes else: if nout is not None: raise ValueError("Must specify tuple of dtypes for `output_dtypes` for function with multiple outputs") otypes = [output_dtypes] ## Vectorize function, if required if vectorize: func = np.vectorize(func, signature=signature, otypes=otypes) ## Miscellaneous if output_sizes is None: output_sizes = {} ## Axes input_axes, output_axes = _validate_normalize_axes(axes, axis, keepdims, input_coredimss, output_coredimss) # Main code: ## Cast all input arrays to dask args = [asarray(a) for a in args] if len(input_coredimss) != len(args): ValueError("According to `signature`, `func` requires %d arguments, but %s given" % (len(input_coredimss), len(args))) ## Axes: transpose input arguments transposed_args = [] for arg, iax, input_coredims in zip(args, input_axes, input_coredimss): shape = arg.shape iax = tuple(a if a < 0 else a - len(shape) for a in iax) tidc = tuple(i for i in range(-len(shape) + 0, 0) if i not in iax) + iax transposed_arg = arg.transpose(tidc) transposed_args.append(transposed_arg) args = transposed_args ## Assess input args for loop dims input_shapes = [a.shape for a in args] input_chunkss = [a.chunks for a in args] num_loopdims = [len(s) - len(cd) for s, cd in zip(input_shapes, input_coredimss)] max_loopdims = max(num_loopdims) if num_loopdims else None core_input_shapes = [dict(zip(icd, s[n:])) for s, n, icd 
in zip(input_shapes, num_loopdims, input_coredimss)] core_shapes = merge(*core_input_shapes) core_shapes.update(output_sizes) loop_input_dimss = [tuple("__loopdim%d__" % d for d in range(max_loopdims - n, max_loopdims)) for n in num_loopdims] input_dimss = [l + c for l, c in zip(loop_input_dimss, input_coredimss)] loop_output_dims = max(loop_input_dimss, key=len) if loop_input_dimss else tuple() ## Assess input args for same size and chunk sizes ### Collect sizes and chunksizes of all dims in all arrays dimsizess = {} chunksizess = {} for dims, shape, chunksizes in zip(input_dimss, input_shapes, input_chunkss): for dim, size, chunksize in zip(dims, shape, chunksizes): dimsizes = dimsizess.get(dim, []) dimsizes.append(size) dimsizess[dim] = dimsizes chunksizes_ = chunksizess.get(dim, []) chunksizes_.append(chunksize) chunksizess[dim] = chunksizes_ ### Assert correct partitioning, for case: for dim, sizes in dimsizess.items(): #### Check that the arrays have same length for same dimensions or dimension `1` if set(sizes).union({1}) != {1, max(sizes)}: raise ValueError("Dimension `'{}'` with different lengths in arrays".format(dim)) if not allow_rechunk: chunksizes = chunksizess[dim] #### Check if core dimensions consist of only one chunk if (dim in core_shapes) and (chunksizes[0][0] < core_shapes[dim]): raise ValueError("Core dimension `'{}'` consists of multiple chunks. To fix, rechunk into a single \ chunk along this dimension or set `allow_rechunk=True`, but beware that this may increase memory usage \ significantly.".format(dim)) #### Check if loop dimensions consist of same chunksizes, when they have sizes > 1 relevant_chunksizes = list(unique(c for s, c in zip(sizes, chunksizes) if s > 1)) if len(relevant_chunksizes) > 1: raise ValueError("Dimension `'{}'` with different chunksize present".format(dim)) ## Apply function - use blockwise here arginds = list(concat(zip(args, input_dimss))) ### Use existing `blockwise` but only with loopdims to enforce ### concatenation for coredims that appear also at the output ### Modifying `blockwise` could improve things here. 
tmp = blockwise( func, loop_output_dims, *arginds, dtype=int, # Only dummy dtype, anyone will do concatenate=True, **kwargs ) ## Prepare output shapes loop_output_shape = tmp.shape loop_output_chunks = tmp.chunks keys = list(flatten(tmp.__dask_keys__())) name, token = keys[0][0].split('-') ### *) Treat direct output if nout is None: output_coredimss = [output_coredimss] output_dtypes = [output_dtypes] ## Split output leaf_arrs = [] for i, ocd, odt, oax in zip(count(0), output_coredimss, output_dtypes, output_axes): core_output_shape = tuple(core_shapes[d] for d in ocd) core_chunkinds = len(ocd) * (0,) output_shape = loop_output_shape + core_output_shape output_chunks = loop_output_chunks + core_output_shape leaf_name = "%s_%d-%s" % (name, i, token) leaf_dsk = {(leaf_name,) + key[1:] + core_chunkinds: ((getitem, key, i) if nout else key) for key in keys} graph = HighLevelGraph.from_collections(leaf_name, leaf_dsk, dependencies=[tmp]) leaf_arr = Array(graph, leaf_name, chunks=output_chunks, shape=output_shape, dtype=odt) ### Axes: if keepdims: slices = len(leaf_arr.shape) * (slice(None),) + len(oax) * (np.newaxis,) leaf_arr = leaf_arr[slices] tidcs = [None] * len(leaf_arr.shape) for i, oa in zip(range(-len(oax), 0), oax): tidcs[oa] = i j = 0 for i in range(len(tidcs)): if tidcs[i] is None: tidcs[i] = j j += 1 leaf_arr = leaf_arr.transpose(tidcs) leaf_arrs.append(leaf_arr) return leaf_arrs if nout else leaf_arrs[0] # Undo *) from above
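In both apply_gufunc versions the per-argument core-dimension sizes are combined into one mapping, with explicitly passed output_sizes taking precedence for dimensions that only appear on outputs. A sketch with illustrative dimension names:

from cytoolz import merge

core_input_shapes = [{"i": 20, "j": 30}, {"j": 30, "k": 40}]   # one dict per input
output_sizes = {"m": 5}                                        # new output-only dims
core_shapes = merge(*core_input_shapes)
core_shapes.update(output_sizes)
assert core_shapes == {"i": 20, "j": 30, "k": 40, "m": 5}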
system.CreateByzantium.configure( __name__='opcode:CREATE', mnemonic=mnemonics.CREATE, gas_cost=constants.GAS_CREATE, )(), # TODO: CREATE2 # # Storage # opcode_values.SSTORE: as_opcode( logic_fn=ensure_no_static(storage.sstore), mnemonic=mnemonics.SSTORE, gas_cost=constants.GAS_NULL, ), # # Self Destruct # opcode_values.SELFDESTRUCT: as_opcode( logic_fn=ensure_no_static(system.selfdestruct_eip161), mnemonic=mnemonics.SELFDESTRUCT, gas_cost=GAS_SELFDESTRUCT_EIP150, ), } BYZANTIUM_OPCODES = merge( copy.deepcopy(SPURIOUS_DRAGON_OPCODES), UPDATED_OPCODES, )
def test_apply_transaction( # noqa: F811 chain_without_block_validation, funded_address, funded_address_private_key): chain = chain_without_block_validation # noqa: F811 # Don't change these variables vm = chain.get_vm() chaindb = copy.deepcopy(vm.chaindb) block0 = copy.deepcopy(vm.block) prev_block_hash = chain.get_canonical_block_by_number(0).hash initial_state_root = vm.block.header.state_root # (1) Get VM.apply_transaction(transaction) result for assertion # The first transaction chain1 = copy.deepcopy(chain) vm_example = chain1.get_vm() recipient1 = decode_hex('0x1111111111111111111111111111111111111111') amount = 100 from_ = funded_address tx1 = new_transaction( vm_example, from_, recipient1, amount, private_key=funded_address_private_key, ) computation, result_block = vm_example.apply_transaction(tx1) # The second transaction recipient2 = decode_hex('0x2222222222222222222222222222222222222222') tx2 = new_transaction( vm_example, from_, recipient2, amount, private_key=funded_address_private_key, ) computation, result_block = vm_example.apply_transaction(tx2) assert len(result_block.transactions) == 2 # (2) Test VMState.apply_transaction(...) # Use FrontierVMState to apply transaction chaindb1 = copy.deepcopy(chaindb) block1 = copy.deepcopy(block0) prev_hashes = vm.get_prev_hashes( last_block_hash=prev_block_hash, db=vm.chaindb, ) execution_context = ExecutionContext.from_block_header(block1.header, prev_hashes) vm_state1 = FrontierVMState( chaindb=chaindb1, execution_context=execution_context, state_root=block1.header.state_root, receipts=[], ) parent_hash = copy.deepcopy(prev_hashes[0]) computation, block, _ = vm_state1.apply_transaction( tx1, block1, ) access_logs1 = computation.vm_state.access_logs # Check if prev_hashes hasn't been changed assert parent_hash == prev_hashes[0] # Make sure that block1 hasn't been changed assert block1.header.state_root == initial_state_root execution_context = ExecutionContext.from_block_header(block.header, prev_hashes) vm_state1 = FrontierVMState( chaindb=chaindb1, execution_context=execution_context, state_root=block.header.state_root, receipts=computation.vm_state.receipts, ) computation, block, _ = vm_state1.apply_transaction( tx2, block, ) access_logs2 = computation.vm_state.access_logs post_vm_state = computation.vm_state # Check AccessLogs witness_db = ChainDB(MemoryDB(access_logs2.writes)) state_db = witness_db.get_state_db(block.header.state_root, read_only=True) assert state_db.get_balance(recipient2) == amount with pytest.raises(KeyError): _ = state_db.get_balance(recipient1) # Check block data are correct assert block.header.state_root == result_block.header.state_root assert block.header.gas_limit == result_block.header.gas_limit assert block.header.gas_used == result_block.header.gas_used assert block.header.transaction_root == result_block.header.transaction_root assert block.header.receipt_root == result_block.header.receipt_root # Make sure that vm_state1 hasn't been changed assert post_vm_state.state_root == result_block.header.state_root # (3) Testing using witness as db data # Witness_db block2 = copy.deepcopy(block0) witness_db = ChainDB(MemoryDB(access_logs1.reads)) prev_hashes = vm.get_prev_hashes( last_block_hash=prev_block_hash, db=vm.chaindb, ) execution_context = ExecutionContext.from_block_header(block2.header, prev_hashes) # Apply the first transaction vm_state2 = FrontierVMState( chaindb=witness_db, execution_context=execution_context, state_root=block2.header.state_root, receipts=[], ) computation, block, _ = 
vm_state2.apply_transaction( tx1, block2, ) # Update witness_db recent_trie_nodes = merge(access_logs2.reads, access_logs1.writes) witness_db = ChainDB(MemoryDB(recent_trie_nodes)) execution_context = ExecutionContext.from_block_header(block.header, prev_hashes) # Apply the second transaction vm_state2 = FrontierVMState( chaindb=witness_db, execution_context=execution_context, state_root=block.header.state_root, receipts=computation.vm_state.receipts, ) computation, block, _ = vm_state2.apply_transaction( tx2, block, ) # After applying assert block.header.state_root == computation.vm_state.state_root assert block.header.transaction_root == result_block.header.transaction_root assert block.header.receipt_root == result_block.header.receipt_root assert block.hash == result_block.hash # (4) Testing using witness_db and block_header to reconstruct vm_state prev_hashes = vm.get_prev_hashes( last_block_hash=prev_block_hash, db=vm.chaindb, ) execution_context = ExecutionContext.from_block_header(block.header, prev_hashes) vm_state3 = FrontierVMState( chaindb=witness_db, execution_context=execution_context, state_root=block.header.state_root, ) assert vm_state3.state_root == post_vm_state.state_root assert vm_state3.state_root == result_block.header.state_root
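The witness update above layers the writes of the first transaction over the reads needed by the second; because merge is right-biased, the freshly written trie nodes win on conflicting keys. A sketch with placeholder node data:

from cytoolz import merge

writes_tx1 = {b"node-b": b"v1-written"}                  # placeholder trie nodes
reads_tx2 = {b"node-b": b"v0-read", b"node-c": b"v0"}
recent_trie_nodes = merge(reads_tx2, writes_tx1)         # writes win on conflict
assert recent_trie_nodes[b"node-b"] == b"v1-written"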
def fill_transaction_defaults(transaction): return merge(TRANSACTION_DEFAULTS, transaction)
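Filling transaction defaults is the canonical right-biased merge: user-supplied fields override the defaults, untouched fields fall through. A minimal sketch with illustrative defaults:

from cytoolz import merge

TRANSACTION_DEFAULTS = {"gas": 21000, "value": 0}        # illustrative defaults only
assert merge(TRANSACTION_DEFAULTS, {"value": 10}) == {"gas": 21000, "value": 10}
assert merge(TRANSACTION_DEFAULTS, {}) == TRANSACTION_DEFAULTS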
def apply_gufunc(func, signature, *args, **kwargs): """ Apply a generalized ufunc or similar python function to arrays. ``signature`` determines if the function consumes or produces core dimensions. The remaining dimensions in given input arrays (``*args``) are considered loop dimensions and are required to broadcast naturally against each other. In other terms, this function is like np.vectorize, but for the blocks of dask arrays. If the function itself shall also be vectorized use ``vectorize=True`` for convenience. Parameters ---------- func : callable Function to call like ``func(*args, **kwargs)`` on input arrays (``*args``) that returns an array or tuple of arrays. If multiple arguments with non-matching dimensions are supplied, this function is expected to vectorize (broadcast) over axes of positional arguments in the style of NumPy universal functions [1]_ (if this is not the case, set ``vectorize=True``). If this function returns multiple outputs, ``output_core_dims`` has to be set as well. signature: string Specifies what core dimensions are consumed and produced by ``func``. According to the specification of numpy.gufunc signature [2]_ *args : numeric Input arrays or scalars to the callable function. output_dtypes : dtype or list of dtypes, keyword only dtype or list of output dtypes. output_sizes : dict, optional, keyword only Optional mapping from dimension names to sizes for outputs. Only used if new core dimensions (not found on inputs) appear on outputs. vectorize: bool, keyword only If set to ``True``, ``np.vectorize`` is applied to ``func`` for convenience. Defaults to ``False``. allow_rechunk: Optional, bool, keyword only Allows rechunking, otherwise chunk sizes need to match and core dimensions are to consist only of one chunk. Warning: enabling this can increase memory usage significantly. Defaults to ``False``. **kwargs : dict Extra keyword arguments to pass to `func` Returns ------- Single dask.array.Array or tuple of dask.array.Array Examples -------- >>> import dask.array as da >>> import numpy as np >>> def stats(x): ... return np.mean(x, axis=-1), np.std(x, axis=-1) >>> a = da.random.normal(size=(10,20,30), chunks=(5, 10, 30)) >>> mean, std = da.apply_gufunc(stats, "(i)->(),()", a, output_dtypes=2*(a.dtype,)) >>> mean.compute().shape (10, 20) >>> def outer_product(x, y): ... return np.einsum("i,j->ij", x, y) >>> a = da.random.normal(size=( 20,30), chunks=(10, 30)) >>> b = da.random.normal(size=(10, 1,40), chunks=(5, 1, 40)) >>> c = da.apply_gufunc(outer_product, "(i),(j)->(i,j)", a, b, output_dtypes=a.dtype, vectorize=True) >>> c.compute().shape (10, 20, 30, 40) References ---------- .. [1] http://docs.scipy.org/doc/numpy/reference/ufuncs.html .. 
[2] http://docs.scipy.org/doc/numpy/reference/c-api.generalized-ufuncs.html """ output_dtypes = kwargs.pop("output_dtypes", None) output_sizes = kwargs.pop("output_sizes", None) vectorize = kwargs.pop("vectorize", None) allow_rechunk = kwargs.pop("allow_rechunk", False) # Input processing: ## Signature if not isinstance(signature, str): raise TypeError('`signature` has to be of type string') core_input_dimss, core_output_dimss = _parse_gufunc_signature(signature) ## Determine nout: nout = None for functions of one direct return; nout = int for return tuples nout = None if not isinstance(core_output_dimss, list) else len(core_output_dimss) ## Assert output_dtypes if output_dtypes is None: raise ValueError("Must specify `output_dtypes` of output array(s)") elif isinstance(output_dtypes, str): otypes = list(output_dtypes) output_dtypes = otypes[0] if nout is None else otypes elif isinstance(output_dtypes, (tuple, list)): if nout is None: raise ValueError( "Must specify single dtype for `output_dtypes` for function with one output" ) otypes = output_dtypes else: if nout is not None: raise ValueError( "Must specify tuple of dtypes for `output_dtypes` for function with multiple outputs" ) otypes = [output_dtypes] ## Vectorize function, if required if vectorize: func = np.vectorize(func, signature=signature, otypes=otypes) ## Miscellaneous if output_sizes is None: output_sizes = {} # Main code: ## Cast all input arrays to dask args = [asarray(a) for a in args] if len(core_input_dimss) != len(args): ValueError( "According to `signature`, `func` requires %d arguments, but %s given" % (len(core_output_dimss), len(args))) ## Assess input args for loop dims input_shapes = [a.shape for a in args] input_chunkss = [tuple(c[0] for c in a.chunks) for a in args] num_loopdims = [ len(s) - len(cd) for s, cd in zip(input_shapes, core_input_dimss) ] max_loopdims = max(num_loopdims) if num_loopdims else None _core_input_shapes = [ dict(zip(cid, s[n:])) for s, n, cid in zip(input_shapes, num_loopdims, core_input_dimss) ] core_shapes = merge(output_sizes, *_core_input_shapes) loop_input_dimss = [ tuple("__loopdim%d__" % d for d in range(max_loopdims - n, max_loopdims)) for n in num_loopdims ] input_dimss = [l + c for l, c in zip(loop_input_dimss, core_input_dimss)] loop_output_dims = max(loop_input_dimss, key=len) if loop_input_dimss else set() ## Assess input args for same size and chunk sizes ### Collect sizes and chunksizes of all dims in all arrays dimsizess = {} chunksizess = {} for dims, shape, chunksizes in zip(input_dimss, input_shapes, input_chunkss): for dim, size, chunksize in zip(dims, shape, chunksizes): _dimsizes = dimsizess.get(dim, []) _dimsizes.append(size) dimsizess[dim] = _dimsizes _chunksizes = chunksizess.get(dim, []) _chunksizes.append(chunksize) chunksizess[dim] = _chunksizes ### Assert correct partitioning, for case: if not allow_rechunk: for dim, sizes in dimsizess.items(): ### Check that the arrays have same length for same dimensions or dimension `1` if set(sizes).union({1}) != {1, max(sizes)}: raise ValueError( "Dimension `'{}'` with different lengths in arrays".format( dim)) chunksizes = chunksizess[dim] ### Check if core dimensions consist of only one chunk if (dim in core_shapes) and (chunksizes[0] < core_shapes[dim]): raise ValueError( "Core dimension `'{}'` consists of multiple chunks. 
To fix, rechunk into a single \ chunk along this dimension or set `allow_rechunk=True`, but beware that this may increase memory usage \ significantly.".format(dim)) ### Check if loop dimensions consist of same chunksizes, when they have sizes > 1 relevant_chunksizes = list( unique(c for s, c in zip(sizes, chunksizes) if s > 1)) if len(relevant_chunksizes) > 1: raise ValueError( "Dimension `'{}'` with different chunksize present".format( dim)) ## Apply function - use atop here arginds = list(concat(zip(args, input_dimss))) ### Use existing `atop` but only with loopdims to enforce ### concatenation for coredims that appear also at the output ### Modifying `atop` could improve things here. tmp = atop( func, loop_output_dims, *arginds, dtype=int, # Only dummy dtype, anyone will do concatenate=True, **kwargs) ## Prepare output shapes loop_output_shape = tmp.shape loop_output_chunks = tmp.chunks dsk = tmp.__dask_graph__() keys = list(flatten(tmp.__dask_keys__())) _anykey = keys[0] name, token = _anykey[0].split('-') ### *) Treat direct output if nout is None: core_output_dimss = [core_output_dimss] output_dtypes = [output_dtypes] ## Split output leaf_arrs = [] for i, cod, odt in zip(count(0), core_output_dimss, output_dtypes): core_output_shape = tuple(core_shapes[d] for d in cod) core_chunkinds = len(cod) * (0, ) output_shape = loop_output_shape + core_output_shape output_chunks = loop_output_chunks + core_output_shape leaf_name = "%s_%d-%s" % (name, i, token) leaf_dsk = {(leaf_name, ) + key[1:] + core_chunkinds: ((getitem, key, i) if nout else key) for key in keys} leaf_arr = Array(sharedict.merge((leaf_name, leaf_dsk), dsk), leaf_name, chunks=output_chunks, shape=output_shape, dtype=odt) leaf_arrs.append(leaf_arr) return leaf_arrs if nout else leaf_arrs[0] # Undo *) from above
from evm.vm.forks.tangerine_whistle.opcodes import TANGERINE_WHISTLE_OPCODES from .constants import (GAS_EXP_EIP160, GAS_EXPBYTE_EIP160) UPDATED_OPCODES = { opcode_values.EXP: as_opcode( logic_fn=arithmetic.exp(gas_per_byte=GAS_EXPBYTE_EIP160), mnemonic=mnemonics.EXP, gas_cost=GAS_EXP_EIP160, ), opcode_values.SELFDESTRUCT: as_opcode( logic_fn=system.selfdestruct_eip161, mnemonic=mnemonics.SELFDESTRUCT, gas_cost=GAS_SELFDESTRUCT_EIP150, ), opcode_values.CALL: call.CallEIP161.configure( __name__='opcode:CALL', mnemonic=mnemonics.CALL, gas_cost=GAS_CALL_EIP150, )(), } SPURIOUS_DRAGON_OPCODES = merge( copy.deepcopy(TANGERINE_WHISTLE_OPCODES), UPDATED_OPCODES, )