def parser(num, boxes, scores, classes, **kargs):
    """Turn raw detector outputs into a filtered, score-ordered prediction list.

    Only the first (index 0) entry of ``boxes``, ``scores`` and ``classes``
    is read.  ``num`` is accepted for interface compatibility but not needed:
    the prediction count follows from the zipped arrays.

    Keyword options (popped from ``kargs``):
        min_score: predictions must score strictly above this (default 0.5).
        max_predictions: maximum number of predictions returned (default 20).
        image_shape: forwarded to ``create_pred``; ``None`` when absent.

    Returns:
        A list of predictions sorted by ascending score.  When more than
        ``max_predictions`` pass the threshold, the highest-scoring ones are
        kept (previously the lowest-scoring ones survived the cut).
    """
    min_score = kargs.pop("min_score", 0.5)
    max_predictions = kargs.pop("max_predictions", 20)
    image_shape = kargs.pop("image_shape", None)

    class_ids = classes[0].astype(np.uint8).tolist()
    score_values = scores[0].tolist()
    box_values = boxes[0].tolist()

    # `image_shape` is None exactly when no shape was supplied, so a single
    # call covers both the normalized and the un-normalized case.
    predictions = (
        create_pred(tup, image_shape)
        for tup in zip(class_ids, score_values, box_values)
    )
    predictions = filter(P["score"] > min_score, predictions)
    ranked = sorted(predictions, key=P["score"])

    # Trim from the low-score end so the best predictions are retained while
    # the ascending output order callers already see is preserved.
    overflow = len(ranked) - max_predictions
    if overflow > 0:
        ranked = ranked[overflow:]
    return ranked
def extend(self, seq):
    """Add the items of ``seq``, chunking iterators adaptively by free memory.

    A non-iterator ``seq`` is handed to ``extend_chunk`` in one piece.  An
    iterator is consumed in chunks; after each full chunk the chunk size is
    grown or shrunk depending on how the currently available memory compares
    with the amount available when the call started.  The final chunk size is
    stored in ``global_chunksize[0]`` for the next call.
    """
    if not isinstance(seq, Iterator):
        self.extend_chunk(seq)
        return

    # `psutil.avail_phymem()` was removed in psutil 2.0; `virtual_memory()`
    # is its documented replacement.
    start_available_memory = psutil.virtual_memory().available
    # Two bounds to avoid hysteresis
    target_low = 0.4 * start_available_memory
    target_high = 0.6 * start_available_memory

    # Pull chunksize from last run; never start below 1, because a zero-sized
    # chunk consumes nothing and the loop would not terminate.
    chunksize = max(1, global_chunksize[0])

    empty = False
    while not empty:
        chunk = tuple(take(chunksize, seq))
        self.extend_chunk(chunk)

        # Only retune after a full chunk: a short chunk means the input is
        # running out, not that memory pressure changed.
        available_memory = psutil.virtual_memory().available
        if len(chunk) == chunksize:
            if available_memory > target_high:
                # int(1 * 1.6) == 1, so force at least +1 to allow growth.
                chunksize = max(chunksize + 1, int(chunksize * 1.6))
            elif available_memory < target_low:
                chunksize = max(1, int(chunksize / 1.6))

        empty, seq = isempty(seq)

    global_chunksize[0] = chunksize
def detection(ctx, cfg):
    """Return ``ctx`` merged with detection results, or with a test exception.

    A worker pool is opened from ``cfg``.  If ``ctx`` carries a non-None
    ``'test_detection_exception'`` entry, an ``exception(...)`` record with
    HTTP status 500 is merged into ``ctx``.  Otherwise ``detect`` is mapped
    over the first ``ctx['test_pixel_count']`` items of ``ctx['timeseries']``
    and the flattened results are merged in under ``'detections'``.
    """
    with workers(cfg) as pool:
        forced_failure = get('test_detection_exception', ctx, None)
        if forced_failure is not None:
            failure = exception(msg='test_detection_exception',
                                http_status=500)
            return merge(ctx, failure)

        selected = take(ctx['test_pixel_count'], ctx['timeseries'])
        results = list(flatten(pool.map(detect, selected)))
        return merge(ctx, {'detections': results})
def generate_command(model: LanguageModel, seed: str, characters: int):
    """Write ``seed`` and up to ``characters`` generated tokens to stdout.

    Tokens come from ``model.generate(list(seed))``; tokens flagged
    ``is_meta`` are skipped and do not count towards the limit.  A newline
    is printed at the end.
    """
    out = sys.stdout
    out.write(seed)

    def visible_text():
        for token in model.generate(list(seed)):
            if token.is_meta:
                continue
            yield str(token)

    for piece in take(characters, visible_text()):
        out.write(piece)
    print()
def profile(cmd, lang, inputs=None):
    """Profile a spaCy pipeline and print the functions ranked by own time.

    Loads the pipeline named ``lang``, takes at most 10000 items from
    ``inputs`` and profiles ``parse_texts(nlp, texts)`` into "Profile.prof".
    ``inputs`` is passed straight to ``cytoolz.take``, so it must be
    iterable despite the ``None`` default.  ``cmd`` is not used here.
    """
    # NOTE: `nlp` and `texts` are looked up by name through locals() in the
    # profiled statement below, so these two names must not change.
    nlp = spacy.load(lang)
    texts = list(cytoolz.take(10000, inputs))
    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
                    "Profile.prof")
    stats = pstats.Stats("Profile.prof")
    stats = stats.strip_dirs()
    stats = stats.sort_stats("time")
    stats.print_stats()
def stack_channels(images, order=(0, 1, 2)):
    """Stack multiple image files to one single, multi-channel image.

    Parameters
    ----------
    images : iterable of array, shape (M, N)
        The images to be concatenated. Entries 'None' are considered to be
        dummy channels and are replaced by zeros.
    order : sequence of int or None, optional
        The order the channels should be in in the final image. Each entry
        is an index into `images`; a `None` entry yields an all-zero channel.

    Returns
    -------
    stack_image : array, shape (M, N, len(order))
        The concatenated image (singleton dimensions are squeezed out).

    Raises
    ------
    ValueError
        If none of the consumed `images` is an array.

    Examples
    --------
    >>> image1 = np.ones((2, 2), dtype=int) * 1
    >>> image2 = np.ones((2, 2), dtype=int) * 2
    >>> joined = stack_channels((None, image1, image2))
    >>> joined.shape
    (2, 2, 3)
    >>> joined[0, 0]
    array([0, 1, 2])
    >>> joined = stack_channels((image1, image2), order=[None, 0, 1])
    >>> joined.shape
    (2, 2, 3)
    >>> joined[0, 0]
    array([0, 1, 2])
    """
    from itertools import islice

    order = list(order)
    used_positions = [pos for pos in order if pos is not None]
    # Support iterators, but pull enough items to satisfy the largest index
    # in `order`; taking only len(order) items made e.g. order=[2] fail.
    n_needed = max(len(order), max(used_positions, default=-1) + 1)
    images = list(islice(images, n_needed))

    # The zero-filled dummy channels copy shape and dtype from a real image.
    image_prototype = next(
        (im for im in images if isinstance(im, np.ndarray)), None)
    if image_prototype is None:
        raise ValueError("stack_channels needs at least one array in `images`")

    # A `None` in `order`, or at the selected position, means "no image here".
    ordered_ims = [
        np.zeros_like(image_prototype)
        if pos is None or images[pos] is None else images[pos]
        for pos in order
    ]
    # stack images with np.dstack, but if only a single channel is passed,
    # don't add an extra dimension
    stack_image = np.squeeze(np.dstack(ordered_ims))
    # Drop our references to the input images before returning.
    ordered_ims.clear()
    return stack_image
def montage(ims, order=None):
    """Stitch together a list of images according to a specified pattern.

    The order pattern should be an array of integers where each element
    corresponds to the index of the image in `ims`. eg if order =
    [[20, 21, 22, 23, 24],
     [19,  6,  7,  8,  9],
     [18,  5,  0,  1, 10],
     [17,  4,  3,  2, 11],
     [16, 15, 14, 13, 12]]
    the 25 images are stitched together in a clockwise spiral pattern.

    Parameters
    ----------
    ims : iterable of array, shape (M, N[, 3])
        The images to be stitched together.
    order : array-like of int, shape (P, Q), optional
        The order of the stitching, with each entry referring to the index
        of an image in `ims`. If None, this parameter defaults to the
        spiral order given above.

    Returns
    -------
    montaged : array, shape (M * P, N * Q[, 3])
        The stitched image.

    Examples
    --------
    >>> ims = [np.zeros((2, 2), dtype=np.uint8),
    ...        2 * np.ones((2, 2), dtype=np.uint8)]
    >>> order = [1, 0]
    >>> montage(ims, order)
    array([[2, 2, 0, 0],
           [2, 2, 0, 0]], dtype=uint8)
    """
    from itertools import islice

    if order is None:
        from .screens import cellomics
        order = cellomics.SPIRAL_CLOCKWISE_RIGHT_25
    order = np.atleast_2d(order)

    # In case a stream is passed, take only what is needed -- but enough to
    # cover the largest index in `order`, which may exceed `order.size`
    # (e.g. order=[3, 2] needs four images, not two).
    n_needed = order.size
    if order.size:
        n_needed = max(n_needed, int(order.max()) + 1)
    ims = list(islice(ims, n_needed))

    rows, cols = ims[0].shape[:2]
    mrows, mcols = order.shape
    montaged = np.zeros((rows * mrows, cols * mcols) + ims[0].shape[2:],
                        dtype=ims[0].dtype)
    for i in range(mrows):
        for j in range(mcols):
            montaged[rows * i:rows * (i + 1),
                     cols * j:cols * (j + 1)] = ims[order[i, j]]
    return montaged
def generate_command(model: LanguageModel, seed: str, characters: int):
    """Echo ``seed`` to stdout, then stream generated text after it.

    At most ``characters`` non-meta tokens produced by
    ``model.generate(list(seed))`` are written, each converted with
    ``str``; the output is terminated with a newline.
    """
    sys.stdout.write(seed)
    generated = model.generate(list(seed))
    printable = filter(lambda token: not token.is_meta, generated)
    text = map(str, printable)
    for character in take(characters, text):
        sys.stdout.write(character)
    print()
def montage(ims, order=None):
    """Stitch together a list of images according to a specified pattern.

    The order pattern should be an array of integers where each element
    corresponds to the index of the image in `ims`. eg if order =
    [[20, 21, 22, 23, 24],
     [19,  6,  7,  8,  9],
     [18,  5,  0,  1, 10],
     [17,  4,  3,  2, 11],
     [16, 15, 14, 13, 12]]
    the 25 images are stitched together in a clockwise spiral pattern.

    Parameters
    ----------
    ims : iterable of array, shape (M, N[, 3])
        The images to be stitched together.
    order : array-like of int, shape (P, Q), optional
        The order of the stitching, with each entry referring to the index
        of an image in `ims`. If None, this parameter defaults to the
        spiral order given above.

    Returns
    -------
    montaged : array, shape (M * P, N * Q[, 3])
        The stitched image.

    Examples
    --------
    >>> ims = [np.zeros((2, 2), dtype=np.uint8),
    ...        2 * np.ones((2, 2), dtype=np.uint8)]
    >>> order = [1, 0]
    >>> montage(ims, order)
    array([[2, 2, 0, 0],
           [2, 2, 0, 0]], dtype=uint8)
    """
    from itertools import islice

    if order is None:
        from .screens import cellomics
        order = cellomics.SPIRAL_CLOCKWISE_RIGHT_25
    order = np.atleast_2d(order)

    # A stream may be passed, so consume lazily -- but make sure the largest
    # index referenced by `order` is actually available; `order.size` alone
    # is too few when `order` skips indices (e.g. order=[3, 2]).
    highest_index = int(order.max()) if order.size else -1
    ims = list(islice(ims, max(order.size, highest_index + 1)))

    first = ims[0]
    rows, cols = first.shape[:2]
    mrows, mcols = order.shape
    montaged = np.zeros((rows * mrows, cols * mcols) + first.shape[2:],
                        dtype=first.dtype)
    for (i, j), image_index in np.ndenumerate(order):
        montaged[rows*i:rows*(i+1), cols*j:cols*(j+1)] = ims[image_index]
    return montaged
def stack_channels(images, order=(0, 1, 2)):
    """Stack multiple image files to one single, multi-channel image.

    Parameters
    ----------
    images : iterable of array, shape (M, N)
        The images to be concatenated. Entries 'None' are considered to be
        dummy channels and are replaced by zeros.
    order : sequence of int or None, optional
        The order the channels should be in in the final image. Each entry
        is an index into `images`; a `None` entry yields an all-zero channel.

    Returns
    -------
    stack_image : array, shape (M, N, len(order))
        The concatenated image (singleton dimensions are squeezed out).

    Raises
    ------
    ValueError
        If none of the consumed `images` is an array.

    Examples
    --------
    >>> image1 = np.ones((2, 2), dtype=int) * 1
    >>> image2 = np.ones((2, 2), dtype=int) * 2
    >>> joined = stack_channels((None, image1, image2))
    >>> joined.shape
    (2, 2, 3)
    >>> joined[0, 0]
    array([0, 1, 2])
    >>> joined = stack_channels((image1, image2), order=[None, 0, 1])
    >>> joined.shape
    (2, 2, 3)
    >>> joined[0, 0]
    array([0, 1, 2])
    """
    from itertools import islice

    order = list(order)
    used_positions = [pos for pos in order if pos is not None]
    # Support iterators.  Counting only the non-None positions is too few
    # when `order` skips an index (e.g. order=[None, 1] needs two images),
    # so also cover the largest index that is referenced.
    n_needed = max(len(used_positions), max(used_positions, default=-1) + 1)
    images = list(islice(images, n_needed))

    # ensure we grab an image and not `None`: the zero-filled dummy channels
    # copy shape and dtype from it.
    image_prototype = None
    for candidate in images:
        if isinstance(candidate, np.ndarray):
            image_prototype = candidate
            break
    if image_prototype is None:
        raise ValueError("stack_channels needs at least one array in `images`")

    # A `None` in `order`, or at the selected position, means "no image here".
    ordered_ims = []
    for pos in order:
        image = None if pos is None else images[pos]
        if image is None:
            image = np.zeros_like(image_prototype)
        ordered_ims.append(image)

    # stack images with np.dstack, but if only a single channel is passed,
    # don't add an extra dimension
    stack_image = np.squeeze(np.dstack(ordered_ims))
    # Drop our references to the input images before returning.
    ordered_ims.clear()
    return stack_image
def profile(cmd, lang, inputs=None):
    """Profile a spaCy pipeline and print the functions ranked by own time.

    When ``inputs`` is None, the first element of each IMDB training pair
    from ``thinc.extra.datasets.imdb()`` is used, capped at 25000 entries.
    At most 10000 inputs are then parsed under cProfile; the raw profile is
    written to "Profile.prof".  ``cmd`` is not used here.
    """
    if inputs is None:
        train_pairs, _dev_pairs = thinc.extra.datasets.imdb()
        first_fields, _second_fields = zip(*train_pairs)
        inputs = first_fields[:25000]

    # NOTE: `nlp` and `texts` are looked up by name through locals() in the
    # profiled statement below, so these two names must not change.
    nlp = spacy.load(lang)
    texts = list(cytoolz.take(10000, inputs))
    cProfile.runctx("parse_texts(nlp, texts)", globals(), locals(),
                    "Profile.prof")
    report = pstats.Stats("Profile.prof")
    report.strip_dirs().sort_stats("time").print_stats()
def from_program(cls, program: Program):
    """Build an instance describing the instruction at the program counter.

    The two lowest decimal digits of ``program.cur`` select the entry in
    ``opcodes``; the remaining digits, read right to left, are the parameter
    modes.  One address per parameter (``inst_info.arity`` of them) is
    resolved with ``program.get_idx`` for the cells after ``program.pc``.
    """
    code = program.cur
    idx = program.pc
    int_code = code % 100
    if debug:
        print('idx, code:', idx, code)
    inst_info = opcodes[int_code]

    # Zero-pad to five digits, drop the two opcode digits, then reverse so
    # the first parameter's mode comes first.
    mode_digits = format(code, '05d')[:-2][::-1]
    param_modes = take(inst_info.arity, (int(digit) for digit in mode_digits))
    addresses = tuple(
        program.get_idx(mode, position)
        for mode, position in zip(param_modes, count(idx + 1))
    )
    return cls(int_code, addresses, program, inst_info)
def profile(lang, inputs=None):
    """Profile a spaCy pipeline and print the functions ranked by own time.

    Without ``inputs``, the first element of each IMDB training pair from
    ``thinc.extra.datasets.imdb()`` is used (first 25000).  Up to 10000
    inputs are parsed under cProfile, with the profile stored in
    "Profile.prof".
    """
    if inputs is None:
        training, _ignored = thinc.extra.datasets.imdb()
        columns = zip(*training)
        inputs, _ignored = columns
        inputs = inputs[:25000]

    # NOTE: `nlp` and `texts` are looked up by name through locals() in the
    # profiled statement below, so these two names must not change.
    nlp = spacy.load(lang)
    texts = list(cytoolz.take(10000, inputs))
    cProfile.runctx(
        "parse_texts(nlp, texts)",
        globals(),
        locals(),
        "Profile.prof",
    )
    summary = pstats.Stats("Profile.prof")
    summary.strip_dirs()
    summary.sort_stats("time")
    summary.print_stats()
def minibatch(items, size=8):
    """Yield successive lists of items.

    `size` is either an int (every batch has that size) or an iterator of
    sizes, from which one value is drawn per batch so the batch size can
    vary from step to step.  Iteration stops at the first empty batch.
    """
    sizes = itertools.repeat(size) if isinstance(size, int) else size
    source = iter(items)
    while True:
        wanted = int(next(sizes))
        batch = list(cytoolz.take(wanted, source))
        if not batch:
            return
        yield list(batch)
def calc_merkle_tree_from_leaves(leaves: Sequence[Hash32]) -> MerkleTree:
    """Build a Merkle tree, root layer first, on top of the given leaf layer.

    Raises ValueError when ``leaves`` is empty or its length is not a power
    of two.  Layers are produced by repeatedly applying ``_hash_layer``.
    """
    leaf_count = len(leaves)
    if leaf_count == 0:
        raise ValueError("No leaves given")

    layer_count = math.log2(leaf_count) + 1
    if not layer_count.is_integer():
        raise ValueError("Number of leaves is not a power of two")

    # Layers are generated leaves-first; flip them so the root is tree[0].
    leaves_first = tuple(take(int(layer_count), iterate(_hash_layer, leaves)))
    tree = MerkleTree(leaves_first[::-1])

    if len(tree[0]) != 1:
        raise Exception("Invariant: There must only be one root")
    return tree
def calc_merkle_tree(items: Sequence[Hashable]) -> MerkleTree:
    """Calculate the Merkle tree for a list of items, root layer first.

    Each item is hashed with ``keccak`` to form the leaf layer; parent
    layers come from repeated ``_hash_layer`` application.  Raises
    ValidationError for an empty list or a length that is not a power of two.
    """
    item_count = len(items)
    if item_count == 0:
        raise ValidationError("No items given")

    layer_count = math.log2(item_count) + 1
    if not layer_count.is_integer():
        raise ValidationError("Item number is not a power of two")
    layer_count = int(layer_count)

    leaves = tuple(map(keccak, items))
    leaves_first = tuple(take(layer_count, iterate(_hash_layer, leaves)))
    # Generated bottom-up; reverse so that tree[0] is the root layer.
    tree = cast(MerkleTree, tuple(reversed(leaves_first)))

    if len(tree[0]) != 1:
        raise Exception("Invariant: There must only be one root")
    return tree
def process_data(df, params, toy):
    """Convert per-play rows of ``df`` into stacked feature and graph arrays.

    Rows are grouped by "PlayId" (only the first 10 groups when ``toy`` is
    truthy).  For each play an inverse squared-distance adjacency matrix is
    built from the "X"/"Y" columns with the diagonal zeroed via a 22x22
    identity, so each play is expected to contribute 22 rows.

    Returns ``(Xs, As, Es)``: a dict of per-feature arrays stacked over
    plays, the stacked ``normalized_laplacian`` of each adjacency (float32),
    and the stacked adjacencies with a trailing unit axis (float32).
    ``params`` is not used here.
    """
    feature_columns = (
        "X", "Y", "S", "A", "Orientation", "Dir", "Team", "NflId",
        "is_rusher", "Yards",
    )

    grouped = df.groupby("PlayId")
    plays = list(cz.take(10, grouped) if toy else grouped)

    per_play_features = []
    laplacians = []
    edge_features = []
    off_diagonal = 1 - np.eye(22)

    for play_id, df_play in tqdm(plays, desc="Processing Plays"):
        pos = df_play[["X", "Y"]].to_numpy()
        deltas = pos[:, np.newaxis, :] - pos[np.newaxis, :, :]
        sq_dist = np.sum(deltas**2, axis=-1)
        adjacency = 1.0 / (1.0 + sq_dist) * off_diagonal

        laplacians.append(normalized_laplacian(adjacency))
        edge_features.append(np.expand_dims(adjacency, axis=-1))

        # features
        per_play_features.append(
            {column: df_play[column].to_numpy() for column in feature_columns}
        )

    Xs = {}
    for feature in per_play_features[0]:
        Xs[feature] = np.stack(
            [play[feature] for play in per_play_features], axis=0)

    Es = np.stack(edge_features, axis=0).astype(np.float32)
    As = np.stack(laplacians, axis=0).astype(np.float32)
    return Xs, As, Es
def partition_all(n, seq):
    """ Split the sequence into consecutive chunks of at most n elements

    >>> parts = partition_all(3, [1, 2, 3, 4, 5, 6, 7, 8])
    >>> for part in parts:
    ...     print(tuple(part))
    (1, 2, 3)
    (4, 5, 6)
    (7, 8)

    Each chunk is a lazy view onto the shared underlying iterator, so it
    has to be consumed completely before the next chunk is requested.
    """
    remaining = iter(seq)
    while True:
        exhausted, remaining = isempty(remaining)
        if exhausted:
            return
        yield take(n, remaining)
def get_all_paths(coll, prefix_path=(), stop_at=None, stop_below=None):
    """Return the key paths through a nested collection of mappings and lists.

    By default every path leads to a leaf (any value that is neither a
    Mapping nor a list).  List elements are addressed by their index.

    Args:
        coll: The (possibly nested) collection to walk.
        prefix_path: Keys already traversed to reach ``coll``; used by the
            recursion and normally left empty by callers.
        stop_at: Truncate a path at the first key whose ``str()`` contains
            this string.
        stop_below: Truncate a path one level below the first key whose
            ``str()`` contains this string.

    Returns:
        A list of paths relative to ``coll``, each a list of keys.  A leaf
        (or a truncation point) yields ``[[]]``; an empty container yields
        ``[]``.

    Only one of ``stop_at`` / ``stop_below`` may be given.
    """
    assert (
        stop_at is None or stop_below is None
    ), "Only one of stop_at or stop_below can be used."

    prefix = list(prefix_path)

    # The truncation checks look at the last / second-to-last key of the
    # prefix.  Guard on the prefix length: at the top of the recursion there
    # is no such key yet, which used to raise instead of simply continuing.
    if (
        stop_below is not None
        and len(prefix) >= 2
        and stop_below in str(prefix[-2])
    ):
        return [[]]
    if stop_at is not None and prefix and stop_at in str(prefix[-1]):
        return [[]]

    if isinstance(coll, Mapping):
        items = coll.items()
    elif isinstance(coll, list):
        items = enumerate(coll)
    else:
        return [[]]

    return [
        [key] + sub_path
        for key, value in items
        for sub_path in get_all_paths(
            value,
            prefix_path=prefix + [key],
            stop_at=stop_at,
            stop_below=stop_below,
        )
    ]
def get_ancestors(self, limit: int, header: BlockHeader) -> Tuple[BaseBlock, ...]:
    """
    Return up to `limit` ancestor blocks of `header`, nearest ancestor first.

    The count is capped at `header.block_number`.  Ancestors are found by
    following `parent_hash` links through `get_block_by_hash`.
    """
    ancestor_count = min(header.block_number, limit)

    # Wrap the header in a temporary block so the walk can start from it.
    block_class = self.get_vm_class_for_block_number(
        header.block_number).get_block_class()
    starting_block = block_class(header=header, uncles=[])

    def parent_of(block):
        return self.get_block_by_hash(block.header.parent_hash)

    lineage = iterate(parent_of, starting_block)
    # The first element is the temporary block itself, not an ancestor.
    next(lineage)

    return tuple(take(ancestor_count, lineage))
def _overlapping_blocks(iterable, nblock, noverlap): """Partition iterable into overlapping blocks of size `nblock`. :param iterable: Iterable. :param nblock: Samples per block. :param noverlap: Amount of samples to overlap. :returns: Blocks. """ iterator = iter(iterable) nadvance = nblock - noverlap if nadvance < 1: raise ValueError("`noverlap` has to be smaller than `nblock-1`.") # First `noverlap` samples previous = list(cytoolz.take(noverlap, iterator)) advances = map(list, cytoolz.partition(nadvance, iterator)) for advance in advances: block = previous + advance # Concat lists yield block previous = block[-noverlap:]
def fit(self, model, epochs, steps_per_epoch, validation_steps):
    """Train ``model`` on ``self.ds_train`` and print per-epoch averages.

    Each epoch draws a fresh loader from ``self.get_loader`` and runs at
    most ``steps_per_epoch`` optimisation steps with ``self.optimizer`` and
    ``self.loss_fn``.  The mean loss and accuracy over the epoch's steps are
    printed.  ``validation_steps`` is not used here.
    """
    for epoch in range(epochs):  # loop over the dataset multiple times
        loss_total = 0.0
        acc_total = 0.0

        batches = cz.take(steps_per_epoch, self.get_loader(self.ds_train))

        # each batch is a pair of [inputs, labels]
        for step, (inputs, labels) in enumerate(batches):
            # zero the parameter gradients
            self.optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = self.loss_fn(outputs, labels)
            acc = (outputs.argmax(1) == labels).float().mean()
            loss.backward()
            self.optimizer.step()

            # accumulate statistics
            loss_total += loss.item()
            acc_total += acc.item()

        # print statistics: averages over the steps run in this epoch
        mean_loss = loss_total / (step + 1)
        mean_acc = acc_total / (step + 1)
        print(f"[{epoch}] loss: {mean_loss}, acc: {mean_acc}")
def test_cycle(self):
    """Two consecutive periods taken from a cycled stream must be equal."""
    nsamples = 10
    endless = cycle(range(nsamples))
    two_periods = list(cytoolz.take(2 * nsamples, endless))
    first_period = two_periods[:nsamples]
    second_period = two_periods[nsamples:]
    assert np.allclose(first_period, second_period)
def process(self, elements: tp.Iterable[tr.Element]) -> tp.Iterable[tr.Element]:
    """Lazily pass through at most the first ``self.n`` elements."""
    limit = self.n
    return cz.take(limit, elements)
hash_eth2, ) from eth_typing import ( Hash32, ) from .common import ( # noqa: F401 _calc_parent_hash, _hash_layer, get_branch_indices, get_merkle_proof, get_root, MerkleTree, MerkleProof, ) if TYPE_CHECKING: from typing import Tuple # noqa: F401 TreeDepth = 32 EmptyNodeHashes = tuple( take( TreeDepth, iterate(lambda node_hash: hash_eth2(node_hash + node_hash), b'\x00' * 32))) def verify_merkle_proof(root: Hash32, leaf: Hash32, index: int, proof: MerkleProof) -> bool: """ Verify that the given ``item`` is on the merkle branch ``proof`` starting with the given ``root``. """ assert len(proof) == TreeDepth value = leaf for i in range(TreeDepth): if index // (2**i) % 2: value = hash_eth2(proof[i] + value) else:
def get_branch_indices(node_index: int, depth: int) -> Sequence[int]:
    """Return the index of a node followed by those of its ancestors.

    The result has ``depth`` entries: ``node_index`` first, then each
    successive parent index, obtained by floor-halving the previous one.
    """
    def parent_index(index):
        return index // 2

    lineage = iterate(parent_index, node_index)
    return tuple(take(depth, lineage))
def compute_up(t, seq, **kwargs):
    """Return the first ``t.n`` items of ``seq``.

    Small requests (``t.n`` below 100) are materialised as a tuple; larger
    ones are returned as a lazy iterator.
    """
    materialise = t.n < 100
    head = take(t.n, seq)
    return tuple(head) if materialise else head
def take(self, n):
    """Return a new object, built by `_construct`, over the first `n` items."""
    head = cytoolz.take(n, self._iterator)
    return self._construct(head)
try: list_data[index] = new_value except IndexError: raise ValidationError( "the length of the given tuple_data is {}, the given index {} is out of index".format( len(tuple_data), index, ) ) else: return tuple(list_data) TreeHeight = 32 EmptyNodeHashes = tuple( take(TreeHeight, iterate(lambda node_hash: hash_eth2(node_hash + node_hash), b'\x00' * 32)) ) def get_merkle_proof(tree: MerkleTree, item_index: int) -> Iterable[Hash32]: """ Read off the Merkle proof for an item from a Merkle tree. """ if item_index < 0 or item_index >= len(tree[-1]) or tree[-1][item_index] == EmptyNodeHashes[0]: raise ValidationError("Item index out of range") branch_indices = get_branch_indices(item_index, len(tree)) proof_indices = [i ^ 1 for i in branch_indices][:-1] # get sibling by flipping rightmost bit return tuple( layer[proof_index] for layer, proof_index
def compute(t, seq):
    """Compute ``t.parent`` against ``seq`` and keep its first ``t.n`` items.

    The result is returned as a tuple.
    """
    upstream = compute(t.parent, seq)
    head = take(t.n, upstream)
    return tuple(head)
def get_scaled_batches(
        scaled_workers: Dict[Worker, float],
        source: List[Work],
) -> Dict[Worker, List[Work]]:
    """
    Split the elements of source among workers in proportion to their scale.

    Every element of source ends up in exactly one batch. Batch lengths are
    rounded down, and whatever is left over after rounding is given to the
    worker with the largest share.

    If all scales sum to zero, every worker is weighted equally. If any scale
    is infinite, only the infinite-scale workers receive elements, in equal
    shares.

    :param scaled_workers: mapping of worker to scale - scales must not be NaN
    :param source: list of unique elements to group into scaled batches
    :return: mapping of worker to its batch. Workers whose share rounds down
        to zero elements are absent, unless they receive the leftovers.
    :raise ValidationError: if source contains duplicates, there are no
        workers, or any scale is NaN
    """
    scales = tuple(scaled_workers.values())

    if len(set(source)) != len(source):
        raise ValidationError("Elements to batch must be unique")
    if len(scales) == 0:
        raise ValidationError(
            "Must have at least one target to batch elements into")
    if any(math.isnan(scale) for scale in scales):
        raise ValidationError(
            "All scale values must be a number (ie~ not a NaN)")

    scale_sum = sum(scales)
    if scale_sum == 0:
        weights = dict.fromkeys(scaled_workers, 1.0)
        total = float(len(scaled_workers))
    elif any(math.isinf(scale) for scale in scales):
        weights = {}
        for worker, scale in scaled_workers.items():
            weights[worker] = 1.0 if math.isinf(scale) else 0.0
        total = sum(weights.values())
    else:
        weights = scaled_workers
        total = scale_sum

    fractional_scales = {}
    for worker, weight in weights.items():
        fractional_scales[worker] = weight / total

    num_elements = len(source)
    element_iter = iter(source)

    batches = {}
    for worker, fraction in fractional_scales.items():
        share = math.floor(fraction * num_elements)
        if share < 1:
            continue
        batches[worker] = list(take(share, element_iter))

    # any elements missed due to rounding error will go to the largest
    # scaled worker
    leftovers = list(element_iter)
    if leftovers:
        largest_worker = max(fractional_scales, key=fractional_scales.get)
        batches.setdefault(largest_worker, []).extend(leftovers)

    return batches
def test_cycle(self):
    """A cycled stream repeats itself after one full period."""
    nsamples = 10
    source = range(nsamples)
    repeated = cycle(source)
    cycled = list(cytoolz.take(nsamples + nsamples, repeated))
    head, tail = cycled[0:nsamples], cycled[nsamples:]
    assert np.allclose(head, tail)
def aoc05_a():
    """Join element 5 of each of the first 8 values from ``_gen_hash(door_id)``."""
    first_eight = take(8, _gen_hash(door_id))
    return ''.join(digest[5] for digest in first_eight)
def contract_similarity(self, contract1, contract2, num_samples=1000):
    """Estimate how often two contracts agree on generated samples.

    Up to ``num_samples`` samples are drawn from ``self.sample_generator()``.
    A sample counts as an agreement when both contracts return the very same
    object for it (identity comparison).  The agreement count is divided by
    ``num_samples``.
    """
    samples = take(num_samples, self.sample_generator())
    agreements = 0
    for sample in samples:
        if contract1(sample) is contract2(sample):
            agreements += 1
    return agreements / num_samples
def take(self, nblocks):
    """Return a new object, built by `_construct`, over the first `nblocks` items."""
    leading = cytoolz.take(nblocks, self._iterator)
    return self._construct(leading)
def take(self, n):
    """Return a new instance of this object's class over its first `n` items."""
    factory = self.__class__
    return factory(cytoolz.take(n, self))
def get_branch_indices(node_index: int, depth: int) -> Iterable[int]:
    """
    Yield the index of a node followed by the indices of its ancestors.

    ``depth`` values are produced in total; each one is the floor-half of
    the previous one, starting from ``node_index``.
    """
    def parent_index(index):
        return index // 2

    lineage = iterate(parent_index, node_index)
    yield from take(depth, lineage)
def partition(plist, sizes):
    """Cut a list into consecutive pieces whose lengths are given by sizes.

    Pieces are taken in order from a single pass over ``plist``; a piece is
    shorter than requested once ``plist`` runs out.
    """
    remaining = iter(plist)
    pieces = []
    for size in sizes:
        pieces.append(list(tz.take(size, remaining)))
    return pieces
def __call__(self, iterator):
    """Store the first ``self.n`` items in ``self.items`` and iterate over them."""
    head = cytoolz.take(self.n, iterator)
    self.items = list(head)
    return iter(self.items)