def post_compute(e, q, d):
    """ Execute a query using MongoDB's aggregation pipeline

    The compute_up functions operate on Mongo Collection / list-of-dict
    queries.  Once they're done we need to actually execute the query on
    MongoDB.  We do this using the aggregation pipeline framework.

    http://docs.mongodb.org/manual/core/aggregation-pipeline/
    """
    d = {'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                 dict((col, 1) for col in e.fields))}
    q = q.append(d)

    if not e.dshape.shape:  # not a collection
        result = q.coll.aggregate(list(q.query))['result'][0]
        if isscalar(e.dshape.measure):
            return result[e._name]
        else:
            return get(e.fields, result)

    dicts = q.coll.aggregate(list(q.query))['result']

    if isscalar(e.dshape.measure):
        return list(pluck(e.fields[0], dicts, default=None))  # dicts -> values
    else:
        return list(pluck(e.fields, dicts, default=None))  # dicts -> tuples
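# A minimal illustration (hypothetical documents, independent of the blaze
# machinery above) of the two pluck calls at the end of post_compute: a single
# field name yields plain values, a list of field names yields tuples.
from toolz import pluck

_docs = [{'name': 'Alice', 'amount': 100}, {'name': 'Bob', 'amount': 200}]
assert list(pluck('name', _docs, default=None)) == ['Alice', 'Bob']
assert list(pluck(['name', 'amount'], _docs, default=None)) == [('Alice', 100),
                                                                ('Bob', 200)]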
def resource_append(lists, msg):
    L = list(msg.values())
    if not L:
        return

    for k in ['cpu', 'memory-percent']:
        lists[k].append(mean(pluck(k, L)) / 100)

    lists['time'].append(mean(pluck('time', L)) * 1000)
def compile_components(summary, schema):
    """Given a ``Summary`` object and a table schema, return 5 sub-functions.

    Parameters
    ----------
    summary : Summary
        The expression describing the aggregations to be computed.
    schema :
        The schema of the table being aggregated.

    Returns
    -------
    A tuple of the following functions:

    ``create(shape)``
        Takes the aggregate shape, and returns a tuple of initialized numpy
        arrays.

    ``info(df)``
        Takes a dataframe, and returns preprocessed 1D numpy arrays of the
        needed columns.

    ``append(i, x, y, *aggs_and_cols)``
        Appends the ``i``th row of the table to the ``(x, y)`` bin, given the
        base arrays and columns in ``aggs_and_cols``. This does the bulk of
        the work.

    ``combine(base_tuples)``
        Combine a list of base tuples into a single base tuple. This forms
        the reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized ``dynd``
        array.
    """
    paths, reds = zip(*preorder_traversal(summary))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._bases for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]
    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d) for (b, d) in zip(bases, dshapes)]
    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))
    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes)
    info = make_info(cols)
    append = make_append(bases, cols, calls)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, summary, schema)

    return create, info, append, combine, finalize
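# A minimal sketch (hypothetical call tuples, not datashader's real reductions)
# of how ``pluck(2, calls)`` and ``pluck(3, calls)`` above pull the input
# columns and temporaries out of the (append, base, cols, temps) tuples.
from toolz import concat, pluck, unique

_calls = [('append_count', 'count', ('x',), ()),
          ('append_sum', 'sum', ('x', 'y'), ())]
assert list(unique(concat(pluck(2, _calls)))) == ['x', 'y']
assert list(pluck(3, _calls)) == [(), ()]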
def get_profile(history, recent=None, start=None, stop=None, key=None):
    now = time()
    if start is None:
        istart = 0
    else:
        istart = bisect.bisect_left(history, (start,))

    if stop is None:
        istop = None
    else:
        istop = bisect.bisect_right(history, (stop,)) + 1
        if istop >= len(history):
            istop = None  # include end

    if istart == 0 and istop is None:
        history = list(history)
    else:
        iistop = len(history) if istop is None else istop
        history = [history[i] for i in range(istart, iistop)]

    prof = merge(*toolz.pluck(1, history))

    if not history:
        return create()

    if recent:
        prof = merge(prof, recent)

    return prof
def records_to_tuples(ds, data):
    """ Transform records into tuples

    Examples
    --------
    >>> seq = [{'a': 1, 'b': 10}, {'a': 2, 'b': 20}]
    >>> list(records_to_tuples('var * {a: int, b: int}', seq))
    [(1, 10), (2, 20)]

    >>> records_to_tuples('{a: int, b: int}', seq[0])  # single elements
    (1, 10)

    >>> records_to_tuples('var * int', [1, 2, 3])  # pass through on non-records
    [1, 2, 3]

    See Also
    --------
    tuples_to_records
    """
    if isinstance(ds, (str, unicode)):
        ds = dshape(ds)
    if isinstance(ds.measure, Record) and len(ds.shape) == 1:
        return pluck(ds.measure.names, data, default=None)
    if isinstance(ds.measure, Record) and len(ds.shape) == 0:
        return get(ds.measure.names, data)
    if not isinstance(ds.measure, Record):
        return data
    raise NotImplementedError()
def dot_graph(filename='conversions'):
    # Edges from Convert
    dg = nx.DiGraph()
    for a, b in convert.graph.edges():
        cost = convert.graph.edge[a][b]['cost']
        dg.add_edge(cls_name(a), cls_name(b),
                    cost=cost,
                    penwidth=max(log(1./(cost + 0.06)), 1))

    # Edges from Append
    for a, b in append.funcs:
        if b is not object and a != b:
            dg.add_edge(cls_name(b), cls_name(a), color='blue')

    # Color edges
    for n in convert.graph.nodes() + list(pluck(0, append.funcs)):
        if issubclass(n, tuple(ooc_types)):
            dg.node[cls_name(n)]['color'] = 'red'

    # Convert to pydot
    p = nx.to_pydot(dg)
    p.set_overlap(False)
    p.set_splines(True)

    with open(filename + '.dot', 'w') as f:
        f.write(p.to_string())

    os.system('neato -Tpdf %s.dot -o %s.pdf' % (filename, filename))
    print("Writing graph to %s.pdf" % filename)
    os.system('neato -Tpng %s.dot -o %s.png' % (filename, filename))
    print("Writing graph to %s.png" % filename)
def f(c, a, b):
    aa = rpc(ip=a.ip, port=a.port)
    bb = rpc(ip=b.ip, port=b.port)

    result = yield aa.identity()
    assert not a.active
    response = yield aa.compute(key='x',
                                function=dumps(add),
                                args=dumps([1, 2]),
                                who_has={},
                                close=True)
    assert not a.active
    assert response['status'] == 'OK'
    assert a.data['x'] == 3
    assert c.who_has['x'] == {a.address}
    assert isinstance(response['compute-start'], float)
    assert isinstance(response['compute-stop'], float)
    assert isinstance(response['thread'], int)

    response = yield bb.compute(key='y',
                                function=dumps(add),
                                args=dumps(['x', 10]),
                                who_has={'x': [a.address]})
    assert response['status'] == 'OK'
    assert b.data['y'] == 13
    assert c.who_has['y'] == {b.address}
    assert response['nbytes'] == sizeof(b.data['y'])
    assert isinstance(response['transfer-start'], float)
    assert isinstance(response['transfer-stop'], float)

    def bad_func():
        1 / 0

    response = yield bb.compute(key='z',
                                function=dumps(bad_func),
                                args=dumps(()),
                                close=True)
    assert not b.active
    assert response['status'] == 'error'
    assert isinstance(loads(response['exception']), ZeroDivisionError)
    if sys.version_info[0] >= 3:
        assert any('1 / 0' in line
                   for line in pluck(3, traceback.extract_tb(
                       loads(response['traceback'])))
                   if line)

    aa.close_streams()
    yield a._close()

    assert a.address not in c.ncores and b.address in c.ncores

    assert list(c.ncores.keys()) == [b.address]

    assert isinstance(b.address, str)
    assert b.ip in b.address
    assert str(b.port) in b.address

    bb.close_streams()
    yield b._close()
def _set_last_times(self, platform_id, fetched):
    with self.times_lock:
        try:
            new_max = max(pluck(1, concat(fetched.itervalues())))
            if new_max > self._last_times.get(platform_id, 0):
                self._last_times[platform_id] = new_max
        except ValueError:
            pass
def test_append_convert(empty_bank, raw_bank):
    ds = discover(raw_bank)
    assert set(ds.measure.names) == {'name', 'amount'}

    append(empty_bank, raw_bank, dshape=ds)
    assert odo(empty_bank, list, dshape=ds) == list(
        pluck(ds.measure.names, raw_bank)
    )
def resource_append(lists, msg):
    L = list(msg.values())
    if not L:
        return

    for k in ['cpu', 'memory-percent']:
        lists[k].append(mean(pluck(k, L)) / 100)

    lists['time'].append(mean(pluck('time', L)) * 1000)
    if len(lists['time']) >= 2:
        t1, t2 = lists['time'][-2], lists['time'][-1]
        interval = (t2 - t1) / 1000
    else:
        interval = 0.5
    send = mean(pluck('network-send', L, 0))
    lists['network-send'].append(send / 2**20 / (interval or 0.5))
    recv = mean(pluck('network-recv', L, 0))
    lists['network-recv'].append(recv / 2**20 / (interval or 0.5))
def test_generate_intervals_monthly_keys(self):
    actual = list(
        pluck(
            'key',
            generate_intervals('Monthly', '2012-12-12', '2013-03-19')
        )
    )
    expected = ['12M12', '13M01', '13M02', '13M03']
    self.assertEqual(actual, expected)
def f(c, a, b):
    aa = rpc(ip=a.ip, port=a.port)
    bb = rpc(ip=b.ip, port=b.port)

    result = yield aa.identity()
    assert not a.active
    response = yield aa.compute(key='x',
                                function=dumps(add),
                                args=dumps([1, 2]),
                                who_has={},
                                close=True)
    assert not a.active
    assert response['status'] == 'OK'
    assert a.data['x'] == 3
    assert c.who_has['x'] == {a.address}
    assert isinstance(response['compute_start'], float)
    assert isinstance(response['compute_stop'], float)
    assert isinstance(response['thread'], Integral)

    response = yield bb.compute(key='y',
                                function=dumps(add),
                                args=dumps(['x', 10]),
                                who_has={'x': [a.address]})
    assert response['status'] == 'OK'
    assert b.data['y'] == 13
    assert c.who_has['y'] == {b.address}
    assert response['nbytes'] == sizeof(b.data['y'])
    assert isinstance(response['transfer_start'], float)
    assert isinstance(response['transfer_stop'], float)

    def bad_func():
        1 / 0

    response = yield bb.compute(key='z',
                                function=dumps(bad_func),
                                args=dumps(()),
                                close=True)
    assert not b.active
    assert response['status'] == 'error'
    assert isinstance(loads(response['exception']), ZeroDivisionError)
    if sys.version_info[0] >= 3:
        assert any('1 / 0' in line
                   for line in pluck(
                       3, traceback.extract_tb(loads(response['traceback'])))
                   if line)

    aa.close_streams()
    yield a._close()

    assert a.address not in c.ncores and b.address in c.ncores

    assert list(c.ncores.keys()) == [b.address]

    assert isinstance(b.address, str)
    assert b.ip in b.address
    assert str(b.port) in b.address

    bb.close_streams()
    yield b._close()
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
    split_every : int or dict, optional
    """
    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, Integral):
        axis = validate_axis(axis, x.ndim)
        axis = (axis,)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, "
                        "got '{0}'".format(axis))

    for ax in axis:
        chunks = x.chunks[ax]
        if len(chunks) > 1 and np.isnan(chunks).any():
            raise ValueError(
                "Arg-reductions do not work with arrays that have "
                "unknown chunksizes. At some point in your computation "
                "this array lost chunking information"
            )

    # Map chunk across all blocks
    name = 'arg-reduce-{0}'.format(tokenize(axis, x, chunk,
                                            combine, split_every))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    offsets = list(product(*(accumulate(operator.add, bd[:-1], 0)
                             for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple((1, ) * len(c) if i in axis else c
                   for (i, c) in enumerate(x.chunks))
    dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off))
               for (k, off) in zip(keys, offset_info))
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    graph = HighLevelGraph.from_collections(name, dsk, dependencies=[x])
    tmp = Array(graph, name, chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
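# A minimal sketch (hypothetical 2-D chunking) of the offset computation above:
# per-block start offsets come from accumulating chunk sizes, and
# ``pluck(axis[0], offsets)`` keeps only the offset along the reduced axis.
import operator
from itertools import product
from toolz import accumulate, pluck

_chunks = ((2, 3), (4, 4, 4))  # hypothetical x.chunks
_offsets = list(product(*(accumulate(operator.add, bd[:-1], 0) for bd in _chunks)))
assert _offsets == [(0, 0), (0, 4), (0, 8), (2, 0), (2, 4), (2, 8)]
assert list(pluck(0, _offsets)) == [0, 0, 0, 2, 2, 2]  # axis == (0,)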
def _metadata(self, query: str) -> Iterator[_ColumnMetadata]:
    for name, type, null in toolz.pluck(
        ["column_name", "column_type", "null"],
        self.con.execute(f"DESCRIBE {query}"),
    ):
        yield _ColumnMetadata(
            name=name,
            type=parse(type)(nullable=null.lower() == "yes"),
        )
def test_generate_intervals_weekly_end_dates(self):
    actual = list(
        pluck(
            'end_date',
            generate_intervals('Weekly', '2012-08-19', '2012-08-24')
        )
    )
    expected = [getdate('2012-08-19'), getdate('2012-08-26')]
    self.assertEqual(actual, expected)
def test_generate_intervals_yearly_end_dates(self):
    actual = list(
        pluck(
            'end_date',
            generate_intervals('Yearly', '2012-12-12', '2013-01-19')
        )
    )
    expected = [getdate('2012-12-31'), getdate('2013-12-31')]
    self.assertEqual(actual, expected)
def test_generate_intervals_weekly_labels(self):
    actual = list(
        pluck(
            'label',
            generate_intervals('Weekly', '2012-08-19', '2012-08-24')
        )
    )
    expected = ['2012-08-13', '2012-08-20']
    self.assertEqual(actual, expected)
def test_generate_intervals_yearly_keys(self):
    actual = list(
        pluck(
            'key',
            generate_intervals('Yearly', '2012-12-12', '2013-03-19')
        )
    )
    expected = ['12Y', '13Y']
    self.assertEqual(actual, expected)
def test_generate_intervals_weekly_keys(self):
    actual = list(
        pluck(
            'key',
            generate_intervals('Weekly', '2012-08-19', '2012-09-12')
        )
    )
    expected = ['12W33', '12W34', '12W35', '12W36', '12W37']
    self.assertEqual(actual, expected)
def test_generate_intervals_yearly_labels(self):
    actual = list(
        pluck(
            'label',
            generate_intervals('Yearly', '2012-12-12', '2013-04-19')
        )
    )
    expected = ['2012', '2013']
    self.assertEqual(actual, expected)
def post_compute(e, q, d):
    """ Execute a query using MongoDB's aggregation pipeline

    The compute_one functions operate on Mongo Collection / list-of-dict
    queries.  Once they're done we need to actually execute the query on
    MongoDB.  We do this using the aggregation pipeline framework.

    http://docs.mongodb.org/manual/core/aggregation-pipeline/
    """
    q = q.append({'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                          dict((col, 1) for col in e.columns))})
    dicts = q.coll.aggregate(list(q.query))['result']

    if e.iscolumn:
        return list(pluck(e.columns[0], dicts))  # dicts -> values
    else:
        return list(pluck(e.columns, dicts))  # dicts -> tuples
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
    split_every : int or dict, optional
    """
    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, Integral):
        axis = validate_axis(axis, x.ndim)
        axis = (axis, )
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, "
                        "got '{0}'".format(axis))

    # Map chunk across all blocks
    name = 'arg-reduce-{0}'.format(
        tokenize(axis, x, chunk, combine, split_every))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    offsets = list(
        product(*(accumulate(operator.add, bd[:-1], 0) for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple(
        (1, ) * len(c) if i in axis else c for (i, c) in enumerate(x.chunks))
    dsk = dict(((name, ) + k, (chunk, (old, ) + k, axis, off))
               for (k, off) in zip(keys, offset_info))
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    tmp = Array(sharedict.merge(x.dask, (name, dsk),
                                dependencies={name: {x.name}}),
                name, chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
def combine_with(cls, workers, images, name=None, **kwargs):
    """Instantiates builders based on the available workers

    The workers and images are matched based on their architecture.

    Parameters
    ----------
    workers : List[DockerLatentWorker]
        Worker instances the builders may run on.
    images : List[DockerImage], default []
        Docker images the builder's steps may run in.
        Pass None to use class' images property.

    Returns
    -------
    docker_builder : List[DockerBuilder]
        Builder instances.
    """
    suitable_images = filter(InstanceOf(DockerImage), images)
    suitable_images = filter(cls.image_filter, suitable_images)

    suitable_workers = filter(InstanceOf(DockerLatentWorker), workers)
    suitable_workers = filter(cls.worker_filter, suitable_workers)
    suitable_workers = list(suitable_workers)

    # join the images with the suitable workers
    image_worker_pairs = [(image, worker)
                          for image in suitable_images
                          for worker in suitable_workers
                          if worker.supports(image.platform)]

    # group the suitable workers for each image
    pairs_by_image = toolz.groupby(0, image_worker_pairs).items()
    workers_by_image = {
        image: list(toolz.pluck(1, pairs))
        for image, pairs in pairs_by_image
    }

    builders = []
    for image, workers in workers_by_image.items():
        if workers:
            builder_name = image.title or image.name.title()
            if name:
                builder_name += f' {name}'
            builder = cls(name=builder_name, image=image, workers=workers,
                          **kwargs)
            builders.append(builder)
        else:
            warnings.warn(
                f'{cls.__name__}: there are no docker workers available '
                f'for platform `{image.platform}`, omitting image '
                f'`{image}`')

    return builders
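# A minimal sketch (hypothetical image/worker names) of the groupby/pluck step
# above that collects the suitable workers for each image.
import toolz

_pairs = [('img-amd64', 'worker-1'), ('img-amd64', 'worker-2'),
          ('img-arm64', 'worker-1')]
_workers_by_image = {image: list(toolz.pluck(1, pairs))
                     for image, pairs in toolz.groupby(0, _pairs).items()}
assert _workers_by_image == {'img-amd64': ['worker-1', 'worker-2'],
                             'img-arm64': ['worker-1']}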
def post_compute(e, q, scope=None):
    """Compute the result of a Broadcast expression.
    """
    columns = dict((col, 1) for qry in q.query
                   for col in qry.get("$project", []))
    scope = {"$project": toolz.merge({"_id": 0},  # remove mongo identifier
                                     dict((col, 1) for col in columns))}
    q = q.append(scope)
    dicts = get_result(q.coll.aggregate(list(q.query)))
    assert len(columns) == 1
    return list(pluck(first(columns.keys()), dicts))
def test_min_max():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    yield cluster._start()
    try:
        adapt = Adaptive(cluster.scheduler, cluster, minimum=1, maximum=2,
                         interval='20 ms', wait_count=10)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 1

        yield gen.sleep(0.2)
        assert len(cluster.scheduler.workers) == 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 1}

        futures = c.map(slowinc, range(100), delay=0.1)

        start = time()
        while len(cluster.scheduler.workers) < 2:
            yield gen.sleep(0.01)
            assert time() < start + 1

        assert len(cluster.scheduler.workers) == 2
        yield gen.sleep(0.5)
        assert len(cluster.scheduler.workers) == 2
        assert len(cluster.workers) == 2
        assert frequencies(pluck(1, adapt.log)) == {'up': 2}

        del futures

        start = time()
        while len(cluster.scheduler.workers) != 1:
            yield gen.sleep(0.01)
            assert time() < start + 2

        assert frequencies(pluck(1, adapt.log)) == {'up': 2, 'down': 1}
    finally:
        yield c._close()
        yield cluster._close()
def test_min_max():
    loop = IOLoop.current()
    cluster = yield LocalCluster(0, scheduler_port=0, silence_logs=False,
                                 processes=False, diagnostics_port=None,
                                 loop=loop, asynchronous=True)
    yield cluster._start()
    try:
        adapt = Adaptive(cluster.scheduler, cluster, minimum=1, maximum=2,
                         interval='20 ms', wait_count=10)
        c = yield Client(cluster, asynchronous=True, loop=loop)

        start = time()
        while not cluster.scheduler.workers:
            yield gen.sleep(0.01)
            assert time() < start + 1

        yield gen.sleep(0.2)
        assert len(cluster.scheduler.workers) == 1
        assert frequencies(pluck(1, adapt.log)) == {'up': 1}

        futures = c.map(slowinc, range(100), delay=0.1)

        start = time()
        while len(cluster.scheduler.workers) < 2:
            yield gen.sleep(0.01)
            assert time() < start + 1

        assert len(cluster.scheduler.workers) == 2
        yield gen.sleep(0.5)
        assert len(cluster.scheduler.workers) == 2
        assert len(cluster.workers) == 2
        assert frequencies(pluck(1, adapt.log)) == {'up': 2}

        del futures

        start = time()
        while len(cluster.scheduler.workers) != 1:
            yield gen.sleep(0.01)
            assert time() < start + 2

        assert frequencies(pluck(1, adapt.log)) == {'up': 2, 'down': 1}
    finally:
        yield c.close()
        yield cluster.close()
def execute():
    subscriptions = frappe.get_all("Gym Subscription", {"is_training": 1})
    for name in pluck("name", subscriptions):
        from_date, to_date = frappe.db.get_value(
            "Gym Subscription", name, ["from_date", "to_date"]
        )
        months = month_diff(from_date, to_date, as_dec=1)
        days = date_diff(add_days(to_date, 1), from_date)
        day_fraction = months / flt(days)
        frappe.db.set_value("Gym Subscription", name, "day_fraction", day_fraction)
def _get_user_companies(user):
    result = frappe.db.sql(
        """
            SELECT for_value FROM `tabUser Permission`
            WHERE allow='Company' AND user=%(user)s
        """,
        values={'user': user},
        as_dict=1,
    )
    return list(pluck('for_value', result))
def dfs_decomposition_depth_tuple(RN, path_func, source_nodes=None):
    """
    Decompose network into lists of simply connected nodes.
    For the routing problem, these sets of nodes are segments
    in a reach terminated by a junction, headwater, or tailwater.

    The function also identifies the network depth, by reach, of each reach,
    and the output of the function is a list of tuples in the form:
    (network depth, [reach list]).
    The order of these reaches is suitable to be parallelized, as we guarantee
    that
    1) for any segment within a reach, the predecessor segments appear
       before it in the reach; and
    2) for any reach, the predecessor reaches appear before it in the
       main list.
    This is accomplished by a depth first search on the reversed graph.
    The depth first search function logs the path from each node to any
    network break defined by the `path_func`. The network depth is counted
    as the number of successive breaks.

    Arguments:
        RN (Dict[obj: List[obj]]): The graph
        path_func: partial function defining the network breaking function
        source_nodes: starting points (default use the top of the network,
            which, for the reversed network passed to this function, is the
            set of tailwaters...)

    Method:
        call dfs_decomposition
        call coalesce reaches
        call count order -- currently done with another dfs, but could be
            level order (i.e., bfs)
        zip results together and return order/reach tuples as before.

    Returns:
        [List(tuple)]: List of tuples of (depth, path) to be processed
            in order.
    """
    reach_list = dfs_decomposition(RN, path_func, source_nodes)
    # Label coalesced reaches with the hydrologically downstream-most segment
    tag_idx = -1
    RN_coalesced = coalesce_reaches(RN, reach_list, tag_idx)

    # Make sure that if source_nodes is not empty,
    # this doesn't create some kind of nasty collision.
    # TODO: There might be a way to more gracefully handle this...
    if source_nodes is None:
        source_nodes = headwaters(RN_coalesced)
    else:
        if source_nodes not in RN_coalesced:
            raise AssertionError(
                "the source nodes *must* be members of the coalesced set...")

    depth_tuples = dfs_count_depth(RN_coalesced, source_nodes)
    return zip(pluck(0, depth_tuples), reach_list)
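# A minimal sketch (hypothetical depths and reaches) of the final zip above,
# pairing each reach list with its network depth.
from toolz import pluck

_depth_tuples = [(0, 'tailwater-reach'), (1, 'middle-reach'), (2, 'headwater-reach')]
_reach_list = [['seg3', 'seg4'], ['seg2'], ['seg1']]
assert list(zip(pluck(0, _depth_tuples), _reach_list)) == [
    (0, ['seg3', 'seg4']), (1, ['seg2']), (2, ['seg1'])]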
def slice_slices_and_integers(out_name, in_name, blockdims, index):
    """
    Dask array indexing with slices and integers

    See Also
    --------
    _slice_1d
    """
    shape = tuple(cached_cumsum(dim, initial_zero=True)[-1] for dim in blockdims)

    for dim, ind in zip(shape, index):
        if np.isnan(dim) and ind != slice(None, None, None):
            raise ValueError("Arrays chunk sizes are unknown: %s", shape)

    assert all(isinstance(ind, (slice, Integral)) for ind in index)
    assert len(index) == len(blockdims)

    # Get a list (for each dimension) of dicts{blocknum: slice()}
    block_slices = list(map(_slice_1d, shape, blockdims, index))
    sorted_block_slices = [sorted(i.items()) for i in block_slices]

    # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ...
    in_names = list(product([in_name], *[pluck(0, s) for s in sorted_block_slices]))

    # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ...
    out_names = list(product([out_name],
                             *[range(len(d))[::-1] if i.step and i.step < 0
                               else range(len(d))
                               for d, i in zip(block_slices, index)
                               if not isinstance(i, Integral)]))

    all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices]))

    dsk_out = {out_name: (getitem, in_name, slices)
               for out_name, in_name, slices
               in zip(out_names, in_names, all_slices)}

    new_blockdims = [new_blockdim(d, db, i)
                     for d, i, db in zip(shape, index, blockdims)
                     if not isinstance(i, Integral)]

    return dsk_out, new_blockdims
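# A minimal sketch (hypothetical per-dimension (blocknum, slice) pairs) of how
# the in_names/all_slices products above separate block numbers from slices.
from itertools import product
from toolz import pluck

_sorted_block_slices = [[(0, slice(2, 4)), (1, slice(0, 3))],  # dimension 0
                        [(2, slice(1, 5))]]                    # dimension 1
assert list(product(['x'], *[pluck(0, s) for s in _sorted_block_slices])) == [
    ('x', 0, 2), ('x', 1, 2)]
assert list(product(*[pluck(1, s) for s in _sorted_block_slices])) == [
    (slice(2, 4), slice(1, 5)), (slice(0, 3), slice(1, 5))]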
def slice_slices_and_integers(out_name, in_name, blockdims, index):
    """
    Dask array indexing with slices and integers

    See Also
    --------
    _slice_1d
    """
    shape = tuple(map(sum, blockdims))

    assert all(isinstance(ind, (slice, int, long)) for ind in index)
    assert len(index) == len(blockdims)

    # Get a list (for each dimension) of dicts{blocknum: slice()}
    block_slices = list(map(_slice_1d, shape, blockdims, index))
    sorted_block_slices = [sorted(i.items()) for i in block_slices]

    # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ...
    in_names = list(
        product([in_name], *[pluck(0, s) for s in sorted_block_slices]))

    # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ...
    out_names = list(
        product([out_name], *[
            range(len(d))[::-1] if i.step and i.step < 0 else range(len(d))
            for d, i in zip(block_slices, index)
            if not isinstance(i, (int, long))
        ]))

    all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices]))

    dsk_out = dict(
        (out_name, (getitem, in_name, slices))
        for out_name, in_name, slices in zip(out_names, in_names, all_slices))

    new_blockdims = [
        new_blockdim(d, db, i) for d, i, db in zip(shape, index, blockdims)
        if not isinstance(i, (int, long))
    ]

    return dsk_out, new_blockdims
def slice_slices_and_integers(out_name, in_name, blockdims, index):
    """
    Dask array indexing with slices and integers

    See Also
    --------
    _slice_1d
    """
    shape = tuple(map(sum, blockdims))

    for dim, ind in zip(shape, index):
        if np.isnan(dim) and ind != slice(None, None, None):
            raise ValueError("Arrays chunk sizes are unknown: %s", shape)

    assert all(isinstance(ind, (slice, Integral)) for ind in index)
    assert len(index) == len(blockdims)

    # Get a list (for each dimension) of dicts{blocknum: slice()}
    block_slices = list(map(_slice_1d, shape, blockdims, index))
    sorted_block_slices = [sorted(i.items()) for i in block_slices]

    # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ...
    in_names = list(product([in_name], *[pluck(0, s) for s in sorted_block_slices]))

    # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ...
    out_names = list(product([out_name],
                             *[range(len(d))[::-1] if i.step and i.step < 0
                               else range(len(d))
                               for d, i in zip(block_slices, index)
                               if not isinstance(i, Integral)]))

    all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices]))

    dsk_out = {out_name: (getitem, in_name, slices)
               for out_name, in_name, slices
               in zip(out_names, in_names, all_slices)}

    new_blockdims = [new_blockdim(d, db, i)
                     for d, i, db in zip(shape, index, blockdims)
                     if not isinstance(i, Integral)]

    return dsk_out, new_blockdims
def resource_append(lists, msg):
    L = list(msg.values())
    if not L:
        return

    try:
        for k in ['cpu', 'memory-percent']:
            lists[k].append(mean(pluck(k, L)) / 100)
    except KeyError:  # initial messages sometimes lack resource data
        return        # this is safe to skip

    lists['time'].append(mean(pluck('time', L)) * 1000)
    if len(lists['time']) >= 2:
        t1, t2 = lists['time'][-2], lists['time'][-1]
        interval = (t2 - t1) / 1000
    else:
        interval = 0.5
    send = mean(pluck('network-send', L, 0))
    lists['network-send'].append(send / 2**20 / (interval or 0.5))
    recv = mean(pluck('network-recv', L, 0))
    lists['network-recv'].append(recv / 2**20 / (interval or 0.5))
def _get_property_rent(property_group):
    sales_invoices = frappe.db.sql(
        """
            SELECT grand_total, outstanding_amount
            FROM `tabSales Invoice`
            WHERE pm_property_group = %s AND docstatus = 1
        """,
        property_group,
        as_dict=1,
    )
    grand_totals = sum(pluck("grand_total", sales_invoices))
    outstanding_amounts = sum(pluck("outstanding_amount", sales_invoices))
    return {
        "total_paid": grand_totals - outstanding_amounts,
        "total_unpaid": outstanding_amounts,
        "total_rent": grand_totals,
    }
def get_static(self, field):
    try:
        static_fields = pluck("field", self.statics)
        index = list(static_fields).index(field)
        return merge(
            super(Service, self).to_dict(include=["name"]),
            {"id": self.key.urlsafe()},
            self.statics[index],
        )
    except ValueError:
        return None
def get_output(fields: List[str], response: Any) -> List[Tuple[str, ...]]:
    '''
    Extract raw output from returned query dictionary

    Parameters
        fields: list of output fields
        response: dict from query response

    Returns
        list of tuples for output table
    '''
    patents = response['patents']
    return list(pluck(fields, patents))
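# A minimal usage sketch of get_output with a hypothetical response payload;
# pluck with a list of fields returns one tuple per patent record.
_response = {'patents': [{'patent_id': '1', 'patent_title': 'Widget'},
                         {'patent_id': '2', 'patent_title': 'Gadget'}]}
assert get_output(['patent_id', 'patent_title'], _response) == [
    ('1', 'Widget'), ('2', 'Gadget')]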
def _get_columns():
    columns = [
        make_column("posting_date", "Date", type="Date", width=90),
        make_column("net_total", type="Currency"),
        make_column("tax_total", type="Currency"),
        make_column("grand_total", type="Currency"),
        make_column("returns grand_total", "Returns Total", type="Currency"),
    ]
    mops = pluck("name", frappe.get_all("Mode of Payment"))
    return (
        columns
        + [make_column(x, type="Currency") for x in mops]
        + [make_column("total_collected", type="Currency")]
    )
def _submit_draft_interests(posting_date):
    interests = frappe.get_all(
        "Microfinance Loan Interest",
        filters={
            "posting_date": posting_date,
            "dcostatus": 0
        },
    )
    for name in pluck("name", interests):
        doc = frappe.get_doc("Microfinance Loan Interest", name)
        doc.submit()
def _get_membership_items():
    default_item_group = frappe.db.get_value(
        'Gym Settings', None, 'default_item_group'
    )
    return pluck(
        'name',
        frappe.get_all('Item', filters={
            'item_group': default_item_group,
            'disabled': 0,
            'is_gym_membership_item': 1,
        }),
    ) if default_item_group else []
def select_to_iterator(sel, dshape=None, **kwargs):
    engine = sel.bind  # TODO: get engine from select
    with engine.connect() as conn:
        result = conn.execute(sel)
        if dshape and isscalar(dshape.measure):
            result = pluck(0, result)
        else:
            result = map(tuple, result)  # Turn RowProxy into tuple

        for item in result:
            yield item
def post_compute(e, q, scope=None):
    """Compute the result of a Broadcast expression.
    """
    columns = dict((col, 1) for qry in q.query
                   for col in qry.get('$project', []))
    scope = {'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                     dict((col, 1) for col in columns))}
    q = q.append(scope)
    dicts = get_result(q.coll.aggregate(list(q.query)))
    assert len(columns) == 1
    return list(pluck(first(columns.keys()), dicts))
def post_compute(e, q, d):
    """Compute the result of a Broadcast expression.
    """
    columns = dict((col, 1) for qry in q.query
                   for col in qry.get('$project', []))
    d = {'$project': toolz.merge({'_id': 0},  # remove mongo identifier
                                 dict((col, 1) for col in columns))}
    q = q.append(d)
    dicts = q.coll.aggregate(list(q.query))['result']
    assert len(columns) == 1
    return list(pluck(first(columns.keys()), dicts))
def _get_rent_actual(property_group):
    data = frappe.db.sql(
        """
            SELECT rental_rate FROM `tabProperty`
            WHERE property_group = %s
        """,
        property_group,
        as_dict=1,
    )
    return {"total_rent_actual": sum(pluck("rental_rate", data))}
def get_leave_balance(employee, date):
    from erpnext.hr.doctype.leave_application.leave_application import (
        get_leave_balance_on,
    )

    carryable_leaves = frappe.get_all("Leave Type", {"is_carry_forward": 1})
    return sum(
        [
            get_leave_balance_on(employee, leave_type, date)
            for leave_type in pluck("name", carryable_leaves)
        ]
    )
def test_groupby_tasks():
    b = db.from_sequence(range(160), npartitions=4)
    out = b.groupby(lambda x: x % 10, max_branch=4, method='tasks')
    partitions = dask.get(out.dask, out._keys())

    for a in partitions:
        for b in partitions:
            if a is not b:
                assert not set(pluck(0, a)) & set(pluck(0, b))

    b = db.from_sequence(range(1000), npartitions=100)
    out = b.groupby(lambda x: x % 123, method='tasks')
    assert len(out.dask) < 100**2
    partitions = dask.get(out.dask, out._keys())

    for a in partitions:
        for b in partitions:
            if a is not b:
                assert not set(pluck(0, a)) & set(pluck(0, b))

    b = db.from_sequence(range(10000), npartitions=345)
    out = b.groupby(lambda x: x % 2834, max_branch=24, method='tasks')
    partitions = dask.get(out.dask, out._keys())

    for a in partitions:
        for b in partitions:
            if a is not b:
                assert not set(pluck(0, a)) & set(pluck(0, b))
def slice_slices_and_integers(out_name, in_name, blockdims, index):
    """
    Dask array indexing with slices and integers

    See Also
    --------
    _slice_1d
    """
    shape = tuple(map(sum, blockdims))

    assert all(isinstance(ind, (slice, int, long)) for ind in index)
    assert len(index) == len(blockdims)

    # Get a list (for each dimension) of dicts{blocknum: slice()}
    block_slices = list(map(_slice_1d, shape, blockdims, index))
    sorted_block_slices = [sorted(i.items()) for i in block_slices]

    # (in_name, 1, 1, 2), (in_name, 1, 1, 4), (in_name, 2, 1, 2), ...
    in_names = list(product([in_name], *[pluck(0, s) for s in sorted_block_slices]))

    # (out_name, 0, 0, 0), (out_name, 0, 0, 1), (out_name, 0, 1, 0), ...
    out_names = list(product([out_name],
                             *[range(len(d))[::-1] if i.step and i.step < 0
                               else range(len(d))
                               for d, i in zip(block_slices, index)
                               if not isinstance(i, (int, long))]))

    all_slices = list(product(*[pluck(1, s) for s in sorted_block_slices]))

    dsk_out = dict((out_name, (getitem, in_name, slices))
                   for out_name, in_name, slices
                   in zip(out_names, in_names, all_slices))

    new_blockdims = [new_blockdim(d, db, i)
                     for d, i, db in zip(shape, index, blockdims)
                     if not isinstance(i, (int, long))]

    return dsk_out, new_blockdims
def iter_enumerations():
    integers_or_symbols = concatv(
        find(children, type='integer'),
        find(children, type='symbol'),
    )
    values = list(pluck('value', integers_or_symbols))
    if values:
        yield make_json_ast_node(
            type='enumeration_values',
            values=values,
        )
    intervals = find_many_or_none(children, type='interval')
    if intervals is not None:
        yield from intervals
def _into_iter_mongodb(coll, columns=None, dshape=None):
    """ Into helper function

    Return both a lazy sequence of tuples and a list of column names
    """
    seq = coll.find()
    if not columns and dshape:
        columns = dshape.measure.names
    elif not columns:
        item = next(seq)
        seq = concat([[item], seq])
        columns = sorted(item.keys())
        columns.remove('_id')
    return columns, pluck(columns, seq)
def arg_reduction(x, chunk, combine, agg, axis=None, split_every=None, out=None):
    """ Generic function for argreduction.

    Parameters
    ----------
    x : Array
    chunk : callable
        Partialed ``arg_chunk``.
    combine : callable
        Partialed ``arg_combine``.
    agg : callable
        Partialed ``arg_agg``.
    axis : int, optional
    split_every : int or dict, optional
    """
    if axis is None:
        axis = tuple(range(x.ndim))
        ravel = True
    elif isinstance(axis, int):
        if axis < 0:
            axis += x.ndim
        if axis < 0 or axis >= x.ndim:
            raise ValueError("axis entry is out of bounds")
        axis = (axis,)
        ravel = x.ndim == 1
    else:
        raise TypeError("axis must be either `None` or int, "
                        "got '{0}'".format(axis))

    # Map chunk across all blocks
    name = 'arg-reduce-chunk-{0}'.format(tokenize(chunk, axis))
    old = x.name
    keys = list(product(*map(range, x.numblocks)))
    offsets = list(product(*(accumulate(operator.add, bd[:-1], 0)
                             for bd in x.chunks)))
    if ravel:
        offset_info = zip(offsets, repeat(x.shape))
    else:
        offset_info = pluck(axis[0], offsets)

    chunks = tuple((1, ) * len(c) if i in axis else c
                   for (i, c) in enumerate(x.chunks))
    dsk = dict(((name,) + k, (chunk, (old,) + k, axis, off))
               for (k, off) in zip(keys, offset_info))
    # The dtype of `tmp` doesn't actually matter, just need to provide something
    tmp = Array(sharedict.merge(x.dask, (name, dsk)), name, chunks, dtype=x.dtype)
    dtype = np.argmin([1]).dtype
    result = _tree_reduce(tmp, agg, axis, False, dtype, split_every, combine)
    return handle_out(out, result)
def visit_variable_calculee(self, node, children):
    description = find_one(children, type='string')['value']
    subtypes = find_many_or_none(children, type='variable_calculee_subtype') or []
    subtypes = sorted(pluck('value', subtypes))
    value_type = find_one_or_none(children, type='value_type')
    tableau = find_one_or_none(children, type='variable_calculee_tableau')
    return make_json_ast_node(
        base=('base' in subtypes) or None,
        description=description,
        linecol=True,
        name=children[0]['value'],
        node=node,
        restituee=('restituee' in subtypes) or None,
        tableau=None if tableau is None else tableau['dimension'],
        value_type=None if value_type is None else value_type['value'],
    )
def compress(sexp):
    if sexp is None:
        return None
    if not isinstance(sexp, list):
        return sexp
    if all(not isinstance(x, list) for x in sexp):
        return sexp

    # if both heads are the same then gut the children.
    children = list(map(compress, sexp[1:]))
    heads = set(extract_op(x)
                for x in t.pluck(0, filter(lambda x: isinstance(x, list),
                                           children)))
    heads.add(extract_op(sexp[0]))

    # number heads are fine, we just want to make sure that the
    # string type heads are all the same
    if len(heads) <= 1:
        return list(t.cons(sexp[0],
                           t.concat(x[1:] if isinstance(x, list) else [x]
                                    for x in children)))

    return [sexp[0]] + children
def f(c, a, b):
    aa = rpc(ip=a.ip, port=a.port)
    bb = rpc(ip=b.ip, port=b.port)

    assert not a.active
    response, _ = yield aa.compute(key='x',
                                   function=add,
                                   args=[1, 2],
                                   who_has={},
                                   close=True)
    assert not a.active
    assert response == b'OK'
    assert a.data['x'] == 3
    assert c.who_has['x'] == set([(a.ip, a.port)])

    response, info = yield bb.compute(key='y',
                                      function=add,
                                      args=['x', 10],
                                      who_has={'x': {a.address}})
    assert response == b'OK'
    assert b.data['y'] == 13
    assert c.who_has['y'] == set([(b.ip, b.port)])
    assert info['nbytes'] == sizeof(b.data['y'])

    def bad_func():
        1 / 0

    response, content = yield bb.compute(key='z',
                                         function=bad_func,
                                         args=(),
                                         close=True)
    assert not b.active
    assert response == b'error'
    assert isinstance(content['exception'], ZeroDivisionError)
    if sys.version_info[0] >= 3:
        assert any('1 / 0' in line
                   for line in pluck(3, traceback.extract_tb(content['traceback']))
                   if line)

    aa.close_streams()
    yield a._close()

    assert a.address not in c.ncores and b.address in c.ncores

    assert list(c.ncores.keys()) == [(b.ip, b.port)]

    assert isinstance(b.address_string, str)
    assert b.ip in b.address_string
    assert str(b.port) in b.address_string

    bb.close_streams()
    yield b._close()
def test_gather_many_small(c, s, a, *workers):
    a.total_out_connections = 2
    futures = yield c._scatter(list(range(100)))

    assert all(w.data for w in workers)

    def f(*args):
        return 10

    future = c.submit(f, *futures, workers=a.address)
    yield wait(future)

    types = list(pluck(0, a.log))
    req = [i for i, t in enumerate(types) if t == 'request-dep']
    recv = [i for i, t in enumerate(types) if t == 'receive-dep']
    assert min(recv) > max(req)

    assert a.comm_nbytes == 0
def keys_to_flush(lengths, fraction=0.1, maxcount=100000):
    """ Which keys to remove

    >>> lengths = {'a': 20, 'b': 10, 'c': 15, 'd': 15,
    ...            'e': 10, 'f': 25, 'g': 5}
    >>> keys_to_flush(lengths, 0.5)
    ['f', 'a']
    """
    top = topk(max(len(lengths) // 2, 1),
               lengths.items(),
               key=1)
    total = sum(lengths.values())
    cutoff = min(maxcount, max(1,
                 bisect(list(accumulate(add, pluck(1, top))),
                        total * fraction)))
    result = [k for k, v in top[:cutoff]]
    assert result
    return result
def test_adapt_quickly():
    """ We want to avoid creating and deleting workers frequently

    Instead we want to wait a few beats before removing a worker in case the
    user is taking a brief pause between work
    """
    cluster = yield LocalCluster(0, asynchronous=True, processes=False,
                                 scheduler_port=0, silence_logs=False,
                                 diagnostics_port=None)
    client = yield Client(cluster, asynchronous=True)
    adapt = Adaptive(cluster.scheduler, cluster, interval=20, wait_count=5,
                     maximum=10)
    try:
        future = client.submit(slowinc, 1, delay=0.100)
        yield wait(future)
        assert len(adapt.log) == 1

        # Scale up when there is plenty of available work
        futures = client.map(slowinc, range(1000), delay=0.100)
        while frequencies(pluck(1, adapt.log)) == {'up': 1}:
            yield gen.sleep(0.01)
        assert len(adapt.log) == 2
        assert 'up' in adapt.log[-1]
        d = [x for x in adapt.log[-1] if isinstance(x, dict)][0]
        assert 2 < d['n'] <= adapt.maximum

        while len(cluster.scheduler.workers) < adapt.maximum:
            yield gen.sleep(0.01)

        del futures

        while len(cluster.scheduler.workers) > 1:
            yield gen.sleep(0.01)

        # Don't scale up for large sequential computations
        x = yield client.scatter(1)
        for i in range(100):
            x = client.submit(slowinc, x)

        yield gen.sleep(0.1)
        assert len(cluster.scheduler.workers) == 1
    finally:
        yield client.close()
        yield cluster.close()