def gender_from_bam(bam_path, prefix=''):
    """Predict the gender from a BAM alignment file.

    Args:
      bam_path (path): path to a BAM alignment file
      prefix (str, optional): string to prefix to 'X', 'Y'

    Returns:
      Gender: tuple of X coverage, Y coverage, and sex prediction

    Examples:
      >>> gender_from_bam('alignment.bam', prefix='chr')
      Gender(x_coverage=123.31, y_coverage=0.13, sex='female')
    """
    # One hard-coded interval per sex chromosome; the coordinates are
    # already 1,1-based so they are passed to the BAM reader untouched.
    sex_chromosome_intervals = [
        ("%sX" % prefix, 1, 59373566),
        ("%sY" % prefix, 69362, 11375310),
    ]

    bam = BamFile(bam_path)

    # Read each interval and reduce it to a single average value
    x_coverage, y_coverage = [
        average(bam(*interval)) for interval in sex_chromosome_intervals
    ]

    sex = predict_gender(x_coverage, y_coverage)
    return Gender(x_coverage, y_coverage, sex)
def parser(filename, *args, **kwargs):
    """Build a directed graph from a comma-separated edge-list file.

    Every line of ``filename`` is stripped and split on ``','``; the
    resulting sequences are handed to ``DiGraph.add_edges_from``.

    Args:
        filename: path of the edge-list file to read.
        *args, **kwargs: accepted for interface compatibility; unused.

    Returns:
        The populated ``nx.DiGraph``.
    """
    g = nx.DiGraph()
    # The pipeline is lazy, so it must be consumed (by add_edges_from)
    # while the file is still open; the ``with`` then closes the handle,
    # which the original never did.
    with c_open(mode='r')(filename) as handle:
        tz.pipe(handle,
                c.map(str.strip),
                c.map(c_split(sep=',')),
                g.add_edges_from)
    return g
def streaming_pca(samples, n_components=2, batch_size=50):
    """Fit an ``IncrementalPCA`` over ``samples`` one batch at a time.

    Args:
        samples: iterable of sample rows.
        n_components: number of principal components to keep.
        batch_size: number of rows fed to each ``partial_fit`` call.

    Returns:
        The ``IncrementalPCA`` instance after all batches were fitted.
    """
    ipca = decomposition.IncrementalPCA(n_components=n_components,
                                        batch_size=batch_size)
    min_rows = n_components or 1
    # ``partition`` silently discards a trailing incomplete batch, so the
    # last ``len(samples) % batch_size`` rows were never fitted.
    # ``partition_all`` keeps that tail.
    for batch in curried.partition_all(batch_size, samples):
        if len(batch) < batch_size and len(batch) < min_rows:
            # A short final batch with fewer rows than components cannot be
            # fitted; skip it as before instead of raising.
            continue
        ipca.partial_fit(np.array(batch))
    return ipca
def map(self, func, data):  # pylint: disable=no-self-use
    """Apply ``func`` to every item of ``data`` and wrap each outcome.

    Returns a list with one ``DummyResult`` per input item, in input order.
    """
    return [DummyResult(func(item)) for item in data]
def ghost_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------

    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes dict informs how many cells to overlap between neighboring blocks
    {0: 2, 2: 5} means share two cells in 0 axis, 5 cells in 2 axis
    """
    dims = list(map(len, x.blockdims))
    expand_key2 = partial(expand_key, dims=dims)
    interior_keys = pipe(x._keys(), flatten,
                         map(expand_key2), map(flatten),
                         concat, list)

    # One (possibly fractional) slice task per expanded key
    interior_slices = {k: fractional_slice(k, axes) for k in interior_keys}

    name = next(ghost_names)
    ghost_blocks = {
        (name,) + k[1:]: (rec_concatenate, (concrete, expand_key2(k)))
        for k in interior_keys
    }

    blockdims = []
    for i, bds in enumerate(x.blockdims):
        depth = axes.get(i, 0)
        if len(bds) == 1:
            # An axis with a single block has no neighbours to share with.
            # The generic formula below would emit the lone size twice
            # (once as "first" and once as "last" block).
            blockdims.append(list(bds))
            continue
        blockdims.append([bds[0] + depth] +
                         [bd + depth * 2 for bd in bds[1:-1]] +
                         [bds[-1] + depth])

    return Array(merge(interior_slices, ghost_blocks, x.dask),
                 name, blockdims=blockdims)
def __str__(self):
    """Render this node as a ``<child>.relabel(...)`` call.

    Keyword syntax is used when every old name is a valid identifier,
    otherwise the labels are printed as a dict literal.
    """
    pairs = self.labels
    keyword_form = all(isvalid_identifier(first(pair)) for pair in pairs)
    if keyword_form:
        rendered = ', '.join('%s=%r' % pair for pair in pairs)
    else:
        rendered = '{%s}' % ', '.join('%r: %r' % pair for pair in pairs)
    return '%s.relabel(%s)' % (self._child, rendered)
def outer_dict(dict_in):
    """Outer product of dictionary values

    Args:
      dict_in: a dictionary with iterable values

    Returns:
      a list of dictionaries, one per combination of values; an empty
      input yields ``[{}]`` (the single empty combination) instead of
      raising ``IndexError``

    >>> outer_dict(dict(a=[1], b=[2, 3])) == [dict(a=1, b=2), dict(a=1, b=3)]
    True
    >>> outer_dict({})
    [{}]
    """
    keys = list(dict_in)
    # product() walks the value iterables in key order, so zipping each
    # combination back onto ``keys`` rebuilds one dictionary per combination.
    return [dict(zip(keys, combo)) for combo in product(*dict_in.values())]
def parse_people(do_request):
    """Scrape metadata for every person linked from the people listing.

    ``do_request`` is called with ``DZ_RS_PEOPLE_URL`` and then once per
    person link; the returned value is a lazy iterable of dictionaries
    produced by ``parse_representative``.
    """
    logger.info('Parsing people')

    def parse_representative(doc):
        doc = doc('div.wpsPortletBody')
        raw_birth_date = doc('fieldset table').eq(0).find('td').eq(1).text().replace(' ', '')

        name = doc.find('h3').eq(0).text()
        birth_date = arrow.get(raw_birth_date, 'D.M.YYYY') if raw_birth_date else None
        image = DZ_RS_URL + doc.find('img').eq(0).attr('src')
        group = doc('.panelBox100 a').attr('href')
        location = doc(u'*:contains("Volilno okro")').parent().text().split(':')[1].strip()
        gender = "F" if 'Poslanka' in str(doc) else "M"

        return {
            'name': name,
            'birthDate': birth_date,
            'image': image,
            'group': group,
            'location': location,
            'gender': gender,
        }

    def person_links(listing):
        # one href per person on the listing page
        return listing("p.podnaslovOsebaLI a").map(lambda i, r: pq(r).attr('href'))

    return toolz.pipe(
        DZ_RS_PEOPLE_URL,
        do_request,                         # page with the list of people
        person_links,                       # link for each person
        curried.map(do_request),            # visit every person's page
        curried.map(parse_representative),  # extract the metadata
    )
def __str__(self):
    """Return the ``relabel`` call that would recreate this expression."""
    labels = self.labels
    old_names = [first(label) for label in labels]
    if all(isvalid_identifier(old) for old in old_names):
        template, wrapper, joined_by = "%s=%r", "%s", ", "
    else:
        # At least one old name cannot be written as a keyword argument,
        # so fall back to dict-literal syntax.
        template, wrapper, joined_by = "%r: %r", "{%s}", ", "
    rest = wrapper % joined_by.join(template % l for l in labels)
    return "%s.relabel(%s)" % (self._child, rest)
def get_service_step(service_recipe):
    """
    Get step timedelta: the smallest period duration found in any entry of
    ``service_recipe`` (each entry exposes its periods under
    ``'delta_periods'`` as ``(start, end)`` pairs).
    """
    def period_length(start, end):
        return end - start

    def shortest_period(resource):
        return min(period_length(*period)
                   for period in resource['delta_periods'])

    return min(shortest_period(resource) for resource in service_recipe)
def functional():
    """Count log records per ``'hour'`` over the last ``days_of_logs`` days.

    Same computation as the nested-call form, spelled out stage by stage.
    """
    day_offsets = range(1, days_of_logs + 1)
    dates = map(lambda days_ago: today - timedelta(days=days_ago), day_offsets)
    outputs = map(lambda date: logs[date.strftime('%Y/%m/%d')], dates)
    lines = mapcat(lambda output: output.strip().split('\n'), outputs)
    non_empty_lines = filter(None, lines)
    records = map(json.loads, non_empty_lines)
    return count_by(itemgetter('hour'), records)
def piped():
    """Count log records per ``'hour'`` using the pipe-operator chain.

    The stages are: day offsets -> dates -> log output per date -> individual
    lines -> non-empty lines -> parsed JSON -> counts keyed by ``'hour'``.
    """
    # NOTE(review): ``_|`` ... ``|_`` presumably open and close a pipeline
    # object supplied by a pipe-operator helper defined elsewhere -- confirm.
    return (_| range(1, days_of_logs + 1)
            | map(lambda days_ago: today - timedelta(days=days_ago))
            | map(lambda date: logs[date.strftime('%Y/%m/%d')])
            | mapcat(lambda output: output.strip().split('\n'))
            | filter(None)
            | map(json.loads)
            | count_by(itemgetter('hour'))
            |_)
def _iter(self, usecols=None):
    """Lazily iterate over the CSV's rows, reading it chunk by chunk.

    Each chunk is read as object dtype without date parsing, missing values
    are replaced by ``''``, and the chunk is converted with ``into(list, ...)``
    before the per-chunk results are chained together.
    """
    from blaze.api.into import into

    chunks = self.pandas_read_csv(usecols=usecols,
                                  chunksize=self.chunksize,
                                  dtype='O',
                                  parse_dates=[])
    rows_per_chunk = (
        into(list, pd.DataFrame.fillna(chunk, value=''))
        for chunk in chunks
    )
    return concat(rows_per_chunk)
def opt_weight_ir_grid(df, alphas, look_ahead_pers, long_only=True,
                       tilt_weights=None):
    """Exhaustive grid search over alphas, norm types and look-ahead periods.

    Returns a DataFrame with one row per combination and the columns
    ``alpha``, ``norm_type``, ``look_ahead_per`` and ``ir`` (the value
    returned by ``calc_opt_weight_portfolio_ir`` for that combination).
    """
    norm_types = [2,]
    # Not used below; kept because the lookup raises when ``df`` is shorter
    # than the last look-ahead period.
    end_date = df.index[-(look_ahead_pers[-1] + 1)]

    records = []
    for alpha, norm_type, look_ahead_per in product(alphas, norm_types,
                                                    look_ahead_pers):
        ir = calc_opt_weight_portfolio_ir(df, alpha, norm_type,
                                          look_ahead_per, long_only,
                                          tilt_weights)
        records.append({'alpha': alpha,
                        'norm_type': norm_type,
                        'look_ahead_per': look_ahead_per,
                        'ir': ir})
    return pd.DataFrame(records)
def __calculate_max_column_length(column_key):
    """Return the widest string length in a column, header included.

    Looks up ``column_key`` in every row of the enclosing ``data`` mapping and
    compares the longest stringified value with the stringified key itself.
    """
    # Collect every value first, then measure them
    column_values = [data[key][column_key] for key in data]
    max_value_length = max(len(str(value)) for value in column_values)
    header_length = len(str(column_key))
    return max(max_value_length, header_length)
def discover_sqlcontext(ctx):
    """Discover the datashape of every table registered on ``ctx``.

    Table names come from ``ctx.tableNames()``; when that attribute is not
    available they are read from the underlying catalog instead. The result
    is a record datashape with one field per table, sorted by name.
    """
    try:
        table_names = [str(table_name) for table_name in ctx.tableNames()]
    except AttributeError:
        java_names = ctx._ssql_ctx.catalog().tables().keySet()
        table_names = list(scala_set_to_set(ctx, java_names))
    table_names.sort()

    fields = [(table_name, discover(ctx.table(table_name)))
              for table_name in table_names]
    return datashape.DataShape(datashape.Record(fields))
def destruct(x):
    """
    Deconstructs a data structure into a 1-D np.ndarray (via multiple dispatch)
    Converts a list of numpy arrays to a single array
    """
    # Validate first: every element has to be a numpy array
    check_is_array = enforce(np.ndarray)
    for element in x:
        check_is_array(element)

    # Flatten each array and collect all values into one flat list
    flat_values = [value for array in x for value in np.ravel(array)]
    return np.array(flat_values)
def compute_up(expr, args, **kwargs):
    """Build a single SELECT containing the columns of all merged ``args``.

    Raises:
        ValueError: when the arguments come from more than one FROM object.
    """
    all_froms = concat(map(get_all_froms, args))
    from_objs = list(unique(all_froms))
    if len(from_objs) > 1:
        # TODO: how do you do this in sql? please send help
        raise ValueError('only columns from the same table can be merged')

    all_columns = concat(map(get_unsafe_inner_columns, args, expr.args))
    cols = list(unique(all_columns))

    sel = sa.select(cols, from_obj=from_objs[0])
    where = unify_wheres(args)
    return sel if where is None else sel.where(where)
def export_intervals(chanjo_db, include_header=True, bed_score=0):
    r"""Return BED-formatted interval lines from existing ``chanjo_db``.

    BED lines are ready to be printed or written to a file.

    Args:
      chanjo_db (session): ``sqlalchemy.orm.session`` object with a
        ``.query``-method
      include_header (bool, optional): whether to include BED header
      bed_score (int, optional): dummy score (0-1000) to insert at field 5
        to complete the BED format

    Yields:
      str: stringified and tab-delimited interval

    Examples:
      >>> from chanjo import export_intervals, Store
      ... # instantiate a new connection to a Chanjo database
      >>> db = Store('./coverage.sqlite')
      >>> with open('intervals.sorted.bed', 'w') as stream:
      ...   # write intervals in BED-format with appropriate headers
      ...   for bed_line in export_intervals(db):
      ...     stream.write(bed_line + '\n')
    """
    if include_header:
        yield '#chrom\tchromStart\tchromEnd\tname\tscore\tstrand'

    # Columns to fetch; ``start - 1`` is what the query selects for the
    # BED chromStart field.
    i = Interval  # alias
    columns = (i.contig, i.start - 1, i.end, i.id, i.strand)

    # BED files are tab-delimited
    delimiter = '\t'
    score_field = str(bed_score)

    for record in fetch_records(chanjo_db, columns):
        fields = [str(value) for value in record]
        # First four fields, then the dummy score, then the strand. The
        # strand is kept as ONE field: the previous ``list(last(...))``
        # split a multi-character value (e.g. 'None' or '-1') into one
        # tab-separated column per character.
        yield delimiter.join(fields[:4] + [score_field, fields[-1]])
def __init__(self, bamfile, outdir):
    """Index and open ``bamfile`` and predict the sex from X/Y coverage.

    Args:
        bamfile: path of the BAM file to analyse.
        outdir: output directory; must be truthy.

    Raises:
        AssertionError: if either argument is falsy or indexing reports
            a falsy status.
    """
    self.bamfile = bamfile
    stat = self.indexbamfile()
    self.outdir = outdir
    assert self.bamfile and self.outdir and stat, "Input error"
    self._bam = pysam.Samfile(bamfile)
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24; the builtin gives the same dtype on every NumPy version.
    self._prealloc_func = partial(np.zeros, dtype=int)
    self.fake_bed_rows = [("chrX", 1, 59373566),
                          ("chrY", 69362, 11375310)]
    # Lazy pipeline: read each interval, then reduce it to its average
    self.sequence = pipe(self.fake_bed_rows,
                         map(lambda interval: self.depthreader(*interval)),
                         map(average))
    self.x_coverage, self.y_coverage = list(self.sequence)
    self.sex = self.predict_gender()
def ipython_display(specs):
    """Run publish_display_data for the JS and HTML

    Args:
      specs: a list of Vega specs
    """
    # Pair every spec's Vega object with a fresh UUID
    charts = [(uuid.uuid4(), vega.Vega(spec)) for spec in specs]

    # The HTML is published once for the whole batch ...
    html_publish_map(charts)

    # ... and the JS once per chart.
    publish_js = tlam(js_publish)
    for chart in charts:
        publish_js(chart)
def overlap_internal(x, axes):
    """ Share boundaries between neighboring blocks

    Parameters
    ----------

    x: da.Array
        A dask array
    axes: dict
        The size of the shared boundary per axis

    The axes input informs how many cells to overlap between neighboring blocks
    {0: 2, 2: 5} means share two cells in 0 axis, 5 cells in 2 axis
    """
    dims = list(map(len, x.chunks))
    expand_key2 = partial(expand_key, dims=dims, axes=axes)

    # Make keys for each of the surrounding sub-arrays
    interior_keys = pipe(x.__dask_keys__(), flatten, map(expand_key2),
                         map(flatten), concat, list)

    name = 'overlap-' + tokenize(x, axes)
    getitem_name = 'getitem-' + tokenize(x, axes)

    interior_slices = {}
    overlap_blocks = {}
    for k in interior_keys:
        source_key = (x.name,) + k
        frac_slice = fractional_slice(source_key, axes)
        if source_key == frac_slice:
            # Whole block: alias it and register the concatenation of the
            # block with its surrounding slices.
            interior_slices[(getitem_name,) + k] = source_key
            overlap_blocks[(name,) + k] = (
                concatenate3,
                (concrete, expand_key2((None,) + k, name=getitem_name)),
            )
        else:
            # Fractional key: a slice task taken from a neighbouring block
            interior_slices[(getitem_name,) + k] = frac_slice

    chunks = []
    for i, bds in enumerate(x.chunks):
        depth = axes.get(i, 0)
        if len(bds) == 1:
            # single block along this axis: nothing to share
            chunks.append(bds)
            continue
        first_block = [bds[0] + depth]
        inner_blocks = [bd + depth * 2 for bd in bds[1:-1]]
        last_block = [bds[-1] + depth]
        chunks.append(first_block + inner_blocks + last_block)

    dsk = merge(interior_slices, overlap_blocks)
    dsk = sharedict.merge(x.dask, (name, dsk))
    return Array(dsk, name, chunks, dtype=x.dtype)
def ccds_to_bed(ccds_stream):
    """Convert CCDS dump to Chanjo-style BED stream.

    Main entry point for default Chanjo converter (ccds). It converts
    a sorted (start, chrom) CCDS database to the Chanjo BED-format.

    Args:
      ccds_stream (file): file handle to read CCDS lines from

    Yields:
      Interval: interval with merged block and superblock ids
    """
    # keep only the lines matched by grep('Public')
    public_lines = filter(grep('Public'), ccds_stream)

    # strip trailing whitespace, then split every line on tabs
    rows = map(split(sep='\t'), map(text_type.rstrip, public_lines))

    # rows -> flat stream of intervals with renamed sex contigs
    intervals = map(rename_sex_interval, concat(map(extract_intervals, rows)))

    # lazily group by contig and keep only the group members
    per_contig = pluck(1, lazy_groupby(intervals, key=attrgetter('contig')))

    # within a contig: group by name (non-lazy) and merge related elements
    by_name = map(groupby(attrgetter('name')), per_contig)
    merged = map(valmap(merge_related_elements), by_name)

    # back to plain sequences, sorted by start position
    merged_values = map(itervalues, merged)
    sorted_groups = map(partial(sorted, key=attrgetter('start')),
                        merged_values)

    return concat(sorted_groups)
def save_reviews(product_id, tag, reviews):
    """Write one ``<tag>,<review>`` line per review to a CSV file.

    The file is ``data/<product_id>.<tag>.csv``. Every review is echoed to
    stdout and then passed through the placeholder pipeline below (split on
    newlines, strip, encode as UTF-8, join with spaces) before being written.

    The file is opened in a ``with`` block so the handle is closed even if a
    review fails to process; previously an exception leaked the open file.
    """
    with open("data/{}.{}.csv".format(product_id, tag), "w") as f:
        for review in reviews:
            print(review)
            f.write("{},{}\n".format(
                tag,
                pipe(
                    review,
                    _.split("\n"),
                    map(_.strip()),
                    map(_.encode("utf-8")),
                    SF(" ".join)(_)
                )
            ))
def compute_up(expr, data, scope=None, **kwargs):
    """Compile a split-apply-combine expression into a grouped SELECT.

    The grouper and every reduction are compiled separately, their FROM
    objects and WHERE clauses are unified, and the resulting selection is
    grouped by the grouper's columns.
    """
    data = lower_column(data)
    grouper = compute(expr.grouper, scope, post_compute=False,
                      return_type='native', **kwargs)

    app = expr.apply
    reductions = []
    for val, name in zip(app.values, app.fields):
        compiled = compute(val, data, post_compute=None, return_type='native')
        reductions.append(compiled.label(name))

    reduction_froms = concat(map(get_all_froms, reductions))
    froms = list(unique(chain(get_all_froms(grouper), reduction_froms)))

    grouper_cols = list(getattr(grouper, 'inner_columns', [grouper]))
    reduction_cols = []
    for r in reductions:
        element = getattr(r, 'element', None)
        reduction_cols.extend(getattr(element, 'inner_columns', [r]))
    inner_cols = grouper_cols + reduction_cols

    wheres = unify_wheres([grouper] + reductions)
    sel = unify_froms(sa.select(inner_cols, whereclause=wheres), froms)
    return sel.group_by(*grouper_cols)
def select_or_selectable_to_frame(el, **kwargs):
    """Run ``el`` through ``batch`` and load the rows into a DataFrame.

    An empty result still yields a DataFrame carrying the column names.
    """
    columns, rows = batch(el)

    # Peek at the first row to detect an empty result
    head = next(rows, None)
    if head is None:
        return pd.DataFrame(columns=columns)

    records = [tuple(head)]
    records.extend(tuple(row) for row in rows)
    return pd.DataFrame(records, columns=columns)
def __getattr__(self, key):
    """Resolve ``key`` as a field or a datashape-dependent method.

    Lookup order: the attribute cache, regular attribute access, the
    expression's fields (by their valid-identifier spelling), then the
    methods registered for the expression's schema/dshape. Successful
    lookups are stored in ``_attr_cache``.

    Raises:
        AttributeError: when nothing matches ``key``.
    """
    if key == '_hash':
        # NOTE(review): presumably avoids recursing into __getattr__ while
        # ``self`` is hashed for the cache key below -- confirm.
        raise AttributeError()

    try:
        return _attr_cache[(self, key)]
    except Exception:
        # Cache miss (or unusable key): fall through to the slow path. This
        # used to be a bare ``except:``, which also swallowed
        # KeyboardInterrupt and SystemExit.
        pass

    try:
        result = object.__getattribute__(self, key)
    except AttributeError:
        fields = dict(zip(map(valid_identifier, self.fields), self.fields))
        if self.fields and key in fields:
            if isscalar(self.dshape.measure):  # t.foo.foo is t.foo
                result = self
            else:
                result = self[fields[key]]
        else:
            d = toolz.merge(schema_methods(self.dshape.measure),
                            dshape_methods(self.dshape))
            if key in d:
                func = d[key]
                if func in method_properties:
                    result = func(self)
                else:
                    result = boundmethod(func, self)
            else:
                # nothing matched: re-raise the original AttributeError
                raise

    _attr_cache[(self, key)] = result
    return result
def fractional_slice(task, axes):
    """Turn a key with fractional block indices into a slicing task.

    Every index in ``task[1:]`` is rounded to the block it refers to. An
    index below its rounded value selects the first ``depth`` cells of that
    block, an index above it the last ``depth`` cells, where ``depth`` is
    ``axes.get(axis, 0)``. Keys whose indices are all whole are returned
    unchanged.

    >>> fractional_slice(('x', 5.1), {0: 2})  # doctest: +SKIP
    (getitem, ('x', 5), (slice(-2, None),))

    >>> fractional_slice(('x', 3, 5.1), {0: 2, 1: 3})  # doctest: +SKIP
    (getitem, ('x', 3, 5), (slice(None, None, None), slice(-3, None)))

    >>> fractional_slice(('x', 2.9, 5.1), {0: 2, 1: 3})  # doctest: +SKIP
    (getitem, ('x', 3, 5), (slice(0, 2), slice(-3, None)))
    """
    everything = slice(None, None, None)
    rounded = (task[0],) + tuple(map(round, task[1:]))

    selectors = []
    for axis, (exact, whole) in enumerate(zip(task[1:], rounded[1:])):
        depth = axes.get(axis, 0)
        if exact == whole:
            selector = everything
        elif exact < whole:
            selector = slice(0, depth)
        elif depth == 0:
            # exact > whole with no overlap: slice(-0, None) would select the
            # whole block, so take an explicitly empty slice instead
            selector = slice(0, 0)
        else:
            selector = slice(-depth, None)
        selectors.append(selector)

    if all(selector == everything for selector in selectors):
        return task
    return (getitem, rounded, tuple(selectors))
def compute_up(expr, data, **kwargs):
    """Compile a ``By`` expression against a selectable into a grouped SELECT.

    Raises:
        TypeError: when the grouper's datashape is unsupported or when
            ``expr.apply`` is not a valid reducer.
    """
    if not valid_grouper(expr):
        raise TypeError("Grouper must have a non-nested record or one "
                        "dimensional collection datashape, "
                        "got %s of type %r with dshape %s" %
                        (expr.grouper, type(expr.grouper).__name__,
                         expr.dshape))

    s = alias_it(data)

    if not valid_reducer(expr.apply):
        raise TypeError('apply must be a Summary expression')
    reduction = compute(expr.apply, s, post_compute=False)

    grouper = get_inner_columns(compute(expr.grouper, s, post_compute=False))
    reduction_columns = concat(get_inner_columns(column)
                               for column in reduction.inner_columns)
    columns = list(unique(chain(grouper, reduction_columns)))

    is_alias = isinstance(s, sa.sql.selectable.Alias)
    selects_from_join = (hasattr(s, 'froms') and
                         isinstance(s.froms[0], sa.sql.selectable.Join))
    if not is_alias or selects_from_join:
        assert len(s.froms) == 1, 'only a single FROM clause supported for now'
        (from_obj,) = s.froms
    else:
        from_obj = None

    return reconstruct_select(columns,
                              getattr(s, 'element', s),
                              from_obj=from_obj,
                              group_by=grouper)
def compute_down(expr, data, **kwargs):
    """ Compile a blaze expression to a sparksql expression"""
    leaves = expr._leaves()

    # exactly one leaf (the database) is supported
    if len(leaves) != 1:
        raise ValueError('Must compile from exactly one root database')
    (leaf,) = leaves

    # field expressions on the database are Field instances with a record
    # measure whose immediate child is the database leaf
    is_table_of_leaf = istable(leaf)
    tables = [term for term in expr._subterms() if is_table_of_leaf(term)]
    if not tables:
        raise ValueError('Expressions not referencing a table cannot be '
                         'compiled')

    # replace every table by a fresh symbol of the same name and dshape
    new_leaves = [symbol(table._name, table.dshape) for table in tables]
    expr = expr._subs(dict(zip(tables, new_leaves)))

    # compute using sqlalchemy
    sa_tables = map(make_sqlalchemy_table, tables)
    scope = dict(zip(new_leaves, sa_tables))
    query = compute(expr, scope)

    # interpolate params
    compiled = literalquery(query, dialect=HiveDialect())
    return data.sql(str(compiled))
def is_invalid(value):
    """Return True when ``value`` lies in none of ``valid_ranges``."""
    return all(value not in allowed for allowed in valid_ranges)
def statements_individual_creator(rules: List[Rule]):
    """Create an individual from the thresholds of all rule statements."""
    thresholds = [statement.threshold
                  for statement in get_rules_statements(rules)]
    return creator.Individual(np.array(thresholds))
def cli(dry_run, input_dir, ignore_refresh, output_dir, pattern, verbose, transaction):
    """Generate the create and refresh scripts for materialized views.

    Scripts under ``input_dir`` are processed, their views are ordered by
    dependency, and two scripts are produced: one creating the views and one
    refreshing those whose name matches ``pattern`` (minus those matching
    ``ignore_refresh``). With ``dry_run`` nothing is written.
    """
    file_paths = walk_directory_recursively(input_dir)
    if verbose:
        print('Found %d Scripts in %s' % (len(file_paths), input_dir))

    entities = [process_file(file_path, pattern) for file_path in file_paths]

    if verbose:
        total_views = sum(count(entity['views']) for entity in entities)
        total_deps = sum(count(entity['view_dependencies'])
                         for entity in entities)
        print('Identified %d Materialized Views, Containing %d View Dependencies' %
              (total_views, total_deps))

    # Map every view to its script content and to its dependencies
    view_content = {}
    dag = {}
    for entity in entities:
        for view in entity['views']:
            view_content[view] = format_content(entity)
            dag[view] = entity['view_dependencies']

    sorted_views = toposort_flatten(dag)

    if verbose:
        print("\nMaterialized View Dependencies:")
        pprint_color({view: list(deps) for view, deps in dag.items() if deps})

    create_views = list(unique(view_content[view] for view in sorted_views))
    create_script = generate_script(create_views, transaction)

    refresh_prefix = 'REFRESH MATERIALIZED VIEW CONCURRENTLY '
    if transaction:
        refresh_prefix = ' ' + refresh_prefix

    def should_refresh(view):
        return (re.search(pattern, view)
                and not (ignore_refresh and re.search(ignore_refresh, view)))

    refresh_views = [refresh_prefix + view + ';'
                     for view in sorted_views
                     if should_refresh(view)]

    if verbose:
        print('Selecting %d Materialized Views for Refresh' % len(refresh_views))

    refresh_script = generate_script(refresh_views, transaction, "\n\n")

    if dry_run:
        print('Dry Run Option Enabled - Skipping Script Generation')
        return

    timestr = time.strftime("%Y%m%d-%H%M%S")
    serialize_script('create', timestr, create_script, output_dir, verbose)
    serialize_script('refresh', timestr, refresh_script, output_dir, verbose)
def sql_to_iterator(t, **kwargs):
    """Select everything from ``t`` and return its rows as tuples."""
    query = sa.select([t])
    _columns, rows = batch(query)
    return map(tuple, rows)
def deep_map_f(keys, f, dictionary):
    """Transform the value found under ``keys`` by mapping ``f`` over it.

    Delegates to ``deep_transform`` with a transformer built from ``list``
    and the curried ``map(f)``.

    NOTE(review): presumably ``comp`` composes right-to-left, so the
    transformer is ``list(map(f, value))`` -- confirm against ``comp``.
    """
    return deep_transform(keys, comp(list, map(f)), dictionary)
def validate(doc, method):
    """Reject documents whose items do not share exactly one Sales Order."""
    sales_orders = {item.against_sales_order for item in doc.items}
    if len(sales_orders) != 1:
        frappe.throw(
            frappe._("Cannot create document with multiple Sales Orders"))
def _create_sales_invoice(doc):
    """Create, save and submit a cash POS Sales Invoice for ``doc``."""
    invoice = frappe.new_doc("Sales Invoice")
    invoice.flags.ignore_permissions = True
    make_sales_invoice(doc.name, target_doc=invoice)

    # Mark as POS and replace any payments with one cash payment covering
    # the rounded total.
    invoice.is_pos = 1
    invoice.payments = []
    cash_payment = {
        "mode_of_payment": "Cash",
        "amount": invoice.rounded_total,
    }
    invoice.append("payments", cash_payment)

    invoice.save()
    invoice.submit()


# First non-empty ``against_sales_order`` among a document's items
_get_so = compose(
    first,
    filter(None),
    map(lambda x: x.against_sales_order),
    lambda x: x.items,
)


def _get_item_description(items):
    """Describe ``items`` by the first item name plus a count of the rest."""
    item_names = [x.item_name for x in items]
    if len(item_names) == 1:
        return "{}".format(item_names[0])
    return "{} +{} more item(s)".format(item_names[0], len(item_names) - 1)


def _format_datetime(dt_str):
    """Format ``dt_str`` as e.g. ``Mon Jan 5, 2020 14:30 PM``."""
    dt = frappe.utils.get_datetime(dt_str)
    return "{0:%a} {0:%b} {0.day}, {0.year} {0:%H}:{0:%M} {0:%p}".format(dt)
compose_left(*funcs)(data)
map(accept_one(compose), zip(repeat(map), funcs))


def modafinil() -> "pd.Series":
    """Load the modafinil treatment log as a date-indexed Series.

    Each line of ``data/modafinil-data`` is split into at most four fields,
    parsed by ``parsed_row`` and the rows are transposed into four columns;
    the treatments column is returned indexed by the dates column.

    Changes from the previous version: the first, immediately overwritten
    pipeline was removed (it unpacked the *rows* into four names and so
    failed unless the file had exactly four lines), the file is now closed,
    and the return annotation matches the returned Series.
    """
    with open("data/modafinil-data") as lines:
        # Unpack inside the ``with`` so the lazy pipeline is fully consumed
        # before the file is closed.
        dates, _numbers, treatments, _times = pipe(
            lines,
            map(curry(str.split)(maxsplit=3)),
            map(parsed_row),
            transpose,
        )
    return pd.Series(treatments, index=pd.DatetimeIndex(dates, name="dates"))


if __name__ == "__main__":
    sleep = fitbit.get_data("2019-03-22", "2019-04-27")
    modafinil_treatments = modafinil()
    summary = test(sleep.efficiency, modafinil_treatments)
    # Each item is a (key, value) pair and has to be unpacked: calling
    # "{}={}".format with the pair as a single argument raises IndexError.
    print("; ".join("{}={}".format(key, value)
                    for key, value in summary.items()))
def sparksql_dataframe_to_list(df, dshape=None, **kwargs):
    """Collect ``df`` into a list.

    When ``dshape`` describes a collection whose measure is not a record,
    only the first element of every collected row is returned.
    """
    result = df.collect()
    if dshape is None:
        return result
    if iscollection(dshape) and not isrecord(dshape.measure):
        return [row[0] for row in result]
    return result
def get_history(name):
    """Return the chronological event history of booking order ``name``.

    Booking logs (grouped by posting time and activity) are combined with the
    "Stopped"/"Moving" shipping logs posted between consecutive booking logs,
    and every log is turned into an event dict with the keys ``datetime``,
    ``status``, ``message`` and ``link``, sorted by datetime.
    """
    booking_logs = frappe.get_all(
        "Booking Log",
        filters={"booking_order": name},
        fields=[
            "'Booking Log' as doctype",
            "posting_datetime",
            "booking_order",
            "shipping_order",
            "station",
            "activity",
            "loading_operation",
            "loading_unit",
            "sum(no_of_packages) as no_of_packages",
            "sum(weight_actual) as weight_actual",
        ],
        order_by="posting_datetime",
        group_by="posting_datetime,activity",
    )

    def shipping_logs_between(current, following):
        # Shipping logs are only looked up for booking logs tied to a
        # shipping order.
        if not current.get("shipping_order"):
            return []
        return frappe.get_all(
            "Shipping Log",
            filters={
                "shipping_order": current.get("shipping_order"),
                "activity": ("in", ["Stopped", "Moving"]),
                "posting_datetime": (
                    "between",
                    [
                        current.get("posting_datetime"),
                        following.get("posting_datetime"),
                    ],
                ),
            },
            fields=[
                "'Shipping Log' as doctype",
                "posting_datetime",
                "shipping_order",
                "station",
                "activity",
            ],
            order_by="posting_datetime",
        )

    # A closing pseudo-log stamped "now" gives the last booking log a window
    booking_timeline = booking_logs + [{
        "posting_datetime": frappe.utils.now()
    }]
    shipping_logs = concat(
        shipping_logs_between(current, following)
        for current, following in sliding_window(2, booking_timeline)
    )

    def get_message(log):
        doctype = log.get("doctype")
        if doctype == "Booking Log":
            if log.get("loading_unit") == "Weight":
                return "{} {} units by weight at {}".format(
                    log.get("activity"),
                    abs(log.get("weight_actual")),
                    log.get("station"),
                )
            return "{} {} packages at {}".format(
                log.get("activity"),
                abs(log.get("no_of_packages")),
                log.get("station"))
        if doctype == "Shipping Log":
            prepo = "to" if log.get("activity") == "Moving" else "at"
            return "{} {} {}".format(log.get("activity"), prepo,
                                     log.get("station"))
        return ""

    def get_link(log):
        doctype = log.get("doctype")
        if doctype == "Shipping Log":
            return "#Form/Shipping Order/{}".format(log.get("shipping_order"))
        if doctype == "Booking Log" and log.get("loading_operation"):
            return "#Form/Loading Operation/{}".format(
                log.get("loading_operation"))
        return ""

    def get_event(log):
        return {
            "datetime": log.get("posting_datetime"),
            "status": log.get("activity"),
            "message": get_message(log),
            "link": get_link(log),
        }

    events = [get_event(log) for log in concat([booking_logs, shipping_logs])]
    return sorted(
        events,
        key=lambda x: frappe.utils.get_datetime(x.get("datetime")),
    )
def get_rules_statements(rules):
    """Return the statements of all ``rules`` as one flat list.

    Statements keep rule order and, within a rule, their own order. An empty
    ``rules`` yields ``[]``; the previous ``reduce(list.__add__)`` raised
    ``TypeError`` for it and copied the accumulated list once per rule.
    """
    return [statement for rule in rules for statement in rule.statements]
def _condition(self, types, *args, **kwargs) -> bool:
    """Check positional arguments against their expected types.

    ``types`` may be a single type or an iterable of types; arguments and
    types are paired positionally and surplus entries on either side are
    ignored. Keyword arguments are not checked.
    """
    if not isinstance(types, Iterable):
        types = (types,)
    return all(isinstance(value, expected)
               for value, expected in zip(args, types))
return f(*args, **kwargs) except: if i >= n - 1: raise time.sleep(timeout) return helper2 return helper1 def bottom(x): pass force = compose(any, map(bottom)) def my_fun(x): return x + 1 def url_join(*args_): args = [x for x in args_ if x] assert ''.join(args).count('?') <= 1 assert ''.join(args).count('?') == 0 or '?' in args[-1] a1, params = partition(lambda x: '?' not in x, args) if params:
def sql_to_iterator(t, bind=None, **kwargs):
    """Select everything from ``t`` using ``bind`` and return rows as tuples."""
    query = sa.select([t])
    _columns, rows = batch(query, bind=bind)
    return map(tuple, rows)
def discover_row_proxy(rp):
    """Return a ``Record`` pairing each key of ``rp`` with its value's datashape."""
    fields = [(key, discover(value))
              for key, value in zip(rp.keys(), rp.values())]
    return Record(fields)
def compute_up(t, lhs, rhs, **kwargs):
    """Compile a join expression ``t`` over two SQLAlchemy selectables.

    Raises:
        ValueError: for join types other than inner, left and right.
    """
    if isinstance(lhs, ColumnElement):
        lhs = select(lhs)
    if isinstance(rhs, ColumnElement):
        rhs = select(rhs)

    # Identically named sides are told apart by the join's suffixes
    if name(lhs) == name(rhs):
        left_suffix, right_suffix = t.suffixes
        lhs = lhs.alias('%s%s' % (name(lhs), left_suffix))
        rhs = rhs.alias('%s%s' % (name(rhs), right_suffix))

    lhs = alias_it(lhs)
    rhs = alias_it(rhs)

    def resolve_join_keys(side, keys):
        # Returns the (possibly re-aliased) side and its join-key columns
        if isinstance(side, Select):
            side = side.alias(next(aliases))
            by_name = side.c
        else:
            by_name = dict((c.name, c) for c in inner_columns(side))
        return side, [by_name.get(key) for key in listpack(keys)]

    lhs, left_conds = resolve_join_keys(lhs, t.on_left)
    rhs, right_conds = resolve_join_keys(rhs, t.on_right)

    condition = reduce(and_, map(eq, left_conds, right_conds))

    # Perform join
    how = t.how
    if how == 'inner':
        join = _join_selectables(lhs, rhs, condition=condition)
        main = lhs
    elif how == 'left':
        main = lhs
        join = _join_selectables(lhs, rhs, condition=condition, isouter=True)
    elif how == 'right':
        join = _join_selectables(rhs, lhs, condition=condition, isouter=True)
        main = rhs
    else:
        # http://stackoverflow.com/questions/20361017/sqlalchemy-full-outer-join
        raise ValueError("SQLAlchemy doesn't support full outer Join")

    # We now need to arrange the columns in the join to match the columns in
    # the expression. We care about order and don't want repeats.
    join_is_select = isinstance(join, Select)

    def cols(x):
        if join_is_select and isinstance(x, Select):
            return list(x.inner_columns)
        return list(x.columns)

    main_cols = cols(main)
    left_cols = cols(lhs)
    left_names = set(map(_getname, left_cols))
    right_cols = cols(rhs)
    right_names = set(map(_getname, right_cols))

    left_suffix, right_suffix = t.suffixes
    fields = [
        f.replace(left_suffix, '').replace(right_suffix, '')
        for f in t.fields
    ]

    # join keys first, then the remaining left and right columns
    key_columns = [c for c in main_cols if c.name in t._on_left]
    left_only = [
        _clean_join_name(right_names, left_suffix, c)
        for c in left_cols
        if c.name in fields and c.name not in t._on_left
    ]
    right_only = [
        _clean_join_name(left_names, right_suffix, c)
        for c in right_cols
        if c.name in fields and c.name not in t._on_right
    ]
    columns = key_columns + left_only + right_only

    if join_is_select:
        return join.with_only_columns(columns)
    return sa.select(columns, from_obj=join)
def select_to_iterator(sel, dshape=None, **kwargs):
    """Execute ``sel`` via ``batch`` and return its rows.

    Rows come back as tuples, or as bare first elements when ``dshape`` has a
    scalar measure.
    """
    scalar_measure = dshape and isscalar(dshape.measure)
    _columns, rows = batch(sel)
    if scalar_measure:
        return pluck(0, rows)
    return map(tuple, rows)
def get_inner_columns(sel):
    """Return the inner columns of ``sel`` as a list.

    Objects without ``inner_columns`` fall back to their ``.c`` collection,
    with every column passed through ``lower_column``.
    """
    try:
        return list(sel.inner_columns)
    except AttributeError:
        return [lower_column(column) for column in sel.c.values()]
'string': sa.Text, 'date': sa.Date, 'time': sa.Time, 'datetime': sa.DateTime, 'bool': sa.Boolean, "timedelta[unit='D']": sa.Interval(second_precision=0, day_precision=9), "timedelta[unit='h']": sa.Interval(second_precision=0, day_precision=0), "timedelta[unit='m']": sa.Interval(second_precision=0, day_precision=0), "timedelta[unit='s']": sa.Interval(second_precision=0, day_precision=0), "timedelta[unit='ms']": sa.Interval(second_precision=3, day_precision=0), "timedelta[unit='us']": sa.Interval(second_precision=6, day_precision=0), "timedelta[unit='ns']": sa.Interval(second_precision=9, day_precision=0), # ??: sa.types.LargeBinary, } revtypes = dict(map(reversed, types.items())) revtypes.update({ sa.DATETIME: datetime_, sa.TIMESTAMP: datetime_, sa.FLOAT: float64, sa.DATE: date_, sa.BIGINT: int64, sa.INTEGER: int_, sa.BIGINT: int64, sa.types.NullType: string, sa.REAL: float32, sa.Float: float64, sa.Float(precision=24): float32, sa.Float(precision=53): float64, })
def filter_memory_data(yaml_data):
    """Filter the memory time data from the meta.yaml's

    Args:
      yaml_data: the benchmark YAML data

    Returns:
      memory versus time data
    """

    def time_ratio(data):
        """Calculate wall_time divided by sim_time for the last entry
        """

        def not0(value):
            """Set to 1e-10 if 0
            """
            return 1e-10 if value == 0 else value

        last = data[-1]
        sim_time = last.get("sim_time", last.get("time"))
        wall_time = last.get("wall_time", last.get("time"))
        return float(wall_time) / not0(float(sim_time))

    def memory_usage(data):
        """Calculate the memory usage in KB
        """
        unit_map = dict(GB=1048576., KB=1., MB=1024., B=1. / 1024.)
        data_ = data if isinstance(data, dict) else data[-1]
        key = next(k for k in data_.keys() if "value" in k)
        return float(data_[key]) * unit_map[data_.get("unit", "KB")]

    def make_datum(data):
        """Build an item in the data list for one simulation
        """
        values = dict(
            time_ratio=time_ratio(data["run_time"]),
            memory_usage=memory_usage(data["memory_usage"]),
        )
        return dict(name="efficiency", values=[values])

    wanted = ("memory_usage", "run_time")
    datums = []
    for sim_name, entry in dict(yaml_data).items():
        # keep only the memory and run-time items, keyed by their name
        selected = dict(
            (item["name"], item["values"])
            for item in entry["data"]
            if item["name"].lower() in wanted
        )
        datums.append(update_dict(make_datum(selected), name=sim_name))

    return sorted(datums, key=lambda item: item["name"])
def get_inner_columns(sel):
    """Return the single inner column of a scalar select, lowered."""
    columns = list(sel.inner_columns)
    assert len(columns) == 1, 'ScalarSelect should have only ONE column'
    return [lower_column(column) for column in columns]
def __call__(self, *args, **kwargs):
    """Call every member with the given arguments.

    Returns an ``OrderedDict`` mapping each member to its result, in
    iteration order. All members are called before the mapping is built.
    """
    results = [member(*args, **kwargs) for member in self]
    return OrderedDict(zip(self, results))
def get_inner_columns(f):
    """Rebuild the SQL function ``f`` over its unique, labelled inner columns.

    Returns a one-element list holding ``sa.func.<f.name>`` applied to the
    inner columns of all of ``f``'s clauses.
    """
    clause_columns = concat(map(get_inner_columns, f.clauses))
    lowered = []
    for column in unique(clause_columns):
        lowered.append(column.label(getattr(column, 'name', None)))
    sql_function = getattr(sa.func, f.name)
    return [sql_function(*lowered)]
proc(e) if __name__ == "__main__": if len(sys.argv) != 1: print("USAGE: python3 generate_dd_txt.py ") sys.exit(1) fname, output_dir = sys.argv[0], "zrm_phone_xhe_shape" if not Path(output_dir).exists(): os.makedirs(output_dir) char_to_shape = pipe(CharShapeTable.select(), map(lambda e: (e.char, e.shapes)), reduceby(lambda e: e[0], lambda e1, e2: e1), valmap(lambda e: e[1]), dict ) print(f"total {len(char_to_shape)} char shapes") char_to_phones = pipe(CharPhoneTable.select(), map(lambda e: (e.char, e.zrm)), groupby(lambda e: e[0]), valmap(lambda phones: [e[1] for e in phones]), dict ) print(f"total {len(char_to_phones)} char phones") one_hit_char_items = generate_one_hit_char(60000)
def is_valid(field, value):
    """Return True when ``value`` lies in at least one range of ``rules[field]``."""
    return any(value in allowed for allowed in rules[field])
current_and_distance: Tuple[Text, int]) -> Iterable[Tuple[Text, int]]: current, distance = current_and_distance if distance < radius: yield from map(lambda neighbor: (neighbor, distance + 1), get_neighbors(current)) return map( toolz.first, graph_traverse(source=(source, 0), get_neighbors=get_neighbors_limiting_radius), ) edges_to_graph = toolz.compose( curried.valmap(toolz.compose(frozenset, curried.map(toolz.second))), curried.groupby(toolz.first), ) graph_to_edges = toolz.compose_left( curried.keymap(lambda x: (x, )), dict.items, curried.mapcat(functional.star(itertools.product)), ) reverse_graph = toolz.compose_left( graph_to_edges, curried.map(toolz.compose_left(reversed, tuple)), edges_to_graph) cliques_to_graph = toolz.compose_left( curried.mapcat(lambda clique: itertools.permutations(clique, r=2)),
def parse_ticket(ticket):
    """Parse a comma-separated string of integers into a list of ints."""
    return [int(number) for number in ticket.split(",")]
def json_lines_to_iterator(j, encoding='utf-8', **kwargs):
    """Yield one decoded JSON object per non-empty line of ``j.path``.

    The file stays open only while the generator is being consumed.
    """
    with json_lines(j.path, encoding=encoding) as lines:
        for line in lines:
            if nonempty(line):
                yield json.loads(line)
def discover_sqlcontext(ctx):
    """Return a record datashape with one field per table of ``ctx``.

    Fields are ordered by table name; each field's type is the discovered
    datashape of that table.
    """
    table_names = sorted(str(table_name) for table_name in ctx.tableNames())
    fields = [(table_name, discover(ctx.table(table_name)))
              for table_name in table_names]
    return datashape.DataShape(datashape.Record(fields))
import pytesseract import PIL.Image from toolz.curried import map, pipe, compose, get, do, curry, count, pluck, juxt, flip import pandas import skimage import skimage.measure import skimage.filters import skimage.morphology import json import sys import pickle fcompose = lambda *args: compose(*args[::-1]) mapdict = lambda **kwargs: map(lambda data: dict( dict((k, f(data)) for k, f in kwargs.items()), **data)) ## Helper functions @curry def dfassign(df, **kwargs): return df.assign(**dict(((k, f(df)) for k, f in kwargs.items()))) ## View the images reshape = lambda arr: arr if len(arr.shape) == 2 else arr[..., 0] to_array = lambda image: reshape(numpy.asarray(image.convert("L"))) def plt_arrays(arrs): """Plot a set of (n, n) arrays as row column sub plots.