def get_path():
    """Return the local file path for this file.

    Returns:
      the filepath
    """
    return pipe(__file__, os.path.realpath, os.path.split, get(0))
def diagnostic_yield(self, metric='completeness', cutoff=1,
                     superblock_ids=None, group_id=None, sample_ids=None):
    """Calculate diagnostic yield."""
    # extract column to filter on
    metric_column = getattr(BlockData, metric)

    # set up the base query for all blocks
    total_query = self.total_count(BlockData)

    if superblock_ids:
        # apply the superblock filter on the Block class level
        total_query = total_query.join(BlockData.parent)\
                                 .filter(Block.superblock_id.in_(superblock_ids))

    # extend base query to include only passed blocks
    pass_query = total_query.filter(metric_column >= cutoff)

    # optionally limit query
    queries = [limit_query(query, group=group_id, samples=sample_ids)
               for query in (total_query, pass_query)]

    # group multiple queries by sample ID (first column)
    metrics = groupby(get(0), concat(queries))

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    combined = (unique(concat(values)) for values in itervalues(metrics))

    # calculate diagnostic yield by simple division
    for sample_id, group_id, total, covered in combined:
        yield sample_id, group_id, (covered / total)
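# Minimal, self-contained sketch (illustrative data only, not part of the
# original module) of the grouping step used in diagnostic_yield above:
# toolz.groupby with a curried get(0) buckets row tuples from several
# queries by their first column (the sample ID) so they can be merged.
from toolz import concat, groupby
from toolz.curried import get

total_rows = [("sample1", "groupA", 120), ("sample2", "groupA", 110)]
passed_rows = [("sample1", "groupA", 96), ("sample2", "groupA", 55)]

merged = groupby(get(0), concat([total_rows, passed_rows]))
# merged == {
#     "sample1": [("sample1", "groupA", 120), ("sample1", "groupA", 96)],
#     "sample2": [("sample2", "groupA", 110), ("sample2", "groupA", 55)],
# }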
def add_column(self, variables: Union[List[Text], Text]) -> None:
    """Add one or more columns to the dataframe.

    Parameters:
        variables (Union[List[Text], Text]): What variable(s) to add
    """
    if type(variables) is not list:
        variables = [variables]

    for var in variables:
        try:
            assert var not in self.vars
        except AssertionError:
            raise ValueError(
                "Cannot add column {0}: already in frame".format(var))

    self.vars += variables

    # bind ``var`` as a default argument so each lambda keeps its own variable
    # instead of all of them closing over the comprehension's final value
    newcols = {
        var: lambda state, var=var: CP.PropsSI(
            var, self.xvar, state[self.xvar], self.yvar, state[self.yvar],
            self.fluid)
        for var in variables
    }
    buffer = {key: [] for key in newcols}
    for index, row in self.data.iterrows():
        buffer = itemmap(
            lambda tup: (tup[0], tup[1] + [apply(get(tup[0], newcols), row)]),
            buffer)
    for key in newcols:
        self.data[key] = pd.Series(buffer[key], index=self.data.index)
    self.make_units()
    self.make_meta()
def lookup_mac(mac):
    return _.pipe(
        requests.get(f'http://macvendors.co/api/{mac}'),
        __.maybe_json(default={}),
        _.get('result', default={}),
        lambda d: {'mac': mac, 'info': d},
    )
def get_service_step(service_recipe):
    """
    Get step timedelta: the smallest duration of service_recipe's periods.
    """
    def diff(start, end):
        return end - start

    res_delta_diffs = compose(map(lambda p: diff(*p)), get('delta_periods'))
    return compose(min, map(min), map(res_delta_diffs))(service_recipe)
def remove_indexes(iterable: Iterable, indexes: List[int]) -> Iterable:
    return pipe(
        iterable,
        add_index,
        filter(decorate_unpack(lambda i, _: i not in indexes)),
        map(get(1)),
        list,
    )
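# Minimal, self-contained sketch of what the pipeline in remove_indexes above
# computes, using plain enumerate as a stand-in for the (assumed) add_index /
# decorate_unpack helpers; filter, map, and get come from toolz.curried.
from toolz import pipe
from toolz.curried import filter, map, get

items, indexes = ["a", "b", "c", "d"], [1, 3]
result = pipe(
    enumerate(items),                              # stand-in for add_index
    filter(lambda pair: pair[0] not in indexes),   # drop unwanted positions
    map(get(1)),                                   # keep the original items
    list,
)
# result == ["a", "c"]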
def _county_fips_name(fips):
    return pipe(
        Granularity.COUNTY,
        geo_data,
        curried.get("features"),
        curry(filter, lambda region: region["id"] == fips),
        excepts(
            StopIteration,
            lambda x: pipe(x, first, curried.get_in(["properties", "NAME"])),
            lambda _: None))
def hist(hist_function, *, options={}, **interact_params):
    """
    Generates an interactive histogram that allows users to change the
    parameters of the input hist_function.

    Args:
        hist_function (Array | (*args -> Array int | Array float)):
            Function that takes in parameters to interact with and returns an
            array of numbers. These numbers will be plotted in the resulting
            histogram.

    Kwargs:
        {options}

        interact_params (dict): Keyword arguments in the same format as
            `ipywidgets.interact`. One argument is required for each argument
            of `hist_function`.

    Returns:
        None

    >>> def gen_random(n_points):
    ...     return np.random.normal(size=n_points)
    >>> hist(gen_random, n_points=(0, 1000, 10))
    interactive(...)
    """
    params = {
        'mark': {
            'sample': _array_or_placeholder(hist_function),
            'bins': tz.get('bins'),
            'normalized': tz.get('normalized'),
            'scales': (
                lambda opts: {'sample': opts['x_sc'], 'count': opts['y_sc']}
            ),
        },
    }
    hist, fig = _create_plot(mark=bq.Hist, options=options, params=params)

    def wrapped(**interact_params):
        hist.sample = util.call_if_needed(hist_function, interact_params)

    display_widgets = widgets.interactive(wrapped, **interact_params)
    display(display_widgets)
    display(fig)
def tom_base_state_to_base_state(tom_base_state):
    tom_base_state = valmap(compose(np.copy, np.flip), tom_base_state)
    q, T, z, rho = get(["qv", "T", "z", "rho"], tom_base_state)
    return {
        "QT": q * 1000.0,
        "SLI": T + grav / cp * z,
        "height_center": z,
        "density": rho
    }
def update(self, output):
    target, image_dets, batch_size = get(["target", "preds", "batch_size"],
                                         output)
    image_gts = target[0]
    for dets, gts in zip(image_dets, image_gts):
        image_id = gts[0]['image_id']
        for d in dets:
            d = {**d, 'image_id': image_id}
            self.res.append(d)
def render_tabular(api, options=None):
    """Entry point for the tabular reporter interface."""
    # determine separator
    separator = options.get('report.separator', '\t')
    human = options.get('report.human')
    panel = options.get('report.panel')
    samples = options.get('report.samples')
    group = options.get('report.group')

    # read gene panel file if it has been set
    if panel:
        superblock_ids = [line.rstrip() for line in panel]
    else:
        superblock_ids = None

    # get sample ID, group and cutoff from metadata
    sample_query = limit_query(api.samples(), group=group, samples=samples)
    metadata = ((sample.id, sample.group_id, sample.cutoff)
                for sample in sample_query)

    # get the data
    base_query = limit_query(
        api.average_metrics(superblock_ids=superblock_ids),
        group=group,
        samples=samples)

    queries = [
        metadata,
        base_query,
        api.diagnostic_yield(superblock_ids=superblock_ids, group_id=group,
                             sample_ids=samples),
        api.sex_checker(group_id=group, sample_ids=samples)
    ]

    # group multiple queries by sample ID (first column)
    key_metrics = groupby(get(0), concat(queries))

    # get the column names dynamically from the query
    headers = concatv(['sample_id', 'group_id', 'cutoff'],
                      (column['name'] for column
                       in base_query.column_descriptions),
                      ['diagnostic yield', 'gender'])
    unique_headers = unique(headers)

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    data = (unique(concat(values)) for values in itervalues(key_metrics))

    if human:
        # export key_metrics in a more human friendly format
        return tabulate(data, unique_headers)

    # yield headers
    return '\n'.join(
        cons('#' + separator.join(unique_headers),
             stringify_list(data, separator=separator)))
def has_cycle(graph):
    return toolz.pipe(
        graph,
        dict.keys,
        curried.map(
            _has_cycle(frozenset(), curried.get(seq=graph, default=()), set())),
        any,
    )
def load_state_dict(self, state_dict):
    epochs, model, optimizer, lr_scheduler, metric_history = get(
        ["epochs", "model", "optimizer", "lr_scheduler", "metric_history"],
        state_dict)
    self._epochs = epochs
    self.model.load_state_dict(model)
    self.optimizer.load_state_dict(optimizer)
    if self.lr_scheduler and lr_scheduler:
        self.lr_scheduler.load_state_dict(lr_scheduler)
    self.metric_history = metric_history
def output_transform(self, output):
    if self.mix:
        if isinstance(self.mix, Mixup) or (isinstance(self.mix, CutMix)
                                           and self.mix.lam):
            y_pred, y_true, batch_size = get(
                ["y_pred", "y_true", "batch_size"], output)
            y_a, y_b = y_true
            y_pred = torch.topk(y_pred, k=2, dim=1)[1]
            y_a_p = y_pred[:, 0]
            y_b_p = y_pred[:, 1]
            if self.mix.lam < 0.5:
                y_a_p, y_b_p = y_b_p, y_a_p
            num_corrects = (
                self.mix.lam * y_a_p.eq(y_a).sum().cpu().float() +
                (1 - self.mix.lam) * y_b_p.eq(y_b).sum().cpu().float())
            acc = num_corrects / batch_size
            return acc, batch_size
    y_pred, y_true = get(["y_pred", "y_true"], output)
    return accuracy(y_true, y_pred)
def dict_configuration_to_params(configuration):
    parameters = pipeline(
        configuration,
        [
            instantiate_configuration,
            get('pipeline'),
            configuration_to_params,
        ],
    )
    return parameters
def groupby_count(func):
    """Group the simulation data based on a function.

    Args:
      func: function to group by

    Returns:
      a dict of counts of data items per group
    """
    return pipe(get_yaml_data(), map(get(1)), groupby(func), valmap(count))
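# Minimal, self-contained sketch (made-up records instead of get_yaml_data()
# output) of the pipe / map(get(1)) / groupby / valmap(count) pattern used by
# groupby_count above.
from toolz import pipe, count
from toolz.curried import map, groupby, valmap, get

records = [("run1", {"solver": "gmres"}),
           ("run2", {"solver": "cg"}),
           ("run3", {"solver": "gmres"})]
counts = pipe(
    records,
    map(get(1)),                     # keep only the data dicts
    groupby(lambda d: d["solver"]),  # group by the chosen key
    valmap(count),                   # count entries per group
)
# counts == {"gmres": 2, "cg": 1}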
def concat(key):
    return pipe(
        lambda x: func(np.array(x)),
        delayed,
        lambda x: fmap(lambda y: (y.shape, x(y)), data.blocks),
        fmap(lambda x: (x[0], get(key, x[1]))),
        fmap(lambda x: from_delayed(key, x[0], x[1])),
        list,
        lambda x: da.concatenate(x, axis=0),
    )
def _common(self, Z, y):
    scale = Scaler(Z)
    transform = compose(prepend_x0, Scaler.normalize)
    X = transform(scale)
    data = zip(X, y)
    h_theta0 = [0.] * len(X[0])
    coeff = compose(scale.denormalize, get(0),
                    lin_reg(J, gradJ, h_theta0, it_max=2000))
    h_thetad = coeff(data)
    return h_thetad
def find_yaml_file(path):
    """Find a YAML file in the path

    Args:
      path: find a YAML file on the path

    Returns:
      the path to the YAML file
    """
    return pipe(path,
                glob_all_files_27,
                filter(lambda x: tail(5, x) == '.yaml'),
                list,
                get(0))
def test_combined():
    """Run a combined test
    """
    assert pipe(
        get_params(),
        assoc(key="fipy_iter", value=2),
        run_main,
        get("eta"),
        np.array,
        np.sum,
        lambda x: np.allclose(x, 1515.784),
    )
def comment_staticman(github_token, ci_data):
    """Sequence of functions to get data from github for staticman comment
    and then write the comment to github
    """
    return sequence(
        pr_url,
        requests_get(github_token),
        lambda x: x.json(),
        get("body"),
        archieml.loads,
        comment_staticman_(ci_data),
    )(ci_data)
def from_tom_base_state(tom_base_state):
    """Return WaveEq from Tom's base state"""
    # TODO refactor this to an abstract factory (A)
    tom_base_state = valmap(compose(np.copy, np.flip), tom_base_state)
    q, T, z, rho = get(["qv", "T", "z", "rho"], tom_base_state)

    base_state = {
        "QT": q * 1000.0,
        "SLI": T + grav / cp * z,
        "height_center": z,
        "density": rho
    }

    return WaveEq(base_state)
def random_sample_crop(anns, size, min_iou, min_ar, max_ar, max_attemps=50):
    """
    Randomly sample a crop window over the image and keep the annotations
    whose box centers fall inside it.

    A crop of random size (0.3 to 1.0 of the original size) and random aspect
    ratio (between `min_ar` and `max_ar`) is sampled; it is accepted only if
    every object box overlaps it with at least `min_iou`.

    Parameters
    ----------
    anns : ``List[Dict]``
        Sequences of annotation of objects, containing `bbox` of [l, t, w, h].
    size : ``Sequence[int]``
        Size of the original image.
    min_iou : ``float``
        Minimal iou between the objects and the cropped image.
    min_ar : ``Number``
        Minimal aspect ratio.
    max_ar : ``Number``
        Maximum aspect ratio.
    max_attemps : ``int``
        Maximum attempts to try.
    """
    width, height = size
    bboxes = np.stack([ann['bbox'] for ann in anns])
    bboxes[:, 2:] += bboxes[:, :2]

    for _ in range(max_attemps):
        w = random.uniform(0.3 * width, width)
        h = random.uniform(0.3 * height, height)
        if h / w < min_ar or h / w > max_ar:
            continue

        l = random.uniform(0, width - w)
        t = random.uniform(0, height - h)
        r = l + w
        b = t + h

        patch = np.array([l, t, r, b])
        ious = iou_1m(patch, bboxes)
        if ious.min() < min_iou:
            continue

        centers = (bboxes[:, :2] + bboxes[:, 2:]) / 2.0
        mask = (l < centers[:, 0]) & (centers[:, 0] < r) & \
               (t < centers[:, 1]) & (centers[:, 1] < b)
        if not mask.any():
            continue

        indices = np.nonzero(mask)[0].tolist()
        return get(indices, anns), l, t, w, h
    return None
def get_path(file_):
    """Return the local file path for this file.

    Returns:
      the filepath
    """
    return pipe(
        file_,
        os.path.realpath,
        os.path.split,
        get(0)
    )
def compare_metrics_in_table(
    metrics_for_methods: Dict[str, ClassificationMetricsWithStatistics],
    include: Tuple[str, ...] = ('balanced_accuracy', 'roc_auc', 'recall', 'fpr'),
    format_method_name: Callable[[str], str] = identity,
    include_ci_for: Set[str] = None,
    include_delta: bool = False,
) -> List[List]:
    if include_ci_for is None:
        include_ci_for = include

    def get_line(
        method: str,
        metrics: Union[ClassificationMetrics, ClassificationMetricsWithStatistics],
    ):
        return [
            format_method_name(method),
            *pipe(
                [
                    [
                        metrics[metric].mean,
                        (
                            metrics[metric].mean -
                            get_max_metric_value(metric,
                                                 metrics_for_methods.values())
                        ) if include_delta else None,
                    ] + ([format_ci(metrics[metric].ci)]
                         if metric in include_ci_for else [])
                    for metric in include
                ],
                flatten,
                compact,
            ),
        ]

    lines = pipe(
        [get_line(method, metrics)
         for method, metrics in metrics_for_methods.items()],
        partial(sorted, key=get(1), reverse=True),
    )

    return format_structure(
        format_decimal,
        [
            [
                '',
                *flatten(
                    map(
                        lambda metric: [format_metric_short(metric),
                                        *(['Δ'] if include_delta else [])] +
                                       (['95% CI']
                                        if metric in include_ci_for else []),
                        include
                    )
                )
            ],
            *lines,
        ],
    )
def __init__(self, anchors, pos_thresh=0.5, neg_thresh=None,
             get_label=get('category_id'), debug=False):
    self.anchors_xywh = flatten(anchors)
    self.anchors_ltrb = BBox.convert(self.anchors_xywh, BBox.XYWH, BBox.LTRB)
    self.pos_thresh = pos_thresh
    self.neg_thresh = neg_thresh
    self.get_label = get_label
    self.debug = debug
def compare_and_format_results(
    y_true: Series,
    results_for_methods: Dict[str, List[ModelCVResult]],
    include: Tuple[str] = ('balanced_accuracy', 'roc_auc', 'recall', 'fpr',
                           'f1', 'average_precision'),
) -> str:
    metrics_for_methods = valmap(
        lambda r: compute_classification_metrics_from_results_with_statistics(
            y_true, r), results_for_methods)

    def get_line(method: str, metrics: ClassificationMetricsWithStatistics):
        return [
            format_method(method),
            *[metrics[metric].mean for metric in include]
        ]

    lines = sorted(
        [
            get_line(method, metrics)
            for method, metrics in metrics_for_methods.items()
        ],
        key=get(1),
        reverse=True,
    )

    max_by_column = [
        None if index == 0 else max(pluck(index, lines))
        for index in range(len(include) + 1)
    ]

    lines_with_differences = [
        list(
            flatten([
                item if item_index == 0 else
                [item, item - max_by_column[item_index]]
                for item_index, item in enumerate(line)
            ])) for line in lines
    ]

    return tabulate_formatted(
        format_structure(
            format_decimal,
            [
                [
                    '',
                    *flatten(
                        map(lambda metric: [format_metric_short(metric), ''],
                            include))
                ],
                *lines_with_differences,
            ],
        ))
def get_data_frame(document_id: str, raw: bool = False,
                   auth_args: Auth = Auth.shared()):
    auth = Auth(auth_args)
    document = Document.get(document_id, auth_args=auth_args)
    file_id = pipe(
        document.get("content", []),
        c.filter(lambda c: c.get("format", {}).get("code") == "ocr-text-file-id"),
        c.first,
        c.get("attachment", default={}),
        c.get("url"),
        iffy(isa(str), lambda url: url.split("/")[-1]),
    )

    if file_id is None:
        raise ValueError(f"No block file found for document: '{document_id}'")

    files = Files(auth.session())
    filename = files.download(file_id, "/tmp/")

    frame = pd.read_json(filename, lines=True)
    os.remove(filename)

    if raw or len(frame) == 0:
        return frame

    return Block.sort(
        frame.drop(["Geometry"], axis=1).join(
            pd.json_normalize(frame.Geometry)).pipe(
                partial(
                    Frame.expand,
                    custom_columns=[
                        Frame.codeable_like_column_expander("Polygon")
                    ],
                )).set_index("Id"))
def main(): doc = """dotrunner Usage: dotrunner <root> [--dry-run] dotrunner --version Options: -d --dry-run Perform dry run (don't apply changes to filesystem) --version Show version. -h --help Show this screen. """ args = docopt(doc, version='dotrunner {}'.format(VERSION)) (root, dry_run) = get(['<root>', '--dry-run'])(args) IO = DryRunIO if dry_run else FileSystemIO run(root, IO())
def load_state_dict(self, state_dict):
    epochs, model, optimizer, lr_scheduler, amp_state, metric_history = get(
        [
            "epochs", "model", "optimizer", "lr_scheduler", "amp",
            "metric_history"
        ], state_dict)
    self._epochs = epochs
    self.model.load_state_dict(model)
    self.optimizer.load_state_dict(optimizer)
    if self.lr_scheduler and lr_scheduler:
        self.lr_scheduler.load_state_dict(lr_scheduler)
    if self.fp16 and amp_state is not None:
        from apex import amp
        amp.load_state_dict(amp_state)
    self.metric_history = metric_history
def main(): doc = """dotlinker Usage: dotlinker <from> <to> [--dry-run] dotlinker --version Options: -d --dry-run Perform dry run (don't apply changes to filesystem) --version Show version. -h --help Show this screen. """ args = docopt(doc, version="dotlinker 0.1.2") (source, target, dry_run) = get(['<from>', '<to>', '--dry-run'])(args) IO = DryRunIO if dry_run else FileSystemIO link(source, target, IO())
def output_transform(self, output):
    targets, preds, batch_size = get(["target", "preds", "batch_size"], output)
    gts = targets[0]
    if isinstance(gts[0], Image.Image):
        gts = [np.array(img) for img in gts]
    elif torch.is_tensor(gts):
        gts = gts.cpu().byte().numpy()
    v = np.mean([
        mean_iou(preds[i], gts[i], self.num_classes)
        for i in range(batch_size)
    ])
    return v, batch_size
def output_transform(self, output):
    y_true, y_pred, batch_size = get(
        ["y_true", "y_pred", "batch_size"], output)
    y_pred = y_pred.argmax(dim=1)
    accs = []
    for i in range(batch_size):
        y = y_true[i]
        p = y_pred[i]
        tp = (y == p).sum()
        if self.ignore_index is not None:
            tp += (y == self.ignore_index).sum()
        accs.append(tp.cpu().item() / np.prod(y.shape))
    acc = np.mean(accs)
    return acc, batch_size
def compute(t, lhs, rhs):
    lhs = compute(t.lhs, lhs)
    rhs = compute(t.rhs, rhs)
    col_idx_lhs = t.lhs.columns.index(t.on_left)
    col_idx_rhs = t.rhs.columns.index(t.on_right)
    lhs = lhs.keyBy(lambda x: x[col_idx_lhs])
    rhs = rhs.keyBy(lambda x: x[col_idx_rhs])

    # Calculate the indices we want in the joined table
    columns = t.lhs.columns + t.rhs.columns
    repeated_index = len(columns) - columns[::-1].index(t.on_right) - 1
    wanted = list(range(len(columns)))
    wanted.pop(repeated_index)

    getter = get(wanted)
    reassemble = lambda x: getter(x[1][0] + x[1][1])

    return lhs.join(rhs).map(reassemble)
def get_by_uuid(uuid, path='.'):
    """Get a Treant by short ID

    Args:
      uuid: a portion of the uuid
      path: the search path for Treants

    Returns:
      a Treant
    """
    return pipe(
        path,
        dtr.discover,
        list,
        filter(lambda x: uuid in x.uuid),
        list,
        get(0, default=None)
    )
def compute(t, lhs, rhs):
    lhs = compute(t.lhs, lhs)
    rhs = compute(t.rhs, rhs)

    on_left = rowfunc(t.lhs[t.on_left])
    on_right = rowfunc(t.rhs[t.on_right])

    lhs = lhs.keyBy(on_left)
    rhs = rhs.keyBy(on_right)

    # Calculate the indices we want in the joined table
    columns = t.lhs.columns + t.rhs.columns
    repeated_index = set([len(t.lhs.columns) + t.rhs.columns.index(col)
                          for col in listpack(t.on_right)])
    wanted = [i for i in range(len(columns)) if i not in repeated_index]

    getter = get(wanted)
    reassemble = lambda x: getter(x[1][0] + x[1][1])

    return lhs.join(rhs).map(reassemble)
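# Minimal sketch (toy tuples, no Spark) of the reassembly step used in the two
# compute() join implementations above: after rdd.join(), each element looks
# like (key, (left_row, right_row)), and get(wanted) plucks the surviving
# column positions from the concatenated row.
from toolz import get

joined_element = (1, (("alice", 1), (1, 100.0)))   # (key, (lhs_row, rhs_row))
wanted = [0, 1, 3]                                 # drop the repeated join column
row = get(wanted, joined_element[1][0] + joined_element[1][1])
# row == ("alice", 1, 100.0)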
def sparksql_dataframe_to_list(df, dshape=None, **kwargs):
    result = df.collect()
    if (dshape is not None and iscollection(dshape) and
            not isrecord(dshape.measure)):
        return list(map(get(0), result))
    return result
def test_get_curried():
    first = get(0)
    for p in pairs:
        first(p)
def get_service_duration(service_recipe):
    """
    Get duration timedelta: The global duration of service_recipe.
    """
    res_delta_ends = compose(last, lambda p: zip(*p), get('delta_periods'))
    return compose(max, map(max), map(res_delta_ends))(service_recipe)
def uppercase_first_letter_compose(word):
    comp = functoolz.compose(get(0), lambda c: c.upper())
    return comp(word)
def test_into():
    with collection([]) as coll:
        key = get(['name', 'amount'])
        assert set(into([], into(coll, bank), columns=['name', 'amount'])) ==\
            set([('Alice', 100), ('Alice', 200), ('Bob', 100),
                 ('Bob', 200), ('Bob', 300)])
def get_gifts(people):
    # ``pipe(data, f, g, h)`` is equivalent to ``h(g(f(data)))``
    return pipe(people,
                filter(lambda v: v['age'] < 18 and v['well_behaved']),
                mapcat(get(['name'])),
                list)
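# Hypothetical call to get_gifts above (assumes the curried toolz filter,
# mapcat, and get used by that function are in scope): only well-behaved
# minors make the list.
people = [
    {"name": "Ann", "age": 12, "well_behaved": True},
    {"name": "Bob", "age": 45, "well_behaved": True},
    {"name": "Cid", "age": 9, "well_behaved": False},
]
# get_gifts(people) == ["Ann"]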