def resolve_value(self, info, keys=None, k=None, last=None, window=None):
    """GraphQL resolver: return the metric values stored in this node's file.

    :param info: GraphQL resolve info (unused by this resolver).
    :param keys: optional list of column names. When given, only these
        columns are returned and rows with NaN in any of them are dropped.
    :param k: unused here; kept for schema compatibility.
    :param last: unused here; kept for schema compatibility.
    :param window: unused here; kept for schema compatibility.
    :return: dict mapping column name -> list of values.
    """
    # self.id starts with a path separator; strip it so join() treats it
    # as relative to the log directory.
    path = join(Args.logdir, self.id[1:])
    # NOTE(review): the original called realpath(path) and discarded the
    # result — a pure no-op (realpath has no side effects), so it is removed.
    frame = read_dataframe(path)
    if keys:
        # Restrict to the requested columns and drop incomplete rows.
        df = frame[keys].dropna()
        # Comprehension variable renamed so it no longer shadows the
        # `k` parameter.
        return {key: df[key].values.tolist() for key in keys}
    else:
        df = frame.dropna()
        return {key: col.values.tolist() for key, col in df.items()}
def resolve_keys(self, info):
    """GraphQL resolver: list the column names of this node's metrics file.

    Rows containing NaN are dropped before the keys are collected, matching
    the filtering used by the value resolver.
    """
    # self.id begins with a path separator; strip it before joining.
    file_path = join(Args.logdir, self.id[1:])
    frame = read_dataframe(file_path)
    return list(frame.dropna().keys())
def get_series(
        metrics_files=tuple(),
        prefix=None,
        head=None,
        tail=None,
        x_low=None,
        x_high=None,
        x_edge=None,  # OneOf(None, 'left', 'right', 'mean'); None defaults to 'right'
        k=None,
        x_align=None,  # OneOf(None, 'start', 'end', <number>)
        x_key=None,
        y_key=None,
        y_keys=None,
        label=None):
    """Load one or more metrics files, align/filter/bin them, and return a Series.

    :param metrics_files: paths of the metrics files to load. Must be absolute
        when ``prefix`` is falsy.
    :param prefix: optional path prefix joined in front of each metrics file.
    :param head: keep only the first ``head`` rows of each file (exclusive with tail).
    :param tail: keep only the last ``tail`` rows of each file (exclusive with head).
    :param x_low: lower bound (inclusive) on the x column / index.
    :param x_high: upper bound (inclusive) on the x column / index.
    :param x_edge: which edge of each quantile bin to use as the x value when
        ``k`` is given: 'right' (default), 'left', or 'mean'.
    :param k: number of quantile bins to group the x axis into; when None,
        rows are grouped by identical index values instead.
    :param x_align: shift the x column: 'start' (to first value), 'end'
        (to last value), or a number to subtract.
    :param x_key: column to use as the x axis; falls back to the row index.
    :param y_key: single y column (exclusive with y_keys).
    :param y_keys: list of y columns (exclusive with y_key).
    :param label: label passed through to the returned Series.
    :return: a Series wrapping the aggregated dataframe, or None when no
        data could be loaded.
    :raises KeyError: when requested keys are missing, or x_edge is invalid.
    """
    warning = None
    assert not y_key or not y_keys, "yKey and yKeys can not be trueful at the same time"
    assert y_key or y_keys, "yKey and yKeys can not be both falseful."
    assert head is None or tail is None, "head and tail can not be trueful at the same time"
    if not prefix:
        for metrics_file in metrics_files:  # renamed: `id` shadowed the builtin
            assert isabs(metrics_file), \
                f"metricFile needs to be an absolute path if prefix is {prefix}. " \
                f"It is {metrics_file} instead."

    ids = [join(prefix or "", metrics_file) for metrics_file in metrics_files]
    # Paths start with a separator; strip it so join() treats them as
    # relative to the log directory.
    dfs = [read_dataframe(join(Args.logdir, _id[1:])) for _id in ids]

    y_keys = y_keys or [y_key]
    join_keys = [key for key in {x_key, *y_keys} if key is not None]

    dataframes = []
    for df in dfs:
        if df is None:  # file was missing/unreadable; skip it
            continue
        if x_key is not None:
            # NOTE(review): the original called df.set_index(x_key) and
            # discarded the result — a no-op, since set_index is not
            # in-place. Making it effective would break the x_key column
            # arithmetic below, so the dead call is removed. TODO confirm intent.
            if x_align is None:
                pass
            elif x_align == "start":
                # todo: this needs to be part of the join
                df[x_key] -= df[x_key][0]
            elif x_align == "end":
                df[x_key] -= df[x_key][-1]
            else:
                # numeric offset
                df[x_key] -= x_align
        else:
            df = df[y_keys]
            df['index'] = df.index
            # NOTE(review): the original also called df.set_index('index')
            # and discarded the result (another no-op); removed as dead code.

        # todo: maybe apply tail and head *after* dropna??
        if tail is not None:
            df = df.tail(tail)
        if head is not None:
            df = df.head(head)

        # Build a boolean mask only when range bounds are given; `True`
        # is the sentinel for "no filtering".
        inds = True
        if x_low is not None:
            inds &= df[x_key or "index"] >= x_low
        if x_high is not None:
            inds &= df[x_key or "index"] <= x_high
        if inds is not True:
            df = df.loc[inds]

        # todo: only dropna if we are not using ranges. <need to test>
        try:
            if head is None and tail is None:
                dataframes.append(df[join_keys].dropna())
            else:
                dataframes.append(df[join_keys])
        except KeyError as e:
            raise KeyError(
                f"{join_keys} contain keys that is not in the dataframe. "
                f"Keys available include {df.keys()}") from e

    if not dataframes:  # No dataframe, return `null`.
        return None

    merged = pd.concat(dataframes)  # renamed: `all` shadowed the builtin
    if x_key:
        merged = merged.set_index(x_key)

    if k is not None:
        # Quantile-bin the x axis into (at most) k bins.
        bins = pd.qcut(merged.index, k, duplicates='drop')
        grouped = merged.groupby(bins)
    else:
        # treat all numbers in bin as equal. For raw (not averaged, or averaged)
        grouped = merged.groupby(level=0)

    df = grouped[y_keys].describe(
        percentiles=[0.25, 0.75, 0.5, 0.05, 0.95]).reset_index()

    if k is not None:
        # Collapse each interval bin to a scalar x coordinate.
        if x_edge == "right" or x_edge is None:
            df['__x'] = df['index'].apply(lambda r: r.right)
        elif x_edge == "left":
            df['__x'] = df['index'].apply(lambda r: r.left)
        elif x_edge == "mean":
            df['__x'] = df['index'].apply(lambda r: 0.5 * (r.left + r.right))
            # todo: use mode of each bin
        else:
            # Fixed: the message now lists the values this branch actually accepts.
            raise KeyError(
                f"x_edge {[x_edge]} should be OneOf['left', 'right', 'mean']"
            )
    else:
        df['__x'] = df.index

    return Series(metrics_files,
                  _df=df.sort_values(by="__x"),
                  metrics_files=metrics_files,
                  prefix=prefix,
                  x_key=x_key or "index",
                  y_key=y_key,
                  y_keys=y_keys,
                  label=label,
                  warning=warning)