import os
import pickle

import numpy as np
import pandas as pd
import sklearn.decomposition

import lmj.cubes.fill
import lmj.pca
import lmj.plot

# `jacobian`, `PCA_FILE`, and `MODEL` are module-level helpers/constants defined elsewhere.


def main(root, pattern='68/*block03/*trial00', frames=10, start=27.5, stop=38.5):
    # Compute jacobians for the matching trials and load a pretrained PCA model.
    _, jac = jacobian(root, pattern, frames)
    n = int(np.sqrt(len(jac.columns) / 9))
    pca = lmj.pca.PCA(filename=PCA_FILE)
    for v in (0.5, 0.8, 0.9, 0.95, 0.98, 0.99, 0.995, 0.998, 0.999):
        print('variance', v, 'components', pca.num_components(v))

    # Load a cached sparse-coding model, or learn one over the PCA-encoded jacobians.
    if os.path.exists(MODEL):
        dl = pickle.load(open(MODEL, 'rb'))
    else:
        dl = sklearn.decomposition.MiniBatchSparsePCA(
            n_components=100, alpha=0.0001, verbose=10)
        dl.fit(pca.encode(jac.dropna().values, retain=0.999))
        pickle.dump(dl, open(MODEL, 'wb'), -1)

    # Decode the learned dictionary atoms back into jacobian space.
    df = pd.DataFrame(pca.decode(dl.components_, retain=0.999), columns=jac.columns)

    # Sparse-code the PCA-encoded jacobians for the requested time slice and plot.
    enc = sklearn.decomposition.sparse_encode(
        pca.encode(jac.dropna().loc[start:stop, :], retain=0.999),
        dl.components_, algorithm='omp', alpha=0.0001)
    kw = dict(vmin=-10, vmax=10, cmap='RdBu')
    lmj.plot.create_axes(111, spines=False).imshow(enc.T, **kw)
    lmj.plot.show()

    # For comparison, sparse-code the raw jacobians against the leading PCA vectors.
    enc = sklearn.decomposition.sparse_encode(
        jac.dropna().loc[start:stop, :],
        pca.vecs.T[:len(dl.components_)], algorithm='omp', alpha=0.0001)
    lmj.plot.create_axes(111, spines=False).imshow(enc.T, **kw)
    lmj.plot.show()

    def find(i, g, b):
        # Reshape atom i into an n-by-n block for goal axis g and body axis b.
        cs = [c for c in jac.columns if '{}/'.format(g) in c and c.endswith(b)]
        return df[cs].loc[i, :].values.reshape((n, n))

    def plot(where, i, g, b):
        ax = lmj.plot.create_axes(where, spines=False)
        ax.imshow(find(i, g, b), cmap='RdBu')

    # Show each of the first ten atoms as a 3x3 grid of axis-pair blocks.
    for i in range(10):
        plot(331, i, 'x', 'x')
        plot(332, i, 'x', 'y')
        plot(333, i, 'x', 'z')
        plot(334, i, 'y', 'x')
        plot(335, i, 'y', 'y')
        plot(336, i, 'y', 'z')
        plot(337, i, 'z', 'x')
        plot(338, i, 'z', 'y')
        plot(339, i, 'z', 'z')
        lmj.plot.gcf().subplots_adjust(
            left=0, right=1, bottom=0, top=1, wspace=0, hspace=0)
        lmj.plot.show()
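
# --- Illustration (not part of the original script) ---------------------------
# A minimal, self-contained sketch of the dictionary-learning / sparse-coding
# pattern used in main() above, run on synthetic Gaussian data instead of the
# PCA-encoded jacobians. The names X_train and X_new are illustrative only.
def _sparse_coding_sketch():
    import numpy as np
    import sklearn.decomposition

    rng = np.random.RandomState(0)
    X_train = rng.normal(size=(500, 40))   # stand-in for pca.encode(jac, retain=0.999)
    X_new = rng.normal(size=(20, 40))      # stand-in for a time slice to re-encode

    # Learn a sparse dictionary over the training samples.
    dl = sklearn.decomposition.MiniBatchSparsePCA(n_components=10, alpha=0.0001)
    dl.fit(X_train)

    # Re-encode new samples as sparse combinations of the learned atoms.
    codes = sklearn.decomposition.sparse_encode(
        X_new, dl.components_, algorithm='omp', alpha=0.0001)
    print(codes.shape)                     # -> (20, 10)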
def fill(dfs, rank, window):
    '''Complete missing marker data using linear interpolation.

    This method alters the given `dfs` in-place.

    Parameters
    ----------
    dfs : list of pd.DataFrame
        Frames of source data. The frames will be stacked into a single large
        frame and interpolated linearly, either in the data space or (if rank
        is not None) in principal component space.
    rank : float
        Number of principal components (if >1) or fraction of variance (if in
        (0, 1)) to retain in the encoded data.
    window : int
        Model windows of this many consecutive frames.
    '''
    df = lmj.cubes.fill.stack(dfs, window)
    centers = lmj.cubes.fill.center(df)
    if rank is None:
        # Build the prediction directly in the data space.
        prediction, _, _ = lmj.cubes.fill.window(df, window, True)
    else:
        if not 0 < rank < 1:
            rank = int(rank)
        # Encode windows with PCA, interpolate linearly in component space,
        # then decode back to the data space.
        pca = lmj.pca.PCA()
        pos, _, _ = lmj.cubes.fill.window(df, window, None)
        pca.fit(pos)
        enc = pd.DataFrame(pca.encode(pos, retain=rank))
        lin = enc.interpolate().ffill().bfill().values
        prediction = pca.decode(lin, retain=rank)
    lmj.cubes.fill.update(df, prediction, window)
    lmj.cubes.fill.restore(df, centers)
    lmj.cubes.fill.unstack(df, dfs)
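
# --- Illustration (not part of the original module) ----------------------------
# A minimal sketch of the encode / interpolate / decode idea described in the
# fill() docstring, using sklearn's PCA and a synthetic signal in place of
# lmj.pca.PCA and the lmj.cubes.fill window helpers; every name and value here
# is an assumption for illustration only.
def _pca_interpolation_sketch():
    import numpy as np
    import pandas as pd
    from sklearn.decomposition import PCA

    rng = np.random.default_rng(0)
    t = np.linspace(0, 10, 200)
    data = np.column_stack([np.sin(t + p) for p in np.linspace(0, 3, 12)])
    df = pd.DataFrame(data + 0.01 * rng.normal(size=data.shape))
    df.iloc[50:60, :] = np.nan                    # simulate a dropout across all markers

    # Fit PCA on the fully observed rows.
    observed = df.dropna()
    pca = PCA(n_components=4).fit(observed.values)

    # Encode observed rows, leave the dropout as NaN rows in component space,
    # interpolate linearly over the gap, and decode back to data space.
    enc = pd.DataFrame(np.nan, index=df.index, columns=range(pca.n_components_))
    enc.loc[observed.index] = pca.transform(observed.values)
    dec = pca.inverse_transform(enc.interpolate().ffill().bfill().values)

    # Keep observed values; use the decoded prediction only where data were missing.
    return df.where(df.notna(), pd.DataFrame(dec, index=df.index, columns=df.columns))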
def fill(dfs, rank, window): """Complete missing marker data using linear interpolation. This method alters the given `dfs` in-place. Parameters ---------- dfs : list of pd.DataFrame Frames of source data. The frames will be stacked into a single large frame and interpolated linearly, either in the data space or (if rank is not None) in principal component space. rank : float Number of principal components (if >1) or fraction of variance (if in (0, 1)) to retain in the encoded data. window : int Model windows of this many consecutive frames. """ df = lmj.cubes.fill.stack(dfs, window) centers = lmj.cubes.fill.center(df) if rank is None: prediction, _, _ = lmj.cubes.fill.window(df, window, True) else: if not 0 < rank < 1: rank = int(rank) pca = lmj.pca.PCA() pos, _, _ = lmj.cubes.fill.window(df, window, None) pca.fit(pos) enc = pd.DataFrame(pca.encode(pos, retain=rank)) lin = enc.interpolate().ffill().bfill().values prediction = pca.decode(lin, retain=rank) lmj.cubes.fill.update(df, prediction, window) lmj.cubes.fill.restore(df, centers) lmj.cubes.fill.unstack(df, dfs)