def multi_code_sklearn(arrs, base=None, method=None, init=None, c=None, params=None):
    """Multi-array Non-negative matrix factorization using scikits-learn.

    One ``decomposition.NMF`` model is fitted per array returned by
    ``load_arrs``; the component matrices of all models are stacked row-wise
    into a single code matrix. Every row of the (row-wise stacked) input is
    then expressed as a non-negative least-squares combination of those codes.
    The codes are written with ``write_codes`` to
    ``<base>_pgnmf-c#<c>-i#<init>-p#<params>.epi`` and the coefficients with
    ``write_values`` to the matching ``.arr`` name. Returns the tuple
    ``(codes file name, values file name)``.

    ``base`` is concatenated with the output suffix, so despite its ``None``
    default it has to be supplied. ``method`` is accepted but not used by this
    function. ``params`` is passed to ``parse_params`` together with
    ``{"max_iter": 1000}`` (presumably the defaults) and the result is
    forwarded to the NMF constructor as keyword arguments; the raw ``params``
    value (or an empty string) also becomes part of the output names.

    - arrs(``path+``) input arrays (loci x scaled features) see: ``scale_features``.
    - base(``str``) common basename for the output.
    - c(``int``) number of expected histone codes (factorization rank).
    - init(``str``) matrix factorization initialization method.
    """
    kwargs = parse_params(params, {"max_iter": 1000})
    marks, xs = load_arrs(arrs)
    # one factorization per input array; keep only the learned components
    hs = []
    for x in xs:
        nmf = decomposition.NMF(n_components=c, init=init, sparseness='components', **kwargs)
        nmf.fit(x)
        hs.append(nmf.components_)
    H = np.vstack(hs)
    X = np.vstack(xs)
    # project every stacked input row onto the joint set of codes
    H_t = H.T  # loop invariant, taken once
    W = np.zeros((X.shape[0], len(H)))
    for j in range(X.shape[0]):
        W[j, :], _ = nnls(H_t, X[j, :])
    name_fields = ("pgnmf", c, init, (params or ""))
    # write codes
    ofnc = base + ("_%s-c#%s-i#%s-p#%s.epi" % name_fields)
    write_codes(ofnc, H, marks)
    # write
    ofna = base + ("_%s-c#%s-i#%s-p#%s.arr" % name_fields)
    # Take the column count from W itself (one column per stacked code row).
    # ``len(arrs) * c`` raises TypeError when ``c`` is left at its ``None``
    # default and requires ``arrs`` to support ``len()``; for an integer ``c``
    # and one loaded array per entry of ``arrs`` both give the same number.
    write_values(ofna, W, W.shape[1])
    return ofnc, ofna
def recode_sklearn(arr=None, epi=None, odn=path("."), base=None):
    """(internal) projects arr onto codes

    Loads the value matrix from ``arr`` (``load_arr``) and the code matrix
    from ``epi`` (``load_epi``), solves one non-negative least-squares problem
    per row of the values against the transposed codes and writes the
    resulting coefficients with ``write_values`` to ``odn / (base + ".arr")``.
    When ``base`` is falsy it is built from the basenames of ``arr`` and
    ``epi`` (first ``splitext`` component of each) joined by ``"_"``.
    Nothing is returned.

    Raises ``AssertionError`` when the marks loaded from the two inputs are
    not equal.

    - arr(``path``)
    - epi(``path``)
    """
    arr_marks, X = load_arr(arr)
    epi_marks, H = load_epi(epi)
    # Raised explicitly rather than via ``assert`` so the check is still
    # performed when Python runs with -O (which strips assert statements);
    # the exception type is kept so existing handlers continue to work.
    if not (arr_marks == epi_marks):
        raise AssertionError("marks of %s and %s do not match" % (arr, epi))
    H_t = H.T  # loop invariant, taken once
    W = np.zeros((X.shape[0], len(H)))
    for j in range(X.shape[0]):
        W[j, :], _ = nnls(H_t, X[j, :])
    if not base:
        base = arr.basename().splitext()[0] + "_" + epi.basename().splitext()[0]
    ofn = odn / (base + ".arr")
    # write
    write_values(ofn, W, W.shape[1])
def recode_sklearn(arr=None, epi=None, odn=path("."), base=None):
    """(internal) projects arr onto codes

    Reads values from ``arr`` via ``load_arr`` and codes from ``epi`` via
    ``load_epi``. Each row of the values is fitted by non-negative least
    squares (``nnls``) against the transposed code matrix, and the coefficient
    matrix is handed to ``write_values`` with the output name
    ``odn / (base + ".arr")``. A falsy ``base`` is replaced by the two input
    basenames (first ``splitext`` component of each) joined with ``"_"``.
    The function has no return value.

    Raises ``AssertionError`` if the marks of ``arr`` and ``epi`` differ.

    - arr(``path``)
    - epi(``path``)
    """
    arr_marks, X = load_arr(arr)
    epi_marks, H = load_epi(epi)
    # An ``assert`` statement is removed under ``python -O``; raising the same
    # exception type explicitly keeps the consistency check active there
    # without changing what callers may catch.
    if not (arr_marks == epi_marks):
        raise AssertionError("marks of %s and %s do not match" % (arr, epi))
    H_t = H.T  # loop invariant, taken once
    W = np.zeros((X.shape[0], len(H)))
    for j in range(X.shape[0]):
        W[j, :], _ = nnls(H_t, X[j, :])
    if not base:
        base = arr.basename().splitext()[0] + "_" + epi.basename().splitext()[0]
    ofn = odn / (base + ".arr")
    # write
    write_values(ofn, W, W.shape[1])
def multi_code_sklearn(arrs, base=None, method=None, init=None, c=None, params=None):
    """Multi-array Non-negative matrix factorization using scikits-learn.

    For each array obtained from ``load_arrs`` a separate
    ``decomposition.NMF`` model is fitted and its ``components_`` collected;
    the collected components are stacked row-wise to form the code matrix.
    The inputs are stacked row-wise as well, and each of their rows is fitted
    by non-negative least squares (``nnls``) against the transposed codes.
    Codes go to ``<base>_pgnmf-c#<c>-i#<init>-p#<params>.epi`` through
    ``write_codes``; coefficients go to the same name with an ``.arr``
    extension through ``write_values``. Returns
    ``(codes file name, values file name)``.

    ``base`` must be given even though it defaults to ``None``, because the
    output suffix is appended to it. ``method`` is not used in this function.
    ``params`` is given to ``parse_params`` along with ``{"max_iter": 1000}``
    (presumably defaults) to build the NMF keyword arguments, and its raw
    value (or an empty string) is embedded in the output names.

    - arrs(``path+``) input arrays (loci x scaled features) see: ``scale_features``.
    - base(``str``) common basename for the output.
    - c(``int``) number of expected histone codes (factorization rank).
    - init(``str``) matrix factorization initialization method.
    """
    kwargs = parse_params(params, {"max_iter": 1000})
    marks, xs = load_arrs(arrs)
    # one factorization per input array; keep only the learned components
    hs = []
    for x in xs:
        nmf = decomposition.NMF(n_components=c, init=init, sparseness='components', **kwargs)
        nmf.fit(x)
        hs.append(nmf.components_)
    H = np.vstack(hs)
    X = np.vstack(xs)
    # project every stacked input row onto the joint set of codes
    H_t = H.T  # loop invariant, taken once
    W = np.zeros((X.shape[0], len(H)))
    for j in range(X.shape[0]):
        W[j, :], _ = nnls(H_t, X[j, :])
    name_fields = ("pgnmf", c, init, (params or ""))
    # write codes
    ofnc = base + ("_%s-c#%s-i#%s-p#%s.epi" % name_fields)
    write_codes(ofnc, H, marks)
    # write
    ofna = base + ("_%s-c#%s-i#%s-p#%s.arr" % name_fields)
    # Column count is read from W (one column per stacked code row) instead
    # of ``len(arrs) * c``: that product raises TypeError when ``c`` keeps its
    # ``None`` default and needs ``arrs`` to support ``len()``. With an
    # integer ``c`` and one loaded array per entry of ``arrs`` the two agree.
    write_values(ofna, W, W.shape[1])
    return ofnc, ofna