def f_colprocess_helper(trx, binds, surface_markers, mw, w_surface_markers):
    """Fit a SchemaQP "scale" transform of trx against binding data (and,
    optionally, surface-marker data) and return per-column weights.

    Args:
        trx: primary dataset passed to SchemaQP.fit (rows = observations).
        binds: feature-vector secondary dataset to correlate with.
        surface_markers: DataFrame of surface markers; only consulted when
            w_surface_markers != 0 (its .values become a second target).
        mw: max-weight argument forwarded to SchemaQP's second positional arg.
        w_surface_markers: weight for the surface-marker target; 0 disables it.

    Returns:
        (pd.Series of sqrt(fitted weights), solver info), or (None, None) on
        any failure — this is a best-effort helper used from worker processes.
    """
    try:
        # Secondary datasets, their weights, and their types for SchemaQP.fit.
        g = [binds]
        wg = [1]
        tg = ["feature_vector"]
        if w_surface_markers != 0:
            # BUGFIX: original referenced the undefined name
            # `d_surface_markers`; the parameter is `surface_markers`.
            g.append(surface_markers.values)
            wg.append(w_surface_markers)
            tg.append("feature_vector")

        # Prefer an in-tree checkout of schema; fall back to the installed package.
        try:
            sys.path.append(os.path.join(sys.path[0], '../../schema'))
            import schema_qp
        except ImportError:
            from schema import schema_qp

        afx = schema_qp.SchemaQP(
            0.01, mw,
            params={
                "require_nonzero_lambda": 1,
                "scale_mode_uses_standard_scaler": 1,
                "d0_type_is_feature_vector_categorical": 1,
            },
            mode="scale")
        afx.fit(trx, g, tg, wg)
        return (pd.Series(np.sqrt(afx._wts)), afx._soln_info)
    except Exception:
        # Report and signal failure rather than crashing the worker pool.
        print("Flag 67568.10 Saw exception in f_colprocess_helper")
        return (None, None)
def f_helper_mprun_schemawts_2(args):
    """Multiprocessing worker: fit Schema feature weights for one setting and
    write them to a CSV.

    Args:
        args: tuple (ax, dz, dir1, outsfx, min_corr, maxwt, strand, chromosome,
            adata_norm_style) where
            ax: 2-D numpy array, the secondary (target) dataset;
            dz: DataFrame whose last two columns are metadata and are dropped;
            dir1: output directory; outsfx: output-filename suffix;
            min_corr, maxwt: SchemaQP constructor arguments;
            strand, chromosome: identifiers used only in filenames/log lines;
            adata_norm_style: 0 = no normalization, 1 = L2-normalize each row
                of ax, 2 = sort each row of ax then L2-normalize.

    Side effects:
        Writes "<dir1>/adata1_sqp_wts_...{outsfx}.csv" and prints diagnostics.
    """
    (ax, dz, dir1, outsfx, min_corr, maxwt, strand, chromosome,
     adata_norm_style) = args

    if adata_norm_style == 1:
        ax_l2norm = np.sqrt(np.sum(ax**2, axis=1))
        ax = ax.copy() / (1e-12 + ax_l2norm[:, None])
    elif adata_norm_style == 2:
        ax_l2norm = np.sqrt(np.sum(ax**2, axis=1))
        ax = np.sort(ax.copy(), axis=1) / (1e-12 + ax_l2norm[:, None])

    print("Flag 231.10 ")
    print(ax[-10:, -10:])
    print(np.sum(ax**2, axis=1)[-10:])

    # Drop the two trailing metadata columns, then scale each remaining
    # column to unit-ish std (1e-12 guards against zero-variance columns).
    dz_cols = dz.columns[:-2]
    dz_vals = dz.values[:, :-2]
    vstd = np.std(dz_vals.astype(float), axis=0)
    print("Flag 231.0275 ", vstd.shape, dz_vals.shape, flush=True)
    dz_vals = dz_vals.copy() / (1e-12 + vstd)

    # Prefer an in-tree checkout of schema; fall back to the installed package.
    try:
        sys.path.append(os.path.join(sys.path[0], '../../schema'))
        import schema_qp
    except ImportError:
        from schema import schema_qp

    sqp = schema_qp.SchemaQP(min_corr, maxwt,
                             params={"dist_npairs": 1000000},
                             mode="scale")
    try:
        sqp.fit_transform(dz_vals, [ax], ['feature_vector'], [1])
        print("Flag 231.030 ", min_corr, maxwt, dz_vals.shape, ax.shape,
              flush=True)
        # Normalized, clipped-at-zero sqrt-weights.
        wtsx = np.sqrt(np.maximum(sqp._wts / np.sum(sqp._wts), 0))
    except Exception:
        print("ERROR: schema failed for ", min_corr, maxwt, strand, chromosome)
        # Sentinel weights so the failure is obvious in the output file.
        wtsx = 1e12 * np.ones(dz_vals.shape[1])

    wdf = pd.Series(wtsx, index=dz_cols).sort_values(
        ascending=False).reset_index().rename(columns={
            "index": "fdist",
            0: "wt"
        })
    wdf.to_csv(
        "{0}/adata1_sqp_wts_mincorr{1}_maxw{2}_strand{4}_chr{5}_adatanorm{8}_{7}.csv"
        .format(dir1, min_corr, maxwt, 1, strand, chromosome, 5, outsfx,
                adata_norm_style),
        index=False)
def run_dataset_schema(tra, trb, binds, min_corr, max_w=1000, mode="both",
                       d_surface_markers=None, w_surface_markers=0):
    """Fit SchemaQP weights on TCR alpha/beta amino-acid feature columns.

    Args:
        tra, trb: 26-column-per-chain arrays (one column per letter A..Z).
        binds: feature-vector secondary dataset.
        min_corr, max_w: SchemaQP constructor arguments.
        mode: substring flags — must contain one of "both"/"tra"/"trb" to pick
            the chain(s); may also contain "bin:" (binarize D) and/or
            "std:" (divide each column of D by its std).
        d_surface_markers: DataFrame of surface markers; only consulted when
            w_surface_markers != 0.
        w_surface_markers: weight for the surface-marker target; 0 disables it.

    Returns:
        (pd.Series of sqrt(fitted weights) indexed by letter label,
         solver info dict).

    Raises:
        ValueError: if mode selects none of "both"/"tra"/"trb".
    """
    alphabet = [chr(ord('A') + i) for i in range(26)]
    # Per-chain column indices of the six letters that are not amino acids.
    non_aa = np.array([ord(c) - ord('A') for c in "BJOUXZ"])

    if "both" in mode:
        D = np.hstack([tra, trb])
        letters = np.array(['a' + c for c in alphabet] +
                           ['b' + c for c in alphabet])
        to_delete = list(non_aa) + list(non_aa + 26)
    elif "tra" in mode:
        D = tra
        letters = list(alphabet)
        to_delete = list(non_aa)
    elif "trb" in mode:
        D = trb
        letters = list(alphabet)
        to_delete = list(non_aa)
    else:
        # BUGFIX: the original fell through with D/letters undefined and
        # crashed later with an opaque NameError.
        raise ValueError(
            "mode must contain one of 'both', 'tra', 'trb': {0}".format(mode))

    # Remove non-amino-acid columns and their labels.
    D = np.delete(D, to_delete, axis=1)
    letters = np.delete(letters, to_delete)

    if "bin:" in mode:
        D = 1 * (D > 0)  # presence/absence instead of counts
    if "std:" in mode:
        D = D / (1e-12 + D.std(axis=0))

    # Secondary datasets, their weights, and their types for SchemaQP.fit.
    g = [binds]
    wg = [1]
    tg = ["feature_vector"]
    if w_surface_markers != 0:
        g.append(d_surface_markers.values)
        wg.append(w_surface_markers)
        tg.append("feature_vector")

    # Prefer an in-tree checkout of schema; fall back to the installed package.
    try:
        sys.path.append(os.path.join(sys.path[0], '../../schema'))
        import schema_qp
    except ImportError:
        from schema import schema_qp

    afx = schema_qp.SchemaQP(min_corr, max_w,
                             params={"require_nonzero_lambda": 1},
                             mode="scale")
    afx.fit(D, g, tg, wg)
    return (pd.Series(np.sqrt(afx._wts), index=letters), afx._soln_info)
def checkMaxFeasibleCorr(D, d0, g, tg, wg):
    """Scan a descending list of correlation thresholds and return the first
    one at which a SchemaQP fit succeeds.

    Args:
        D, d0, g, tg, wg: forwarded unchanged to SchemaQP.fit.

    Returns:
        (0.9 * thresh, thresh) for the first feasible threshold, or (0, 0)
        if every candidate fails.
    """
    # Prefer an in-tree checkout of schema; fall back to the installed package.
    try:
        sys.path.append(os.path.join(sys.path[0], '../../schema'))
        import schema_qp
    except ImportError:
        from schema import schema_qp

    thresholds = [0.30, 0.275, 0.25, 0.225, 0.20, 0.15, 0.10, 0.075,
                  0.06, 0.05, 0.04, 0.03, 0.025, 0.02, 0.015, 0.01]
    for thresh in thresholds:
        print("STARTING TRY OF ", thresh)
        try:
            sqp = schema_qp.SchemaQP(thresh, w_max_to_avg=1000,
                                     params={"dist_npairs": 1000000},
                                     mode="scale")
            sqp.fit(D, g, tg, wg, d0=d0)
            print("SUCCEEDED TRY OF ", thresh)
            # Back off 10% from the highest feasible threshold for safety.
            return 0.9 * thresh, thresh
        except Exception:
            # Infeasible at this threshold; keep relaxing.
            print("EXCEPTION WHEN TRYING ", thresh)
    return 0, 0
def runSchemaGranuleCellDensity(D, d0, gIn, tgIn, wgIn, min_corr1, min_corr2):
    """Run three SchemaQP fits for each candidate weight of the first
    secondary dataset.

    For each w in (1, 10, 50, 100), with s == "linear":
      key (s, w, 1): fit against ONLY the first secondary dataset at
          min_corr1 (does NOT maximize negative corr with non-granule).
      key (s, w, 2): fit against the full secondary set at min_corr1
          (does maximize negative corr with non-granule).
      key (s, w, 3): same as (2) but at min_corr2.
    If an earlier fit for a given w fails, the later ones for that w are
    skipped (matching the original control flow).

    Returns:
        dict mapping (s, w, k) -> (sqrt(fitted weights), solver info);
        failed fits are reported on stdout and omitted.
    """
    # Prefer an in-tree checkout of schema; fall back to the installed package.
    try:
        sys.path.append(os.path.join(sys.path[0], '../../schema'))
        import schema_qp
    except ImportError:
        from schema import schema_qp

    ret_val = {}
    for w in [1, 10, 50, 100]:
        s = "linear"
        g1, wg1, tg1 = gIn[:], wgIn[:], tgIn[:]
        wg1[0] = w  # full set: does maximize negative corr with non-granule
        # Reduced set: only the first secondary dataset
        # (does NOT maximize negative corr with non-granule).
        g = [g1[0]]
        wg = [wg1[0]]
        tg = [tg1[0]]

        try:
            afx1 = schema_qp.SchemaQP(min_corr1, w_max_to_avg=1000,
                                      mode="scale")
            afx1.fit_transform(D, g, tg, wg, d0=d0)
            ret_val[(s, w, 1)] = (np.sqrt(afx1._wts), afx1._soln_info)
        except Exception:
            print("TRYING min-corr {0} for afx1 broke here".format(min_corr1))
            continue

        try:
            afx2 = schema_qp.SchemaQP(min_corr1, w_max_to_avg=1000,
                                      mode="scale")
            afx2.fit_transform(D, g1, tg1, wg1, d0=d0)
            ret_val[(s, w, 2)] = (np.sqrt(afx2._wts), afx2._soln_info)
        except Exception:
            print("TRYING min-corr {0} for afx2 broke here".format(min_corr1))
            continue

        try:
            afx3 = schema_qp.SchemaQP(min_corr2, w_max_to_avg=1000,
                                      mode="scale")
            afx3.fit_transform(D, g1, tg1, wg1, d0=d0)
            ret_val[(s, w, 3)] = (np.sqrt(afx3._wts), afx3._soln_info)
        except Exception:
            print("TRYING min-corr {0} for afx3 broke here".format(min_corr2))
            continue
    return ret_val