Ejemplo n.º 1
0
def f_colprocess_helper(trx, binds, surface_markers, mw, w_surface_markers):
    """Fit a SchemaQP model on ``trx`` and return its square-rooted weights.

    ``binds`` is always passed to ``SchemaQP.fit`` as the first auxiliary
    dataset with weight 1.  When ``w_surface_markers`` is non-zero,
    ``surface_markers.values`` is added as a second auxiliary dataset with
    that weight.  Both are declared with type ``"feature_vector"``.

    Args:
        trx: Primary dataset handed to ``SchemaQP.fit``.
        binds: First auxiliary dataset.
        surface_markers: Object exposing ``.values`` (e.g. a pandas
            DataFrame); only read when ``w_surface_markers != 0``.
        mw: Second positional argument of the ``SchemaQP`` constructor.
        w_surface_markers: Weight of the surface-marker dataset; 0 disables it.

    Returns:
        ``(pd.Series(sqrt(weights)), solution_info)`` on success, or
        ``(None, None)`` if anything in the computation raised.
    """
    try:
        g = [binds]
        wg = [1]
        tg = ["feature_vector"]

        if w_surface_markers != 0:
            # Use the ``surface_markers`` parameter.  The previous code read an
            # undefined name (``d_surface_markers``), which raised NameError
            # and made every call with a non-zero weight return (None, None).
            g.append(surface_markers.values)
            wg.append(w_surface_markers)
            tg.append("feature_vector")

        try:
            # Prefer a local checkout of schema; register its directory only
            # once so repeated calls do not keep growing sys.path.
            schema_dir = os.path.join(sys.path[0], '../../schema')
            if schema_dir not in sys.path:
                sys.path.append(schema_dir)
            import schema_qp
        except Exception:
            # Any failure to load the local copy falls back to the installed
            # package (KeyboardInterrupt/SystemExit are no longer trapped).
            from schema import schema_qp

        afx = schema_qp.SchemaQP(0.01,
                                 mw,
                                 params={
                                     "require_nonzero_lambda": 1,
                                     "scale_mode_uses_standard_scaler": 1,
                                     "d0_type_is_feature_vector_categorical":
                                     1,
                                 },
                                 mode="scale")
        afx.fit(trx, g, tg, wg)
        return (pd.Series(np.sqrt(afx._wts)), afx._soln_info)

    except Exception as err:
        # Deliberate best-effort: callers get (None, None) instead of a crash,
        # but the cause is now reported rather than silently dropped.
        print("Flag 67568.10 Saw exception in f_colprocess_helper", repr(err))
        return (None, None)
Ejemplo n.º 2
0
def f_helper_mprun_schemawts_2(args):
    """Fit SchemaQP feature weights for one job and write them to a CSV file.

    Args:
        args: 9-tuple unpacked as ``(ax, dz, dir1, outsfx, min_corr, maxwt,
            strand, chromosome, adata_norm_style)``.

            * ``ax``: 2-D array used as the single ``"feature_vector"``
              auxiliary dataset.  With ``adata_norm_style == 1`` each row is
              divided by its L2 norm; with ``adata_norm_style == 2`` each row
              is additionally sorted in ascending order first; any other
              value leaves ``ax`` untouched.
            * ``dz``: DataFrame whose last two columns are dropped; the
              remaining columns are divided by their standard deviation and
              used as the primary dataset.
            * ``dir1``, ``outsfx``, ``strand``, ``chromosome``: only used to
              build the output file name (and the error message).
            * ``min_corr``, ``maxwt``: first two positional arguments of the
              ``SchemaQP`` constructor.

    Returns:
        None.  The weights are written, sorted in descending order, to
        ``<dir1>/adata1_sqp_wts_mincorr..._<outsfx>.csv`` with columns
        ``fdist`` and ``wt``.  If the fit fails, every weight is written as
        the sentinel value 1e12.
    """
    (ax, dz, dir1, outsfx, min_corr, maxwt, strand, chromosome,
     adata_norm_style) = args

    # Division and np.sort already return new arrays, so the caller's ``ax``
    # is never modified and no explicit copy is needed.
    if adata_norm_style == 1:
        ax_l2norm = np.sqrt(np.sum(ax**2, axis=1))
        ax = ax / (1e-12 + ax_l2norm[:, None])

    elif adata_norm_style == 2:
        ax_l2norm = np.sqrt(np.sum(ax**2, axis=1))
        ax = np.sort(ax, axis=1) / (1e-12 + ax_l2norm[:, None])
        print("Flag 231.10 ")
        print(ax[-10:, -10:])
        print(np.sum(ax**2, axis=1)[-10:])

    dz_cols = dz.columns[:-2]
    dz_vals = dz.values[:, :-2]

    # 1e-12 guards against division by zero for constant columns.
    vstd = np.std(dz_vals.astype(float), axis=0)
    print("Flag 231.0275 ", vstd.shape, dz_vals.shape, flush=True)
    dz_vals = dz_vals / (1e-12 + vstd)

    try:
        # Prefer a local checkout of schema; register its directory only once
        # so repeated calls do not keep growing sys.path.
        schema_dir = os.path.join(sys.path[0], '../../schema')
        if schema_dir not in sys.path:
            sys.path.append(schema_dir)
        import schema_qp
    except Exception:
        from schema import schema_qp

    sqp = schema_qp.SchemaQP(min_corr,
                             maxwt,
                             params={"dist_npairs": 1000000},
                             mode="scale")
    try:
        # Only the fitted weights are needed; the transformed data is dropped.
        sqp.fit_transform(dz_vals, [ax], ['feature_vector'], [1])

        print("Flag 231.030 ",
              min_corr,
              maxwt,
              dz_vals.shape,
              ax.shape,
              flush=True)

        wtsx = np.sqrt(np.maximum(sqp._wts / np.sum(sqp._wts), 0))
    except Exception as err:
        # Best-effort: record a sentinel weight vector instead of aborting the
        # job, but let KeyboardInterrupt/SystemExit propagate.
        print("ERROR: schema failed for ", min_corr, maxwt, strand, chromosome,
              repr(err))
        wtsx = 1e12 * np.ones(dz_vals.shape[1])

    wdf = pd.Series(wtsx, index=dz_cols).sort_values(
        ascending=False).reset_index().rename(columns={
            "index": "fdist",
            0: "wt"
        })
    # Named fields give the same file name as the former positional template,
    # without the dummy placeholder arguments it required.
    out_path = ("{dir1}/adata1_sqp_wts_mincorr{min_corr}_maxw{maxwt}"
                "_strand{strand}_chr{chromosome}_adatanorm{norm}_{outsfx}.csv"
                ).format(dir1=dir1,
                         min_corr=min_corr,
                         maxwt=maxwt,
                         strand=strand,
                         chromosome=chromosome,
                         norm=adata_norm_style,
                         outsfx=outsfx)
    wdf.to_csv(out_path, index=False)
Ejemplo n.º 3
0
def run_dataset_schema(tra,
                       trb,
                       binds,
                       min_corr,
                       max_w=1000,
                       mode="both",
                       d_surface_markers=None,
                       w_surface_markers=0):
    """Fit SchemaQP on letter-indexed feature columns and return the weights.

    Args:
        tra: 2-D array; assumed to have one column per letter A-Z (26
            columns) -- TODO confirm against callers.
        trb: 2-D array with the same layout as ``tra``.
        binds: First auxiliary dataset passed to ``SchemaQP.fit`` (weight 1).
        min_corr: First positional argument of the ``SchemaQP`` constructor.
        max_w: Second positional argument of the ``SchemaQP`` constructor.
        mode: String inspected by substring:

            * ``"both"``: use ``tra`` and ``trb`` side by side; columns are
              labelled ``aA..aZ`` and ``bA..bZ``.
            * otherwise ``"tra"``: use ``tra`` only, labelled ``A..Z``.
            * otherwise ``"trb"``: use ``trb`` only, labelled ``A..Z``.
            * ``"bin:"``: additionally binarise the data (value > 0 -> 1).
            * ``"std:"``: additionally divide each column by its std.
        d_surface_markers: Object exposing ``.values``; required when
            ``w_surface_markers`` is non-zero.
        w_surface_markers: Weight of the surface-marker dataset; 0 disables it.

    Returns:
        ``(pd.Series(sqrt(weights), index=column_labels), solution_info)``.

    Raises:
        ValueError: if ``mode`` names none of ``both``/``tra``/``trb``, or if
            ``w_surface_markers`` is non-zero but ``d_surface_markers`` is
            ``None``.
    """
    # Validate up front so bad arguments fail with a clear message instead of
    # an AttributeError / UnboundLocalError further down.
    if w_surface_markers != 0 and d_surface_markers is None:
        raise ValueError(
            "d_surface_markers is required when w_surface_markers is non-zero")

    alphabet = [chr(ord('A') + i) for i in range(26)]
    # Alphabet positions of the letters B, J, O, U, X and Z, whose columns
    # are dropped (named ``non_aa`` here; presumably the letters that are not
    # amino-acid codes).
    non_aa = np.array([ord(c) - ord('A') for c in "BJOUXZ"])

    if "both" in mode:
        D = np.hstack([tra, trb])
        letters = np.array(['a' + c for c in alphabet] +
                           ['b' + c for c in alphabet])
        # The second half of the stacked matrix is offset by 26 columns.
        to_delete = list(non_aa) + list(non_aa + 26)
    elif "tra" in mode or "trb" in mode:
        # "tra" takes precedence when both substrings are present.
        D = tra if "tra" in mode else trb
        letters = np.array(alphabet)
        to_delete = list(non_aa)
    else:
        raise ValueError(
            "mode must contain 'both', 'tra' or 'trb'; got {0!r}".format(mode))

    D = np.delete(D, to_delete, axis=1)
    letters = np.delete(letters, to_delete)

    if "bin:" in mode:
        D = 1 * (D > 0)

    if "std:" in mode:
        # 1e-12 guards against division by zero for constant columns.
        D = D / (1e-12 + D.std(axis=0))

    g = [binds]
    wg = [1]
    tg = ["feature_vector"]

    if w_surface_markers != 0:
        g.append(d_surface_markers.values)
        wg.append(w_surface_markers)
        tg.append("feature_vector")

    try:
        # Prefer a local checkout of schema; register its directory only once
        # so repeated calls do not keep growing sys.path.
        schema_dir = os.path.join(sys.path[0], '../../schema')
        if schema_dir not in sys.path:
            sys.path.append(schema_dir)
        import schema_qp
    except Exception:
        from schema import schema_qp

    afx = schema_qp.SchemaQP(min_corr,
                             max_w,
                             params={"require_nonzero_lambda": 1},
                             mode="scale")
    afx.fit(D, g, tg, wg)
    return (pd.Series(np.sqrt(afx._wts), index=letters), afx._soln_info)
Ejemplo n.º 4
0
def checkMaxFeasibleCorr(D, d0, g, tg, wg):
    """Find the largest tested threshold for which a SchemaQP fit succeeds.

    A fixed list of thresholds is tried from largest (0.30) to smallest
    (0.01).  Each threshold is passed as the first argument of ``SchemaQP``
    and the model is fitted with ``fit(D, g, tg, wg, d0=d0)``; the first fit
    that completes without raising wins.

    Args:
        D: Primary dataset passed to ``SchemaQP.fit``.
        d0: Forwarded to ``fit`` as the ``d0`` keyword argument.
        g: List of auxiliary datasets.
        tg: List of type strings, one per entry in ``g``.
        wg: List of weights, one per entry in ``g``.

    Returns:
        ``(0.9 * thresh, thresh)`` for the first threshold that fits, or
        ``(0, 0)`` if every threshold fails.
    """
    try:
        # Prefer a local checkout of schema; register its directory only once
        # so repeated calls do not keep growing sys.path.
        schema_dir = os.path.join(sys.path[0], '../../schema')
        if schema_dir not in sys.path:
            sys.path.append(schema_dir)
        import schema_qp
    except Exception:
        from schema import schema_qp

    thresholds = (
        0.30, 0.275, 0.25, 0.225, 0.20, 0.15, 0.10, 0.075, 0.06, 0.05,
        0.04, 0.03, 0.025, 0.02, 0.015, 0.01,
    )
    for thresh in thresholds:
        print("STARTING TRY OF ", thresh)
        try:
            sqp = schema_qp.SchemaQP(thresh,
                                     w_max_to_avg=1000,
                                     params={"dist_npairs": 1000000},
                                     mode="scale")
            sqp.fit(D, g, tg, wg, d0=d0)
        except Exception as err:
            # A failed fit just means this threshold is not usable; move on to
            # the next one.  Catching Exception (not everything) keeps Ctrl-C
            # able to stop the scan.
            print("EXCEPTION WHEN TRYING ", thresh, repr(err))
        else:
            print("SUCCEEDED TRY OF ", thresh)
            return 0.9 * thresh, thresh
    return 0, 0
Ejemplo n.º 5
0
def runSchemaGranuleCellDensity(D, d0, gIn, tgIn, wgIn, min_corr1, min_corr2):
    """Run three SchemaQP fits for each of several first-dataset weights.

    For every weight ``w`` in ``(1, 10, 50, 100)`` the weight of the first
    auxiliary dataset is set to ``w`` and three fits are attempted in order:

    1. ``min_corr1`` with only the first auxiliary dataset,
    2. ``min_corr1`` with all auxiliary datasets,
    3. ``min_corr2`` with all auxiliary datasets.

    Per the original comments, runs using all datasets also maximise negative
    correlation with non-granule cells, while run 1 does not.  If a fit
    raises, a message is printed and the remaining fits for that ``w`` are
    skipped.

    Args:
        D: Primary dataset passed to ``SchemaQP.fit_transform``.
        d0: Forwarded to ``fit_transform`` as the ``d0`` keyword argument.
        gIn: Sequence of auxiliary datasets (not modified).
        tgIn: Sequence of type strings, one per entry in ``gIn``.
        wgIn: Sequence of weights, one per entry in ``gIn`` (not modified).
        min_corr1: Threshold used for runs 1 and 2.
        min_corr2: Threshold used for run 3.

    Returns:
        dict mapping ``("linear", w, run_id)`` to
        ``(sqrt(weights), solution_info)`` for every fit that succeeded.
    """
    try:
        # Prefer a local checkout of schema; register its directory only once
        # so repeated calls do not keep growing sys.path.
        schema_dir = os.path.join(sys.path[0], '../../schema')
        if schema_dir not in sys.path:
            sys.path.append(schema_dir)
        import schema_qp
    except Exception:
        from schema import schema_qp

    label = "linear"
    ret_val = {}

    for w in (1, 10, 50, 100):
        # list(...) always copies, so overriding the first weight can never
        # write through to the caller's data (a ``[:]`` slice is only a view
        # when the input is a NumPy array).
        g_all, tg_all, wg_all = list(gIn), list(tgIn), list(wgIn)
        wg_all[0] = w

        g_first, tg_first, wg_first = [g_all[0]], [tg_all[0]], [wg_all[0]]

        runs = (
            (1, min_corr1, g_first, tg_first, wg_first),
            (2, min_corr1, g_all, tg_all, wg_all),
            (3, min_corr2, g_all, tg_all, wg_all),
        )
        for run_id, min_corr, g, tg, wg in runs:
            try:
                afx = schema_qp.SchemaQP(min_corr,
                                         w_max_to_avg=1000,
                                         mode="scale")
                # Only the fitted weights are kept; the transformed data is
                # discarded.
                afx.fit_transform(D, g, tg, wg, d0=d0)
                ret_val[(label, w, run_id)] = (np.sqrt(afx._wts),
                                               afx._soln_info)
            except Exception as err:
                print(
                    "TRYING min-corr {0} for afx{1} broke here".format(
                        min_corr, run_id), repr(err))
                # Later runs for this weight are skipped after a failure.
                break

    return ret_val