Example #1
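# Scatter plot of classified probes: draws the labelled points from one call set
# (label_x), then overlays in blue the probes called cooperative in the other
# call set (label_y). Assumes plt (matplotlib.pyplot), np (numpy), and the
# project's plotting module `arr` are imported in the source file.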
def plot_sctr(df, col1, col2, path, title, xlab, ylab, coop_x, coop_y):
    plt.clf()
    ax = plt.axes()
    df['label_x'] = df['label_x'].replace({'cooperative': coop_x})
    coop_df = df[df["label_y"] == "cooperative"]
    arr.plot_classified_labels(df,
                               col1=col1,
                               col2=col2,
                               labelcol="label_x",
                               title=title,
                               xlab=xlab,
                               ylab=ylab,
                               labelnames=[coop_x, "independent", "anticoop"],
                               axes=ax)
    x, y = np.log(coop_df[col1].values), np.log(coop_df[col2].values)
    ax.scatter(x, y, color="blue", s=1, label=coop_y)
    ax.legend(loc="lower right")
    plt.savefig(path)
Example #2
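# For each orientation (o1, o2, both), draw the classified scatter and overlay in
# cyan the probes whose names appear in the selection returned by get_selected_nm
# (a helper defined elsewhere in the source file).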
def plot_coop(df, namemap, lo1, lo2, lb, suffix):
    dfnm = get_selected_nm(df, namemap)
    oris = {"o1": lo1, "o2": lo2, "both": lb}
    # lbnm = lb[lb["label"] != "fail_cutoff"][["Name"]].drop_duplicates()
    for o in ["o1", "o2", "both"]:
        ax = plt.axes()
        arr.plot_classified_labels(oris[o],
                                   col1="indiv_median",
                                   col2="two_median",
                                   log=True,
                                   xlab="log(m1-m3+m2-m3)",
                                   ylab="log(wt-m3)",
                                   path="labeled_log_%s.png" % o,
                                   title="Cooperative plot, %s" % o,
                                   axes=ax)
        wtavail = oris[o].merge(dfnm[["Name"]])
        ax.scatter(np.log(wtavail["indiv_median"]),
                   np.log(wtavail["two_median"]),
                   color="cyan",
                   s=1,
                   label="wt_selected")
        plt.savefig("in_ori_%s_%s.png" % (o, suffix))
        plt.clf()
Example #3
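    # Label probes by replicate permutation test (FDR-corrected, p < 0.01), write
    # and plot each orientation, then average the two orientations and plot the
    # combined cooperative/independent/anticooperative calls.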
    # pickle.dump( two, open( "twosites.p", "wb" ) )
    # # indiv = pickle.load( open( "indivsum.p", "rb" ) )
    # # two = pickle.load( open( "twosites.p", "rb" ) )

    lbled = arr.label_replicas_permutation(indiv, two, arr_df, cutoff=cutoff, pcut=0.01, fdrcor=True)
    arr_df["affinity"] = np.log(arr_df["affinity"])
    # arr.plot_multi_scatterbox("alltypes.pdf", arr_df, ["wt","m1","m2","m3"], namecol="Name", affcol="affinity", pline=False, allplot=False, fixed_ax=True)
    # arr.plot_ori_inconsistency(indiv_df, two_df, log=True, fixed_ax=True)
    # import sys
    # sys.exit()

    for ori in lbled:
        lbled[ori].to_csv("lbled_%s.csv" % ori, index=False)
        #lbled[ori]["label"] = lbled[ori]["label"].replace({'additive': 'independent',"anticoop":"anticooperative"})
        arr.plot_classified_labels(lbled[ori], col1="indiv_median", col2="two_median",
                                   log=True, plotnonsignif=False,
                                   xlab="M1-M3+M2-M3", ylab="WT-M3",
                                   path="labeled_log_%s.png" % ori,
                                   title="Cooperative vs independent binding of Ets1-Ets1",
                                   labelnames=["cooperative", "independent", "anticooperative"])  #"Cooperative plot, %s" % ori
        print("Count %s" % ori, lbled[ori]["label"].value_counts())

    lbled_both = lbled["o1"][["Name", "label", "indiv_median", "two_median"]].merge(lbled["o2"][["Name", "label", "indiv_median", 'two_median']], on="Name", suffixes=("_o1", "_o2"))
    lbled_both["indiv_median"] = (lbled_both["indiv_median_o1"] + lbled_both["indiv_median_o2"]) / 2
    lbled_both["two_median"] = (lbled_both["two_median_o1"] + lbled_both["two_median_o2"]) / 2
    lbled_both["label"] = lbled_both.apply(lambda x: assign_label(x["label_o1"], x["label_o2"]), axis=1)
    print(lbled_both)
    lbled_both.to_csv("lbled_both.csv", index=False)
    print("Count both ", lbled_both["label"].value_counts())
    lbled_both["label"] = lbled_both["label"].replace({'additive': 'independent',"anticoop":"anticooperative"})

    arr.plot_classified_labels(lbled_both, col1="indiv_median", col2="two_median",
                               log=True, plotnonsignif=False,
                               xlab="M1-M3+M2-M3", ylab="WT-M3",
                               path="labeled_log_both.png",
                               title="Cooperative vs independent binding of Ets1-Ets1",
                               labelnames=["cooperative", "independent", "anticooperative"])
        # too low statistical power, can't do fdr correction
        # lbled["p_coop"] = sm.fdrcorrection(lbled["p_coop"])[1]
        # lbled["p_anti"] = sm.fdrcorrection(lbled["p_anti"])[1]
        #print(lbled.groupby("p_coop").count())
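        # Classify each probe from its permutation p-values; p_default and
        # p_ambiguous are thresholds defined earlier in the source file (not
        # shown in this snippet).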
        lbled["label"] = lbled.apply(
            lambda x: "cooperative" if x["p_coop"] < p_default else "ambiguous"
            if x["p_coop"] < p_ambiguous else "anticoop"
            if x["p_anti"] < p_default else "additive",
            axis=1)
        df_lbled = median_df.merge(lbled, on="Name")
        below_cut["label"] = "below_cutoff"
        df_lbled = pd.concat([df_lbled, below_cut], ignore_index=True)
        olist.append(df_lbled)
        arr.plot_classified_labels(df_lbled,
                                   path="%s_normalized.png" % ori,
                                   title="Cooperative plot, orientation %s" %
                                   ori,
                                   xlab=ch_x,
                                   ylab=ch_y)
        # write the labeled probes to csv
        df_wori = df_genomics[0][df_genomics[0]["ori"] == ori][[
            "Name", "Sequence", "ets_pos", "runx_pos", "ori"
        ]].drop_duplicates()
        #signif_label = df_lbled.loc[df_lbled["label"] != "below_cutoff",["Name","label","p_coop"]].drop_duplicates()
        lbled = df_lbled[["Name", "label", "p_coop"]].drop_duplicates()
        df_comb \
            .rename(columns={"Alexa488Adjusted_x":"ch1", "Alexa488Adjusted_y": "ch2"}) \
            .merge(lbled, on=["Name"]) \
            .merge(df_wori, on=["Name"]) \
            .to_csv("probes_labeled_%s.csv" % ori, index=False, float_format='%.4f')
    both_ori = olist[0].merge(olist[1], on=["Name"], suffixes=("_er", "_re"))
Example #5
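    # Write the orientation-joined table, plot its p-values and classified
    # intensities, and report per-label counts before collecting sequence and
    # binding-site positions from the "er" orientation.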
    both_ori.to_csv("%s/both_%s_%s.csv" % (baseoutpath, maintf, cooptf),
                    index=False)
    plot_pval(both_ori, "%s/pval.png" % baseoutpath)
    print(
        "Number of distinct names, above cutoff, after orientation joining %d"
        % both_ori[both_ori["label"] != "below_cutoff"]["Name"].nunique())

    both_ori_plt = both_ori[["Name", "intensity_x", "intensity_y", "label"]]
    both_ori_plt.to_csv("%s/both_ori_plt_%s_%s.csv" %
                        (baseoutpath, maintf, cooptf),
                        index=False)
    arr.plot_classified_labels(
        both_ori_plt,
        path="%s/both_normalized_%s_%s.png" % (baseoutpath, maintf, cooptf),
        col1="intensity_x",
        col2="intensity_y",
        title=both_title,
        xlab=ch_x,
        ylab=ch_y,
        plotnonsignif=False,
        labelnames=["cooperative", "independent", "anticoop"])

    print("Count per label", both_ori_plt[["label",
                                           "Name"]].groupby("label").count())

    # we use sequence where ets1 is on the left for simplicity
    name_info = df_m[df_m["ori"] == "er"][[
        "Name", "Sequence",
        "%s_pos" % maintf,
        "%s_start" % maintf,
        "%s_pos" % cooptf,
        "%s_start" % cooptf, "ori"
Example #6
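    # Build per-orientation intensity differences (indiv_median, two_median) for
    # each probe, FDR-correct the p-values, assign a class to each probe, and
    # plot the calls in both log and linear scale.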
            rowdict['indiv_median'] = median_dict[(k, ori, 'm1')] + median_dict[
                (k, ori, 'm2')] - 2 * median_dict[(k, ori, 'm3')]
            rowdict['two_median'] = median_dict[(k, ori, 'wt')] - median_dict[
                (k, ori, 'm3')]
            rowdict['Name'] = k
            orilbls.append(rowdict)
        #print(orilbls)
        labeled_dict[ori] = pd.DataFrame(orilbls)
        labeled_dict[ori]['p'] = sm.fdrcorrection(labeled_dict[ori]['p'])[1]
        labeled_dict[ori]['label'] = labeled_dict[ori].apply(
            lambda row: assign_class(row['p'], row['label']), axis=1)
        print(ori, labeled_dict[ori]["label"].value_counts())
        arr.plot_classified_labels(labeled_dict[ori],
                                   col1="indiv_median",
                                   col2="two_median",
                                   log=True,
                                   xlab="log(wt-m3)",
                                   ylab="log(m1-m3+m2-m3)",
                                   path="coop_log_%s.png" % ori,
                                   title="Cooperative plot (in log), ori %s" %
                                   ori)
        arr.plot_classified_labels(labeled_dict[ori],
                                   col1="indiv_median",
                                   col2="two_median",
                                   log=False,
                                   xlab="wt-m3",
                                   ylab="m1-m3+m2-m3",
                                   path="coop_%s.png" % ori,
                                   title="Cooperative plot, ori %s" % ori)

    df_er = labeled_dict['er'][labeled_dict['er']["label"] != "below_cutoff"]
    df_re = labeled_dict['re'][labeled_dict['re']["label"] != "below_cutoff"]
Example #7
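    # Same labeling pipeline as above but without FDR correction (p < 0.05):
    # write and plot each orientation, then join o1 and o2 by probe name and
    # average their medians.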
    # pickle.dump( two, open( "twosites.p", "wb" ) )
    # indiv = pickle.load( open( "indivsum.p", "rb" ) )
    # two = pickle.load( open( "twosites.p", "rb" ) )

    lbled = arr.label_replicas_permutation(indiv,
                                           two,
                                           df,
                                           cutoff=cutoff,
                                           fdrcor=False,
                                           pcut=0.05)
    for ori in lbled:
        lbled[ori].to_csv("lbled_%s.csv" % ori, index=False)
        arr.plot_classified_labels(lbled[ori],
                                   col1="indiv_median",
                                   col2="two_median",
                                   log=True,
                                   xlab="log(m1-m3+m2-m3)",
                                   ylab="log(wt-m3)",
                                   path="labeled_log_%s.png" % ori,
                                   title="Cooperative plot, %s" % ori)
        print("Count %s" % ori, lbled[ori]["label"].value_counts())

    lbled_both = lbled["o1"][[
        "Name", "p", "label", "indiv_median", "two_median"
    ]].merge(lbled["o2"][["Name", "label", "p", "indiv_median", 'two_median']],
             on="Name",
             suffixes=("_o1", "_o2"))
    lbled_both.to_csv("lbled_both.csv", index=False)
    # lbled_both = pd.read_csv("lbled_both.csv")
    lbled_both["indiv_median"] = (lbled_both["indiv_median_o1"] +
                                  lbled_both["indiv_median_o2"]) / 2
    lbled_both["two_median"] = (lbled_both["two_median_o1"] +
Example #8
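    # Combine the per-orientation probe tables from chambers 3-4, take the median
    # intensity per probe and orientation, attach the labels, and plot the
    # classified scatter.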
        "%s/ch3_ch4/coop_ch3_vs_ch4/tables/probes_labeled_er.csv" %
        basepath)[["Name", "ch1", "ch2", "ori"]]
    df2_re = pd.read_csv(
        "%s/ch3_ch4/coop_ch3_vs_ch4/tables/probes_labeled_re.csv" %
        basepath)[["Name", "ch1", "ch2", "ori"]]
    df2 = pd.concat([df2_er, df2_re]) \
        .groupby(["Name", "ori"]).median().reset_index() \
        .merge(df2_label, on="Name")

    arr.plot_classified_labels(
        df2,
        path="aaa.png",
        col1="ch1",
        col2="ch2",
        labelcol="label",
        title="Ets_Runx1",
        xlab="Ch1",
        ylab="Ch2",
        labelnames=["cooperative", "independent", "anticoop"],
    )

    #combine_ch_intensity(df1, df2)
    """
    train_ch1 = pd.read_csv("%s/ch1_ch2/training_pwm.tsv" % basepath, sep="\t")
    train_ch2 = pd.read_csv("%s/ch3_ch4/training_pwm.tsv" % basepath, sep="\t")

    print("Chamber 1-2")
    print(df1.groupby("label")["ets_score"].count())

    print("Chamber 3-4")
Example #9
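    # Compare chamber 1-2 calls with chamber 3-4: overlay the ch3-ch4 cooperative
    # probes on the ch1-ch2 scatter, then count how many overlapping probes get
    # the same label in both chamber pairs.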
    ch34_two = pd.read_csv("%s/ch3_ch4/coop_ch3_vs_ch4/tables/two_df.tsv" %
                           basepath,
                           sep="\t")

    print(ch12)
    print("ch1ch2")
    print(ch12["label"].value_counts())

    print("ch3ch4")
    print(ch34["label"].value_counts())

    # plot
    ax = plt.axes()
    arr.plot_classified_labels(ch12,
                               title="Cooperative plot, both orientations",
                               col1="ch1",
                               col2="ch2",
                               axes=ax)
    coop34_in12 = ch34[ch34["label"] == "cooperative"][["Name"]] \
        .merge(ch12, on=["Name"])
    x, y = np.log(coop34_in12["ch1"].values), np.log(coop34_in12["ch2"].values)
    ax.scatter(x, y, color="cyan", s=3, label="overlap coop ch1ch2 ch3ch4")
    ax.legend(loc="lower right")
    plt.savefig("overlap.png")

    print("Overlap in ch1ch2 and ch3ch4")
    probe_overlap = ch12.merge(ch34, on="Name")
    match_lbl = probe_overlap[probe_overlap["label_x"] ==
                              probe_overlap["label_y"]]
    print(probe_overlap.shape[0])
    print(match_lbl["label_x"].value_counts())