Beispiel #1
0
def kruskal_test(S,D):
    """Run a Kruskal-Wallis test per protein between the rows of S and D.

    Only index labels that occur in both frames are tested. A protein is
    skipped when its row consists solely of NaN in either frame; remaining
    NaNs are left to SciPy via ``nan_policy="omit"``.

    Returns a DataFrame indexed by protein and sorted by ascending "p", with
    the test's p-value in column "p" and whatever ``qvalues`` returns for
    that column stored in "q".
    """
    def first_row(frame, label):
        # If a label is duplicated, only the first matching row is used.
        return frame[frame.index == label].values[0]

    tested = []
    p_values = []
    for label in set(S.index) & set(D.index):
        s_row = first_row(S, label)
        if np.isnan(s_row).all():
            continue
        d_row = first_row(D, label)
        if np.isnan(d_row).all():
            continue
        outcome = stats.kruskal(s_row, d_row, nan_policy="omit")
        tested.append(label)
        p_values.append(outcome[1])  # element 1 of the result is the p-value

    result = pd.DataFrame(p_values, index=tested, columns=["p"])
    # qvalues receives the frame already ordered by p-value.
    result = result.sort_values(by="p")
    result["q"] = qvalues(result, pcolname="p")
    return result
def shapiro(df):
    """Apply ``stats.shapiro`` to every row of df.

    The per-row results are collected into a DataFrame that shares df's
    index. The code expects that frame to contain a "pvalue" column, which is
    handed to ``qvalues``; the returned values are stored in column "q".
    """
    row_results = df.apply(stats.shapiro, axis=1)
    table = pd.DataFrame(row_results.tolist(), index=df.index)
    table["q"] = qvalues(table, pcolname="pvalue")
    return table
Beispiel #3
0
def kruskal_treatment_groups(S,D, cell_line = "all"):
    """Kruskal-Wallis test per protein across the treatment groups of S and D.

    The columns of S and of D are each split into nine groups by matching the
    column names against the regexes "1_...", "2_...", ..., "9_...". For every
    index label of D, the 18 groups (nine from S followed by nine from D) are
    compared with ``stats.kruskal`` using ``nan_policy="omit"``.

    Parameters
    ----------
    S, D : pandas.DataFrame
        One row per protein. Column names are presumably of the form
        "<treatment>_<cell line>..." -- TODO confirm against the caller.
    cell_line : str
        Regex fragment that has to follow "<treatment>_" in a column name.
        The default "all" selects every column of a treatment.

    Returns
    -------
    pandas.DataFrame
        Indexed by protein and sorted by ascending "p"; column "p" holds the
        Kruskal-Wallis p-value and column "q" whatever ``qvalues`` returns
        for it. Proteins for which SciPy raises ``ValueError`` are left out.
    """
    # The earlier pattern f"1_{cell_line}*" only made the LAST character of
    # the cell line name repeatable; ".*" is the wildcard that was intended
    # (it is what the "all" case already used).
    column_tail = ".*" if cell_line == "all" else f"{cell_line}.*"
    treatments = range(1, 10)
    s_groups = [S.filter(regex=f"{t}_{column_tail}") for t in treatments]
    d_groups = [D.filter(regex=f"{t}_{column_tail}") for t in treatments]

    protein_array = []
    kruskal_p = []
    # Filtering columns keeps the row index, so D.index equals the index of
    # every D group.
    for protein in D.index:
        s_rows = S.index == protein
        d_rows = D.index == protein
        # stats.kruskal expects one-dimensional samples; passing the 1-row
        # DataFrames directly either raised (and was silently swallowed) or
        # was evaluated along the wrong axis, depending on the SciPy version.
        samples = [group[s_rows].values.ravel() for group in s_groups]
        samples += [group[d_rows].values.ravel() for group in d_groups]
        try:
            kruskal = stats.kruskal(*samples, nan_policy = "omit")
        except ValueError:
            # Raised by SciPy for degenerate input (e.g. all values being
            # identical); such proteins are skipped.
            continue
        protein_array.append(protein)
        kruskal_p.append(kruskal[1])

    kruskal_df = pd.DataFrame(kruskal_p, index = protein_array, columns = ["p"])
    kruskal_df.sort_values(by="p",inplace=True)
    kruskal_df["q"] = qvalues(kruskal_df, pcolname="p")
    return kruskal_df
Beispiel #4
0
# Probability plots of the stacked values of Sn and Dn against a normal
# distribution (dist="norm"), drawn through pylab.
stats.probplot(Sn.stack().values, dist = "norm", plot=pylab)
stats.probplot(Dn.stack().values, dist = "norm", plot=pylab)


# One-way ANOVA for a single protein (the first index label of S), comparing
# its row in S with its row in D.
protein = S.index[0]
S_comp = S[S.index == protein]
D_comp = D[D.index == protein]
anova = stats.f_oneway(S_comp.values[0] ,D_comp.values[0])

# Bare expression: only shows the row count of S in an interactive session.
len(S)

# Row-wise one-way ANOVA of S against D (axis=1); element [1] of the result
# holds the p-values.
anova_p = stats.f_oneway(S,D,axis=1)[1]
# NOTE(review): no index is passed here, so the frame presumably gets a
# default integer index instead of the protein labels -- confirm intended.
anova_p = pd.DataFrame(anova_p, columns = ["p"])
anova_p["p"].hist(bins=100)
anova_p = anova_p.sort_values(by = "p")
# qvalues is defined outside this excerpt; it is given the frame sorted by
# "p" -- presumably it derives q-values from that column, verify.
anova_p["q"] = qvalues(anova_p, pcolname = "p")
anova_p["q"].hist(bins=1000)
anova_p.dropna().q.hist(bins=200)

# Bare expression: only shows the selected protein label interactively.
protein

# two treatments
# Keep only the columns of S and D whose name matches the regex "1_.*".
S_t = S.filter(regex="1_.*")
D_t = D.filter(regex="1_.*")

def kruskal_test(S,D):
    protein_array = []
    kruskal = []
    for protein in list(set(S.index) & set(D.index)):        
        S_val = S[S.index == protein].values[0]
        if np.isnan(S_val).all():
Beispiel #5
0
def shapiro(df):
    """Apply ``stats.shapiro`` to every row of df.

    The per-row results are collected into a DataFrame that shares df's
    index. The code expects that frame to contain a "pvalue" column, which is
    handed to ``qvalues``; the returned values are stored in column "q".
    """
    row_results = df.apply(stats.shapiro, axis=1)
    table = pd.DataFrame(row_results.tolist(), index=df.index)
    table["q"] = qvalues(table, pcolname="pvalue")
    return table


# Row-wise Shapiro tests for S and D via the shapiro() helper above.
shapiro_S = shapiro(S)  # Shapiro p > 0.05: normality is not rejected at the 5 % level
shapiro_D = shapiro(D)
# Bare expressions: only show the smallest "q" of each table interactively.
shapiro_S.q.min()
shapiro_D.q.min()

# Row-wise independent t-test of S against D (axis=1). The two result arrays
# become rows "t" and "p" and are transposed into columns, one row per label
# of S.index.
ttest = pd.DataFrame(stats.ttest_ind(S, D, axis=1),
                     columns=S.index,
                     index=["t", "p"]).T
# qvalues is defined outside this excerpt; its result for column "p" is
# stored as "q".
ttest["q"] = qvalues(ttest, pcolname="p")
ttest["p"].hist()
plt.title("t-test, (RKO C vs T3)")

# Histogram of all stacked values of S on a fresh figure.
fig, ax = plt.subplots()
S.stack().hist(bins=100)
plt.title("S all FC (RKO C vs T3)")

# Histogram of all stacked values of D on a fresh figure.
fig, ax = plt.subplots()
D.stack().hist(bins=100)
plt.title("D all FC (RKO C vs T3)")
# NOTE(review): this additional call comes after the title was set and
# presumably draws one histogram per column of D -- confirm it is wanted.
D.hist(bins=100)

# NOTE(review): SD is defined outside this excerpt, and `ax` is not passed to
# SD.plot, so the plot may not land on the figure created here. The title
# names MCF7 C vs T1 while the titles above name RKO C vs T3 -- confirm.
fig, ax = plt.subplots()
SD.plot(kind="hist", bins=100, grid=True, alpha=0.7)
plt.title("All FC (MCF7 C vs T1)")