Esempio n. 1
0
#     sample1 = [row['CFU'], row['unk']]
#     sample2 = [row['poly'], row['int']]
#     print(gene_name, stats.ttest_ind(sample1, sample2).pvalue)
# Flatten the gene index and the four expression columns of the filtered
# frame into plain lists that are walked in parallel below.
gene_name1 = list(diff_exp_genes.index.values)
cfu1, poly1, int1, unk1 = (
    list(diff_exp_genes[column].values)
    for column in ("CFU", "poly", "int", "unk")
)
l = len(gene_name1)

# For every gene, run a paired t-test of the "early" pair (CFU, unk) against
# the "late" pair (poly, int) and keep the gene names with p < 0.05.
sig_de_genes = []
for name, cfu_val, unk_val, poly_val, int_val in zip(
        gene_name1, cfu1, unk1, poly1, int1):
    _, p_value = sp.ttest_rel([cfu_val, unk_val], [poly_val, int_val])
    if p_value < 0.05:
        sig_de_genes.append(name)
print(sig_de_genes)

# Gene names from the data frame index, and the k-means label list that is
# indexed by the same positions below.
genes = list(df_data.index.values)
labels = list(kmeans.labels_)

# The gene of interest is named by the second command-line argument;
# list.index raises ValueError if it is not among the genes.
goi_index = genes.index(sys.argv[2])
goi_cluster = labels[goi_index]

# Every gene whose label equals the cluster of the gene of interest
# (the gene of interest itself included).
related_genes = [
    candidate
    for position, candidate in enumerate(genes)
    if labels[position] == goi_cluster
]
print(related_genes)
import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import linkage, dendrogram, leaves_list
import scipy.stats as sp

# Load the tab-separated table named by the first command-line argument,
# using its first column as the row index. The context manager closes the
# file handle as soon as pandas has finished reading it.
with open(sys.argv[1]) as hema:
    df = pd.read_csv(hema, sep="\t", index_col=0)

# Ratio of the mean of the CFU/unk columns to the mean of the poly/int
# columns, computed once and reused for both thresholds.
fold_change = ((df['CFU'] + df['unk']) / 2) / ((df['poly'] + df['int']) / 2)
diff_exp_high = fold_change >= 2
diff_exp_low = fold_change <= 0.5

# Keep only the rows whose ratio is at least 2 or at most 0.5 (a two-fold
# change in either direction); these candidates are tested for significance
# afterwards.
diff_exp_genes = df[diff_exp_high | diff_exp_low]

# Paired t-test per candidate row: (CFU, unk) against (poly, int).
# Rows with a p-value of at most 0.05 are printed with that p-value.
for gene_name, row in diff_exp_genes.iterrows():
    sample1 = [row['CFU'], row['unk']]
    sample2 = [row['poly'], row['int']]
    # Run the test once and reuse the result for both the threshold check
    # and the printed output.
    pvalue = sp.ttest_rel(sample1, sample2).pvalue
    if pvalue <= 0.05:
        print(gene_name, pvalue)

# for gene, row in diff_exp_genes.iterrows():
#     diff_high = (((df['CFU']+df['unk'])/2)/((df['poly']+df['int'])/2))
#     diff_low = (((df['CFU']+df['unk'])/2)/((df['poly']+df['int'])/2))
#