# sample1 = [row['CFU'], row['unk']]
# sample2 = [row['poly'], row['int']]
# print(gene_name, stats.ttest_ind(sample1, sample2).pvalue)

# ---------------------------------------------------------------------------
# Paired t-test over the pre-filtered genes in `diff_exp_genes`.
#
# For each gene the two "early" values (CFU, unk) are compared with the two
# "late" values (poly, int) using scipy.stats.ttest_rel; genes whose p-value
# is below 0.05 are collected in `sig_de_genes` and printed.
# ---------------------------------------------------------------------------

# Column values as plain lists. These module-level names are kept as they
# were in case code further down the file still refers to them.
cfu1 = list(diff_exp_genes["CFU"].values)
poly1 = list(diff_exp_genes["poly"].values)
int1 = list(diff_exp_genes["int"].values)
unk1 = list(diff_exp_genes["unk"].values)
gene_name1 = list(diff_exp_genes.index.values)
l = len(gene_name1)

sig_de_genes = []
# All five lists come from the same data frame, so they have equal length and
# can be walked in lockstep instead of by index.
for name, cfu, unk, poly, intermediate in zip(
    gene_name1, cfu1, unk1, poly1, int1
):
    early = [cfu, unk]
    late = [poly, intermediate]
    # Only the p-value is needed; the test statistic is ignored.
    p = sp.ttest_rel(early, late).pvalue
    # A NaN p-value compares False here, so such genes are not reported.
    if p < 0.05:
        sig_de_genes.append(name)

print(sig_de_genes)

# ---------------------------------------------------------------------------
# Genes sharing a k-means cluster with the gene of interest (sys.argv[2]).
#
# NOTE(review): this pairs kmeans.labels_[i] with the i-th index entry of
# `df_data`, i.e. it assumes `kmeans` was fitted on the rows of `df_data` in
# that order -- confirm against the fitting code.
# ---------------------------------------------------------------------------
labels = list(kmeans.labels_)
genes = list(df_data.index.values)

goi = sys.argv[2]
try:
    goi_index = genes.index(goi)
except ValueError:
    # Exit with a readable message instead of a bare ValueError traceback.
    sys.exit("Gene {!r} was not found in the clustered data.".format(goi))
goi_cluster = labels[goi_index]

related_genes = [
    gene for gene, label in zip(genes, labels) if label == goi_cluster
]
print(related_genes)
import sys

import scipy as sp
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy.cluster.hierarchy import linkage, dendrogram, leaves_list
# NOTE: this rebinds `sp`; from here on `sp` refers to scipy.stats, not to the
# top-level scipy package imported above.
import scipy.stats as sp

# Read the tab-separated table named by the first command-line argument; the
# first column becomes the index. The `with` block makes sure the file handle
# is closed once the table has been read.
with open(sys.argv[1]) as hema:
    df = pd.read_csv(hema, sep="\t", index_col=0)

# Ratio of the mean of (CFU, unk) to the mean of (poly, int), computed once
# and reused for both thresholds.
early_mean = (df['CFU'] + df['unk']) / 2
late_mean = (df['poly'] + df['int']) / 2
fold_change = early_mean / late_mean

diff_exp_high = fold_change >= 2
diff_exp_low = fold_change <= 0.5

# Keep only the rows whose ratio is at least 2 or at most 0.5; these are the
# rows that get tested below.
diff_exp_genes = df[diff_exp_high | diff_exp_low]

# Paired t-test of (CFU, unk) against (poly, int) for every retained row;
# print the row label and p-value when p <= 0.05.
for gene_name, row in diff_exp_genes.iterrows():
    sample1 = [row['CFU'], row['unk']]
    sample2 = [row['poly'], row['int']]
    # Run the test once and reuse the p-value for both the check and the
    # printout.
    pvalue = sp.ttest_rel(sample1, sample2).pvalue
    if pvalue <= 0.05:
        print(gene_name, pvalue)

# for gene, row in diff_exp_genes.iterrows():
#     diff_high = (((df['CFU']+df['unk'])/2)/((df['poly']+df['int'])/2))
#     diff_low = (((df['CFU']+df['unk'])/2)/((df['poly']+df['int'])/2))
#