# Load the Wang04 and Wang07 root NO3 files in "dex" format.
no3_2_storage = ReadData("datasets/RootArrayData/Root_NO3_Wang04.txt", "dex")
no3_3_storage = ReadData("datasets/RootArrayData/Root_NO3_Wang07.txt", "dex")
#ts_storage = ReadData("datasets/RootArrayData/Root_WT_Krouk11.txt", "dex")

# The first line of tfs.csv holds a comma-separated list of names; keep them
# upper-cased.  The context manager guarantees the handle is closed even if
# the file is empty and the [0] lookup raises.
with open("datasets/RootArrayData/tfs.csv", 'r') as tfs_file:
    line = tfs_file.readlines()[0]
tfs = [x.upper() for x in line.strip().split(',')]

# Load the four normalised KNO3 files.
kno3_1 = ReadData("datasets/RootArrayData/KNO3norm1.csv", "dex")
kno3_2 = ReadData("datasets/RootArrayData/KNO3norm2.csv", "dex")
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")

settings["global"]["time_series_delta_t"] = "3 3 3 3 3 5"

# Restrict every previously loaded storage to the gene list of kno3_1.
# The order of the calls is the same as in the original statement sequence.
for storage in (
    dex_storage,
    dexcombined,
    dex_storage2,
    cnlo_storage,
    cnlo_no3_storage,
    no3_1_storage,
    no3_2_storage,
    no3_3_storage,
):
    storage.filter(kno3_1.gene_list)

dexcombined.combine(dex_storage2)

# NOTE: this binds a second name to the same object rather than copying it,
# so the combine() calls below are also calls on no3_1_storage.
# Presumably combine() merges in place (its return value is discarded) --
# TODO confirm against the storage class.
no3_storage = no3_1_storage
no3_storage.combine(no3_2_storage)
no3_storage.combine(no3_3_storage)
cnlo_no3_storage.combine(no3_storage)
knockouts = {}
wildtypes = {}
knockdowns = {}
multifactorials = {}
timeseries_as_steady_state = {}

# Load the data set from data_file.  (The former ``data = {}`` placeholder
# was overwritten immediately and has been dropped.)
data = ReadData(data_file, "kranthi_data")

# Each *_names variable arrives holding a path and is rebound to the list of
# lines read from that path.  ``with`` closes every handle deterministically
# instead of leaving it to the garbage collector.
with open(ss_names) as names_file:
    ss_names = names_file.read().splitlines()
with open(ts_names) as names_file:
    ts_names = names_file.read().splitlines()
with open(pert_names) as names_file:
    pert_names = names_file.read().splitlines()

# Filter out the genes we don't want
with open(genes_file) as names_file:
    genes_of_interest = names_file.read().splitlines()
genes_of_interest = [x.upper() for x in genes_of_interest]
data.filter(genes_of_interest)

# Read in the legend so we know what the experiment names relate to.
# The lines are read up front so the handle is closed before processing.
col_to_exp = {}
pert_cond = {}
with open(legend) as legend_file:
    legend_lines = legend_file.readlines()
for line in legend_lines:
    line = line.strip()
    line = line.split(',')
    colname = line[0]
    expname = line[1]
    col_to_exp[colname] = expname
    # A non-empty third field means the third and fourth fields are stored
    # for this column in pert_cond.
    if line[2] != "":
        pert_cond[colname] = line[2:4]
else: data_points[gene] = [exp.ratios[gene]] if gene in variances: variances[gene].append(variance(data_points[gene])) else: variances[gene] = [variance(data_points[gene])] # Get the top 100 most varying genes topgenes = {} for gene in variances: topgenes[gene] = mean(variances[gene]) topgenes_list = sorted(topgenes, key=lambda key: topgenes[key], reverse=True) topgenes_list = topgenes_list[0:20] c4d.filter(topgenes_list) c4l.filter(topgenes_list) c21d.filter(topgenes_list) c21hl.filter(topgenes_list) c21l.filter(topgenes_list) c21ll.filter(topgenes_list) c32l.filter(topgenes_list) c32l2.filter(topgenes_list) #for dataset in ts_storage: #dataset.normalize() combined.filter(topgenes_list) combined.combine(c4l) combined.combine(c21d)