# Read two more Root_NO3 files through ReadData (second argument "dex").
no3_2_storage = ReadData("datasets/RootArrayData/Root_NO3_Wang04.txt", "dex")
no3_3_storage = ReadData("datasets/RootArrayData/Root_NO3_Wang07.txt", "dex")
#ts_storage = ReadData("datasets/RootArrayData/Root_WT_Krouk11.txt", "dex")

# Only the first line of tfs.csv is used: it is split on commas and every
# entry is upper-cased.  The context manager closes the file as soon as the
# line has been read instead of leaving the handle open for the whole run.
with open("datasets/RootArrayData/tfs.csv", 'r') as tfs_file:
    line = tfs_file.readlines()[0]
tfs = [x.upper() for x in line.strip().split(',')]

# Four KNO3norm files, read the same way as the files above.
kno3_1 = ReadData("datasets/RootArrayData/KNO3norm1.csv", "dex")
kno3_2 = ReadData("datasets/RootArrayData/KNO3norm2.csv", "dex")
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")
# Space-separated string of six values.
# NOTE(review): presumably the gaps between consecutive time points -- confirm
# against whatever consumes settings["global"]["time_series_delta_t"].
settings["global"]["time_series_delta_t"] = "3 3 3 3 3 5"

# Call filter() with kno3_1's gene list on every storage loaded so far, in
# the order listed.  NOTE(review): presumably this restricts each storage to
# the genes present in kno3_1 -- confirm against the filter() implementation.
for storage in (
    dex_storage,
    dexcombined,
    dex_storage2,
    cnlo_storage,
    cnlo_no3_storage,
    no3_1_storage,
    no3_2_storage,
    no3_3_storage,
):
    storage.filter(kno3_1.gene_list)

dexcombined.combine(dex_storage2)

# no3_storage is a second name for the no3_1_storage object (no copy is
# made), so the combine() calls below are made on that very object.
no3_storage = no3_1_storage
for extra_no3 in (no3_2_storage, no3_3_storage):
    no3_storage.combine(extra_no3)

# Finally hand the merged NO3 storage to cnlo_no3_storage.
cnlo_no3_storage.combine(no3_storage)
def _read_lines(path):
    """Return the lines of the text file at ``path`` without line endings.

    The file is closed before returning, so no handle is left open.
    """
    with open(path) as handle:
        return handle.read().splitlines()


# Containers initialised empty here.
knockouts = {}
wildtypes = {}
knockdowns = {}
multifactorials = {}
timeseries_as_steady_state = {}

# Read the expression data through ReadData (second argument "kranthi_data").
data = ReadData(data_file, "kranthi_data")

# Each of these names holds a file path on entry and is rebound to the list
# of lines read from that file.
ss_names = _read_lines(ss_names)
ts_names = _read_lines(ts_names)
pert_names = _read_lines(pert_names)

# Filter out the genes we don't want; the names in genes_file are
# upper-cased before being handed to data.filter().
genes_of_interest = [x.upper() for x in _read_lines(genes_file)]
data.filter(genes_of_interest)

# Read in the legend so we know what the experiment names relate to.
# Each legend row is comma-separated: field 0 is the column name, field 1 the
# experiment name, and fields 2-3 (when field 2 is non-empty) are stored as
# that column's entry in pert_cond.
col_to_exp = {}
pert_cond = {}
with open(legend) as legend_file:
    for line in legend_file:
        line = line.strip()
        if not line:
            # A blank line (e.g. a trailing newline at the end of the file)
            # carries no mapping; skip it instead of failing on fields[1].
            continue
        fields = line.split(',')
        colname = fields[0]
        expname = fields[1]
        col_to_exp[colname] = expname

        if fields[2] != "":
            pert_cond[colname] = fields[2:4]

            else:
                data_points[gene] = [exp.ratios[gene]]
        if gene in variances:
            variances[gene].append(variance(data_points[gene]))
        else:
            variances[gene] = [variance(data_points[gene])]

# Score every gene by the mean of its collected variance values, then keep
# the 20 highest-scoring gene names (highest first).
topgenes = {}
for gene, gene_variances in variances.items():
    topgenes[gene] = mean(gene_variances)

topgenes_list = sorted(topgenes, key=topgenes.get, reverse=True)[:20]

# Call filter() with the selected gene names on each dataset, in the order
# listed.
for dataset in (c4d, c4l, c21d, c21hl, c21l, c21ll, c32l, c32l2):
    dataset.filter(topgenes_list)

#for dataset in ts_storage:
    #dataset.normalize()

# Apply the same gene filter to `combined`, then pass further datasets to its
# combine() method one at a time.
# NOTE(review): c4d is filtered earlier but is not passed to combine() here --
# presumably `combined` was created from c4d before this point; confirm.
combined.filter(topgenes_list)

combined.combine(c4l)
combined.combine(c21d)