예제 #1
0
    def run(self, ko_file, wt_file, ts_file=None, kd_file=None, name=None):
        import numpy

        os.chdir(os.environ["gene_path"])

        print "Reading in knockout data"
        knockout_storage = ReadData(ko_file, "knockout")
        knockout_storage.normalize()
        wildtype_storage = ReadData(wt_file, "wildtype")
        wildtype_storage.normalize()
        knockdown_storage = ReadData(kd_file, "knockdown")
        knockdown_storage.normalize()

        wildtype_storage.combine(knockdown_storage)

        timeseries_storage = None
        if ts_file != None:
            timeseries_storage = ReadData(ts_file, "timeseries")
            for ts in timeseries_storage:
                ts.normalize()

        settings = {}
        settings = ReadConfig(settings)
        # TODO: CHANGE ME
        settings["global"]["working_dir"] = os.getcwd() + '/'

        # Setup job manager
        print "Starting new job manager"
        jobman = JobManager(settings)

        # Make MCZ jobs
        mczjob = MCZ()
        mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, name)

        print "Queuing job..."
        jobman.queueJob(mczjob)

        print jobman.queue
        print "Running queue..."
        jobman.runQueue()
        jobman.waitToClear()

        print "Queue finished"
        job = jobman.finished[0]
        print job.alg.gene_list
        print job.alg.read_output(settings)
        jobnet = job.alg.network
        print "PREDICTED NETWORK:"
        print job.alg.network.network
        print jobnet.original_network

        return jobnet.original_network
예제 #2
0
    def run(self, name, datafiles, goldnet_file):
        import numpy

        os.chdir(os.environ["gene_path"])

        datastore = ReadData(datafiles[0], "steadystate")
        for file in datafiles[1:]:
            datastore.combine(ReadData(file, "steadystate"))
        datastore.normalize()

        settings = {}
        settings = ReadConfig(settings)
        # TODO: CHANGE ME
        settings["global"]["working_dir"] = os.getcwd() + '/'

        # Setup job manager
        print "Starting new job manager"
        jobman = JobManager(settings)

        # Make GENIE3 jobs
        genie3 = GENIE3()
        genie3.setup(datastore, settings, name)

        print "Queuing job..."
        jobman.queueJob(genie3)

        print jobman.queue
        print "Running queue..."
        jobman.runQueue()
        jobman.waitToClear()

        print "Queue finished"
        job = jobman.finished[0]
        print job.alg.gene_list
        print job.alg.read_output(settings)
        jobnet = job.alg.network
        print "PREDICTED NETWORK:"
        print job.alg.network.network
        print jobnet.original_network

        if goldnet_file != None:
            goldnet = Network()
            goldnet.read_goldstd(goldnet_file)
            print "GOLD NETWORK:"
            print goldnet.network
            print jobnet.analyzeMotifs(goldnet).ToString()
            print jobnet.calculateAccuracy(goldnet)

        return jobnet.original_network
# Parse each gold-standard file in place: after this loop the goldnets dict
# maps name -> Network object instead of name -> file path.
for key in goldnets.keys():
    goldnet = Network()
    goldnet.read_goldstd(goldnets[key])
    goldnets[key] = goldnet



# Queue 20 repeated GENIE3 runs per dataset, combining the raw time series
# with steady-state-converted series and multifactorial data.
genie3nets = {}
for i in range(20):
    for name in data.keys():
        ts_storage = data[name]
        # Spread 1008 time units evenly over the series' intervals
        # (assumes every series in ts_storage has the same length — TODO confirm).
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
        combined = ReadData(exp_data_directory + '/' + name + '/' + timeseries_filename, "timeseries")[0]

        # Merge steady-state-converted series 1..10 into the combined storage.
        for ts in timeseries_as_steady_state[name][1:11]:
            combined.combine(ts)
        #combined.combine(knockouts[name])
        combined.combine(multifactorials[name])

        genie3job = GENIE3()
        genie3job.setup(combined, settings, "Genie3_TimeSeries_{0}_{1}".format(name, i))
        jobman.queueJob(genie3job)
        # NOTE(review): keyed by name only, so each repetition i overwrites the
        # previous entry for this dataset — verify that is intended.
        genie3nets[name] = genie3job
        genie3job.goldnet = goldnets[name]


# Run every queued job and block until all have completed.
jobman.runQueue()
jobman.waitToClear()

# NOTE(review): this `for` header has no indented body in the file — the
# top-level code below looks like a separate snippet pasted after it;
# confirm against the original script before running.
for job in jobman.finished:
goldnet = Network()
goldnet.read_goldstd(settings["global"]["large_network_goldnet_file"])

# Each settings value is a whitespace-separated list of paths; split() turns
# it into a Python list (only the first element is used below).
ko_file = settings["global"]["large_network_knockout_file"].split()
kd_file = settings["global"]["large_network_knockdown_file"].split()
ts_file = settings["global"]["large_network_timeseries_file"].split()

wt_file = settings["global"]["large_network_wildtype_file"].split()

# Read data into program
# Where the format is "FILENAME" "DATATYPE"
knockout_storage = ReadData(ko_file[0], "knockout")
knockdown_storage = ReadData(kd_file[0], "knockdown")
timeseries_storage = ReadData(ts_file[0], "timeseries")
wildtype_storage = ReadData(wt_file[0], "wildtype")
# Fold knockout, knockdown and time-series data into the wildtype storage.
wildtype_storage.combine(knockout_storage)
wildtype_storage.combine(knockdown_storage)
wildtype_storage.combine(timeseries_storage)



# Setup job manager
jobman = JobManager(settings)

# Make BANJO jobs
mczjob = MCZ()
mczjob.setup(knockout_storage, wildtype_storage, settings, None, "mcz-test-run-1")
jobman.queueJob(mczjob)

print jobman.queue
jobman.runQueue()
def get_network_results(name, settings, cache):
  print "STARTING", name

  if name in cache.keys():
    print "CACHE HIT"
    return cache[name]

  ko_file, kd_file, ts_file, wt_file, mf_file, goldnet = get_example_data_files(name, settings)

  # Create date string to append to output_dir
  t = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
  settings["global"]["output_dir"] = settings["global"]["output_dir_save"] + "/" + \
      settings["global"]["experiment_name"] + "-" + t + "-" + name + "/"
  os.mkdir(settings["global"]["output_dir"])

  # Get a list of the multifactorial files

  # Read data into program
  # Where the format is "FILENAME" "DATATYPE"
  mf_storage = ReadData(mf_file[0], "multifactorial")
  knockout_storage = ReadData(ko_file[0], "knockout")
  knockdown_storage = ReadData(kd_file[0], "knockdown")
  wildtype_storage = ReadData(wt_file[0], "wildtype")
  timeseries_storage = ReadData(ts_file[0], "timeseries")
  gene_list = knockout_storage.gene_list

  # Setup job manager
  jobman = JobManager(settings)

  # MCZ
  mczjob = MCZ()
  mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "MCZ")
  jobman.queueJob(mczjob)

  # CLR
  clrjob = CLR()
  clrjob.setup(knockout_storage, settings, "CLR", "plos", 6)
  jobman.queueJob(clrjob)

  # GENIE3
  mf_storage.combine(knockout_storage)
  mf_storage.combine(wildtype_storage)
  mf_storage.combine(knockdown_storage)
  genie3job = GENIE3()
  genie3job.setup(mf_storage, settings, "GENIE3")
  jobman.queueJob(genie3job)

  ## TLCLR
  tlclrjob = TLCLR()
  tlclrjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "TLCLR")
  jobman.queueJob(tlclrjob)

  #if sys.argv[1] != "dream4100":
      #cojob = ConvexOptimization()
      #cojob.setup(knockout_storage, settings, "ConvOpt_T-"+ str(0.01),None, None, 0.01)
      #jobman.queueJob(cojob)

  ### DFG4GRN
  dfg = DFG4GRN()
  settings["dfg4grn"]["eta_z"] = 0.01
  settings["dfg4grn"]["lambda_w"] = 0.001
  settings["dfg4grn"]["tau"] = 3
  dfg.setup(timeseries_storage, TFList(timeseries_storage[0].gene_list), settings, "DFG", 20)
  jobman.queueJob(dfg)

  ### Inferelator

  ### NIR
  nirjob = NIR()
  nirjob.setup(knockout_storage, settings, "NIR", 5, 5)
  jobman.queueJob(nirjob)

  #### TDARACNE
  settings = ReadConfig(settings, "./config/default_values/tdaracne.cfg")
  bjob = tdaracne()
  settings["tdaracne"]["num_bins"] = 4
  bjob.setup(timeseries_storage, settings, "TDARACNE")
  jobman.queueJob(bjob)


  print jobman.queue
  jobman.runQueue()
  jobman.waitToClear(name)
  SaveResults(jobman.finished, goldnet, settings, name)

  cache[name] = jobman.finished[:]

  return cache[name]
# Four replicate KNO3 treatment arrays, read in "dex" format.
kno3_1 = ReadData("datasets/RootArrayData/KNO3norm1.csv", "dex")
kno3_2 = ReadData("datasets/RootArrayData/KNO3norm2.csv", "dex")
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")
# Per-interval time deltas for the series (units unspecified here — TODO
# confirm against the dataset documentation).
settings["global"]["time_series_delta_t"] = "3 3 3 3 3 5"

# Restrict every storage to the gene list of the first KNO3 replicate so
# all datasets share one gene set.
dex_storage.filter(kno3_1.gene_list)
dexcombined.filter(kno3_1.gene_list)
dex_storage2.filter(kno3_1.gene_list)
cnlo_storage.filter(kno3_1.gene_list)
cnlo_no3_storage.filter(kno3_1.gene_list)
no3_1_storage.filter(kno3_1.gene_list)
no3_2_storage.filter(kno3_1.gene_list)
no3_3_storage.filter(kno3_1.gene_list)

dexcombined.combine(dex_storage2)
# Merge the three NO3 replicates into one storage.
# NOTE(review): no3_storage aliases no3_1_storage (no copy is made), so the
# combines below also mutate no3_1_storage — verify that is intended.
no3_storage = no3_1_storage
no3_storage.combine(no3_2_storage)
no3_storage.combine(no3_3_storage)

cnlo_no3_storage.combine(no3_storage)

#all_storage.combine(cnlo_no3_storage)

#dex_storage.combine(cnlo_storage)
#dex_storage.combine(no3_storage)

#dex_storage.normalize()
no3_storage.normalize()
cnlo_storage.normalize()
cnlo_no3_storage.normalize()
    def run(self, kofile, tsfile, wtfile, datafiles, name, goldnet_file, normalize=False):
        os.chdir(os.environ["gene_path"])
        knockout_storage = ReadData(kofile, "knockout")
        print "Reading in knockout data"
        wildtype_storage = ReadData(wtfile, "steadystate")

        if datafiles == []:
          other_storage = None
        else:
          other_storage = ReadData(datafiles[0], "steadystate")
          for file in datafiles[1:]:
              other_storage.combine(ReadData(file, "steadystate"))

        timeseries_storage = None
        if tsfile != None:
            timeseries_storage = ReadData(tsfile, "timeseries")
            #for ts in timeseries_storage:
                #ts.normalize()

        #if normalize:
            #knockout_storage.normalize()
            #wildtype_storage.normalize()
            #other_storage.normalize()


        settings = {}
        settings = ReadConfig(settings)
        # TODO: CHANGE ME
        settings["global"]["working_dir"] = os.getcwd() + '/'

        # Setup job manager
        print "Starting new job manager"
        jobman = JobManager(settings)

        # Make inferelator jobs
        inferelatorjob = inferelator()
        inferelatorjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, other_storage, name)

        print "Queuing job..."
        jobman.queueJob(inferelatorjob)

        print jobman.queue
        print "Running queue..."
        jobman.runQueue()
        jobman.waitToClear()

        print "Queue finished"
        job = jobman.finished[0]
        #print job.alg.gene_list
        #print job.alg.read_output(settings)
        jobnet = job.alg.network
        #print "PREDICTED NETWORK:"
        #print job.alg.network.network
        print jobnet.original_network

        if goldnet_file != None:
            goldnet = Network()
            goldnet.read_goldstd(goldnet_file)
            #print "GOLD NETWORK:"
            #print goldnet.network
            #print jobnet.analyzeMotifs(goldnet).ToString()
            print jobnet.calculateAccuracy(goldnet)
            import AnalyzeResults
            tprs, fprs, rocs = AnalyzeResults.GenerateMultiROC(jobman.finished, goldnet )
            ps, rs, precs = AnalyzeResults.GenerateMultiPR(jobman.finished, goldnet)
            print "Area Under ROC"
            print rocs

            print "Area Under PR"
            print precs

        return jobnet.original_network
# Restrict each condition's storage to the top-ranked gene list so all
# datasets share one gene set.
c4d.filter(topgenes_list)
c4l.filter(topgenes_list)
c21d.filter(topgenes_list)
c21hl.filter(topgenes_list)
c21l.filter(topgenes_list)
c21ll.filter(topgenes_list)
c32l.filter(topgenes_list)
c32l2.filter(topgenes_list)

#for dataset in ts_storage:
    #dataset.normalize()

combined.filter(topgenes_list)

# Merge all remaining condition datasets into `combined` (in place).
combined.combine(c4l)
combined.combine(c21d)
combined.combine(c21hl)
combined.combine(c21l)
combined.combine(c21ll)
combined.combine(c32l)
combined.combine(c32l2)


# Remove the last time point for testing
# (collect the final experiment of each series as a held-out set).
leave_out = []
for i, ts in enumerate(ts_storage):
    leave_out.append(ts.experiments[-1])


#goldnet = Network()
예제 #9
0
    def run(self, datafiles=None, name=None, goldnet_file=None, topd=None, restk=None):
        import numpy

        os.chdir(os.environ["gene_path"])

        print "Reading in data"
        data_storage = ReadData(datafiles[0], "steadystate")
        for file in datafiles[1:]:
            data_storage.combine(ReadData(file, "steadystate"))

        settings = {}
        settings = ReadConfig(settings)
        # TODO: CHANGE ME
        settings["global"]["working_dir"] = os.getcwd() + "/"

        # Setup job manager
        print "Starting new job manager"
        jobman = JobManager(settings)

        # Make nir jobs
        nirjob = NIR()
        nirjob.setup(data_storage, settings, name, topd, restk)

        print "Queuing job..."
        jobman.queueJob(nirjob)

        print jobman.queue
        print "Running queue..."
        jobman.runQueue()
        jobman.waitToClear()

        print "Queue finished"
        job = jobman.finished[0]
        print job.alg.gene_list
        print job.alg.read_output(settings)
        jobnet = job.alg.network
        print "PREDICTED NETWORK:"
        print job.alg.network.network

        if goldnet_file != None:
            goldnet = Network()
            goldnet.read_goldstd(goldnet_file)
            # print "GOLD NETWORK:"
            # print goldnet.network
            # print jobnet.analyzeMotifs(goldnet).ToString()
            print jobnet.calculateAccuracy(goldnet)
            import AnalyzeResults

            tprs, fprs, rocs = AnalyzeResults.GenerateMultiROC(
                jobman.finished, goldnet, True, job.alg.output_dir + "/ROC.pdf"
            )
            ps, rs, precs = AnalyzeResults.GenerateMultiPR(
                jobman.finished, goldnet, True, job.alg.output_dir + "/PR.pdf"
            )
            print "Area Under ROC"
            print rocs

            print "Area Under PR"
            print precs

        return job.alg.network.network
# Job manager that will execute all queued inference jobs.
jobman = JobManager(settings)

# Make BANJO jobs
mczjob = MCZ()
mczjob.setup(knockout_storage, wildtype_storage, settings, timeseries_storage, knockdown_storage, "MCZ_Alone")
jobman.queueJob(mczjob)

# CLR with 6 bins, using the "plos" variant; job name embeds timestamp t.
clrjob = CLR()
clrjob.setup(knockout_storage, settings, "clr_" + t + "_Bins-" + str(6), "plos", 6)
jobman.queueJob(clrjob)

#cojob = ConvexOptimization()
#cojob.setup(knockout_storage, settings, "ConvOpt_T-Plos",None, None, 0.04)
#jobman.queueJob(cojob)

# GENIE3 on multifactorial data merged with KO/WT/KD steady states
# (combine mutates mf_storage in place).
mf_storage.combine(knockout_storage)
mf_storage.combine(wildtype_storage)
mf_storage.combine(knockdown_storage)
genie3job = GENIE3()
genie3job.setup(mf_storage, settings, "MF_KO_WT_KD")
jobman.queueJob(genie3job)

print jobman.queue
jobman.runQueue()
# Block until all queued jobs have completed.
jobman.waitToClear()
# Accumulators for accuracy/precision results (filled elsewhere).
accs = []
precs = []
# DFG4GRN hyper-parameters for this run.
settings["dfg4grn"]["eta_z"] = 0.001
settings["dfg4grn"]["lambda_w"] = 0.01
settings["dfg4grn"]["tau"] = 3
# Read data into program
# Where the format is "FILENAME" "DATATYPE"
mf_storage = ReadData(mf_file[0], "multifactorial")
ko_storage = ReadData(ko_file[0], "knockout")
kd_storage = ReadData(kd_file[0], "knockdown")
wt_storage = ReadData(wt_file[0], "wildtype")

# Setup job manager
jobman = JobManager(settings)

# Queue GENIE3 on progressively larger data unions: MF alone, then MF+KO,
# MF+KO+WT, MF+KO+WT+KD (combine mutates mf_storage in place, so each job
# sees everything combined so far).
genie3job = GENIE3()
genie3job.setup(mf_storage, settings, "MF")
jobman.queueJob(genie3job)

mf_storage.combine(ko_storage)
genie3job = GENIE3()
genie3job.setup(mf_storage, settings, "MF_KO")
jobman.queueJob(genie3job)

mf_storage.combine(wt_storage)
genie3job = GENIE3()
genie3job.setup(mf_storage, settings, "MF_KO_WT")
jobman.queueJob(genie3job)

mf_storage.combine(kd_storage)
genie3job = GENIE3()
genie3job.setup(mf_storage, settings, "MF_KO_WT_KD")
jobman.queueJob(genie3job)

print jobman.queue
            if goldnet.network[gene1][gene2] > 0:
                t.append(gene1)
    tfs[name] = list(set(t))

# Parse a gold standard using the first dataset's gold file as reference.
goldnet = Network()
goldnet.read_goldstd(goldnets[data.keys()[0]])

genie3nets = {}

# Queue 50 GENIE3 repetitions per dataset, combining the raw time series
# with all steady-state-converted series and the multifactorial data.
for name in data.keys():
    for i in range(50):
        ts_storage = data[name]
        # Spread 1008 time units evenly over the series' intervals.
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
        combined = ReadData(exp_data_directory + '/' + name + '/' + timeseries_filename, "timeseries")[0]
        for ts in timeseries_as_steady_state[name][1:]:
            combined.combine(ts)
        combined.combine(multifactorials[name])

        genie3job = GENIE3()
        genie3job.setup(combined, settings, "Genie3_TimeSeries_SS_{0}-{1}".format(name, i))
        jobman.queueJob(genie3job)
        # NOTE(review): keyed by name only — each repetition overwrites the
        # previous entry for this dataset; confirm that is intended.
        genie3nets[name] = genie3job


# Run all queued jobs and block until completion.
jobman.runQueue()
jobman.waitToClear()

# NOTE(review): this loop appears truncated at the chunk boundary — its
# body ends after setting delta_t; the remainder is outside this view.
for name in data.keys():
    for i in range(50):
        ts_storage = data[name]
        settings["global"]["time_series_delta_t"] = (1008.0 / (len(ts_storage[0].experiments)-1))
# Two more KNO3 replicate arrays in "dex" format (replicates 1 and 2 are
# read earlier, outside this snippet).
kno3_3 = ReadData("datasets/RootArrayData/KNO3norm3.csv", "dex")
kno3_4 = ReadData("datasets/RootArrayData/KNO3norm4.csv", "dex")


# Restrict every storage to the first KNO3 replicate's gene list so all
# datasets share one gene set.
dex_storage.filter(kno3_1.gene_list)
cnlo_storage.filter(kno3_1.gene_list)
cnlo_no3_storage.filter(kno3_1.gene_list)
no3_1_storage.filter(kno3_1.gene_list)
no3_2_storage.filter(kno3_1.gene_list)
no3_3_storage.filter(kno3_1.gene_list)

# Merge the three NO3 replicates into one storage.
# NOTE(review): no3_storage aliases no3_1_storage (no copy is made), so the
# combines below also mutate no3_1_storage — verify that is intended.
no3_storage = no3_1_storage
no3_storage.combine(no3_2_storage)
no3_storage.combine(no3_3_storage)

cnlo_no3_storage.combine(no3_storage)

#all_storage.combine(cnlo_no3_storage)

#dex_storage.combine(cnlo_storage)
#dex_storage.combine(no3_storage)

#dex_storage.normalize()
no3_storage.normalize()
cnlo_storage.normalize()
cnlo_no3_storage.normalize()
#all_storage.normalize()

# Set delta_t to be without the last time point
settings["global"]["time_series_delta_t"] = "3 3 3 3 3"
# Remove the last time point from each of these