Esempio n. 1
0
    def Run(self):
        """Run the resampling analysis and write the report to ``self.output``.

        Steps, in order: load the control and experimental datasets with
        ``transit_tools.get_validated_data``; normalize them unless
        ``self.normalization`` is ``"nonorm"``; apply LOESS correction when
        ``self.LOESS`` is set; call ``stat_tools.resampling`` for every gene
        with ``k != 0`` and ``n != 0`` (other genes get neutral defaults and a
        p-value of 1); adjust the two-tailed p-values with
        ``stat_tools.BH_fdr_correction``; write a header and one tab-separated
        line per gene, then close ``self.output``.

        When ``self.doHistogram`` is set, one PNG per gene is saved in a
        directory next to the output file whose name is
        ``transit_tools.fetch_name(self.output.name) + "_histograms"``.  If
        matplotlib cannot be imported, ``self.doHistogram`` is switched off
        and the analysis continues without histograms.
        """
        # Histograms are optional: a broken matplotlib install must not abort
        # the analysis.  ``except Exception`` keeps that best-effort behaviour
        # without swallowing KeyboardInterrupt/SystemExit.
        try:
            import matplotlib.pyplot as plt
        except Exception:
            print("Error: cannot do histograms")
            self.doHistogram = False

        self.transit_message("Starting resampling Method")
        start_time = time.time()

        if self.doHistogram:
            histPath = os.path.join(
                os.path.dirname(self.output.name),
                transit_tools.fetch_name(self.output.name) + "_histograms")
            # Created once up front; every per-gene PNG goes in here.
            if not os.path.isdir(histPath):
                os.makedirs(histPath)
        else:
            histPath = ""

        # Rows [0:Kctrl] of each gene's reads are control replicates, the
        # remaining rows are experimental (datasets are concatenated below).
        Kctrl = len(self.ctrldata)

        # Get orf data
        self.transit_message("Getting Data")
        all_datasets = self.ctrldata + self.expdata
        (data, position) = transit_tools.get_validated_data(all_datasets,
                                                            wxobj=self.wxobj)
        K = data.shape[0]

        if self.normalization != "nonorm":
            self.transit_message("Normalizing using: %s" % self.normalization)
            (data, factors) = norm_tools.normalize_data(data,
                                                        self.normalization,
                                                        all_datasets,
                                                        self.annotation_path)

        if self.LOESS:
            self.transit_message("Performing LOESS Correction")
            for j in range(K):
                data[j] = stat_tools.loess_correction(position, data[j])

        G = tnseq_tools.Genes(all_datasets,
                              self.annotation_path,
                              ignoreCodon=self.ignoreCodon,
                              nterm=self.NTerminus,
                              cterm=self.CTerminus,
                              data=data,
                              position=position)

        # Resampling
        rows = []
        N = len(G)
        self.progress_range(N)
        for count, gene in enumerate(G, 1):
            if gene.k == 0 or gene.n == 0:
                # Nothing to test: report zeros and a p-value of 1.
                (test_obs, mean1, mean2, log2FC, pval_2tail) = (0, 0, 0, 0,
                                                                1.00)
                testlist = []
                data1 = [0]
                data2 = [0]
            else:
                if not self.includeZeros:
                    # Keep only sites with a non-zero column sum.
                    ii = numpy.sum(gene.reads, 0) > 0
                else:
                    ii = numpy.ones(gene.n) == 1

                data1 = gene.reads[:Kctrl, ii].flatten() + self.pseudocount
                data2 = gene.reads[Kctrl:, ii].flatten() + self.pseudocount

                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,
                 pval_2tail, testlist) = stat_tools.resampling(
                     data1,
                     data2,
                     S=self.samples,
                     testFunc=stat_tools.F_mean_diff_flat,
                     adaptive=self.adaptive)

            if self.doHistogram:
                # ``plt`` is bound here: doHistogram is forced to False above
                # whenever the import failed.
                plt.hist(testlist if testlist else [0, 0],
                         density=1,
                         facecolor='c',
                         alpha=0.75,
                         bins=100)
                plt.xlabel('Delta Mean')
                plt.ylabel('Probability')
                plt.title('%s - Histogram of Delta Mean' % gene.orf)
                plt.axvline(test_obs,
                            color='r',
                            linestyle='dashed',
                            linewidth=3)
                plt.grid(True)
                plt.savefig(os.path.join(histPath, gene.orf + ".png"))
                plt.clf()

            sum1 = numpy.sum(data1)
            sum2 = numpy.sum(data2)
            rows.append([
                gene.orf, gene.name, gene.desc, gene.n, mean1, mean2, sum1,
                sum2, test_obs, log2FC, pval_2tail
            ])

            # Update progress
            text = "Running Resampling Method... %5.1f%%" % (100.0 * count / N)
            self.progress_update(text, count)

        self.transit_message("")  # Printing empty line to flush stdout
        self.transit_message("Performing Benjamini-Hochberg Correction")
        # Sort first so that qval[i] lines up with rows[i] when writing.
        rows.sort()
        qval = stat_tools.BH_fdr_correction([row[-1] for row in rows])

        # NOTE(review): on Python 3 ``.encode('utf-8')`` returns bytes, which
        # "%s" renders as b'...' in the header lines below -- confirm the
        # target interpreter before changing these calls.
        self.output.write("#Resampling\n")
        if self.wxobj:
            self.output.write(
                "#GUI with: norm=%s, samples=%s, pseudocounts=%1.2f, adaptive=%s, histogram=%s, includeZeros=%s, output=%s\n"
                % (self.normalization, self.samples, self.pseudocount,
                   self.adaptive, self.doHistogram, self.includeZeros,
                   self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python %s\n" % " ".join(sys.argv))
        self.output.write("#Control Data: %s\n" %
                          (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Experimental Data: %s\n" %
                          (",".join(self.expdata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" %
                          (self.annotation_path.encode('utf-8')))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        self.output.write("#%s\n" % "\t".join(columns))

        for i, row in enumerate(rows):
            (orf, name, desc, n, mean1, mean2, sum1, sum2, test_obs, log2FC,
             pval_2tail) = row
            self.output.write(
                "%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.1f\t%1.2f\t%1.1f\t%1.5f\t%1.5f\n"
                % (orf, name, desc, n, mean1, mean2, log2FC, sum1, sum2,
                   test_obs, pval_2tail, qval[i]))
        self.output.close()

        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="Resampling")
        self.finish()
        self.transit_message("Finished resampling Method")
Esempio n. 2
0
    def Run(self):
        """Run the Griffin analysis and write the report to ``self.output``.

        Loads ``self.ctrldata`` with ``transit_tools.get_validated_data``,
        normalizes it when ``self.normalization`` is set and is not
        ``"nonorm"``, builds a ``tnseq_tools.Genes`` collection and, for each
        gene with ``n != 0``, computes the expected run length
        (``tnseq_tools.ExpectedRuns``) and a p-value
        ``1 - GumbelCDF(gene.r, u, B)``.  Genes with ``n == 0`` get an
        expected run of 0.0 and a p-value of 1.  P-values are adjusted with
        ``stat_tools.BH_fdr_correction``; results are sorted, written as
        tab-separated lines after a header, and ``self.output`` is closed.
        """
        self.transit_message("Starting Griffin Method")
        start_time = time.time()

        # Get orf data
        self.transit_message("Getting Data")

        (data, position) = transit_tools.get_validated_data(self.ctrldata,
                                                            wxobj=self.wxobj)

        if self.normalization and self.normalization != "nonorm":
            self.transit_message("Normalizing using: %s" % self.normalization)
            (data,
             factors) = norm_tools.normalize_data(data, self.normalization,
                                                  self.ctrldata,
                                                  self.annotation_path)

        G = tnseq_tools.Genes(self.ctrldata,
                              self.annotation_path,
                              minread=1,
                              reps=self.replicates,
                              ignoreCodon=self.ignoreCodon,
                              nterm=self.NTerminus,
                              cterm=self.CTerminus,
                              data=data,
                              position=position)

        N = len(G)
        self.progress_range(N)
        pins = G.global_theta()
        pnon = 1.0 - pins
        results = []
        # ``count`` is 1-based so the value handed to progress_update matches
        # the percentage in the message and reaches N on the last gene.
        for count, gene in enumerate(G, 1):
            if gene.n == 0:
                results.append([gene, 0.0, 1.000])
            else:
                # Gumbel parameters; kept inside the branch so they are only
                # evaluated when at least one gene actually needs them.
                B = 1.0 / math.log(1.0 / pnon)
                u = math.log(gene.n * pins, 1.0 / pnon)
                exprun = tnseq_tools.ExpectedRuns(gene.n, pnon)
                pval = 1.0 - tnseq_tools.GumbelCDF(gene.r, u, B)
                results.append([gene, exprun, pval])

            text = "Running Griffin Method... %5.1f%%" % (100.0 * count / N)
            self.progress_update(text, count)

        # Adjust before sorting so each padj stays attached to its own row.
        padj = stat_tools.BH_fdr_correction([row[-1] for row in results])
        for i, row in enumerate(results):
            row.append(padj[i])
        # NOTE(review): rows start with the gene object, so this ordering
        # relies on the gene type supporting comparison -- confirm.
        results.sort()

        # NOTE(review): on Python 3 ``.encode('utf-8')`` returns bytes, which
        # "%s" renders as b'...' in the header lines below -- confirm the
        # target interpreter before changing these calls.
        self.output.write("#Griffin\n")
        if self.wxobj:
            self.output.write(
                "#GUI with: ctrldata=%s, annotation=%s, output=%s\n" %
                (",".join(self.ctrldata).encode('utf-8'),
                 self.annotation_path.encode('utf-8'),
                 self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python3 %s\n" % " ".join(sys.argv))

        self.output.write("#Data: %s\n" %
                          (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" %
                          self.annotation_path.encode('utf-8'))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        self.output.write("#%s\n" % "\t".join(columns))

        for (gene, exprun, pval, gene_padj) in results:
            self.output.write(
                "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%1.1f\t%1.5f\t%1.5f\n" %
                (gene.orf, gene.name, gene.desc, gene.k, gene.n, gene.r,
                 gene.s, gene.t, exprun, pval, gene_padj))

        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="Griffin")
        self.finish()
        self.transit_message("Finished Griffin Method")
Esempio n. 3
0
    def run_resampling(self, G_ctrl, G_exp = None, doLibraryResampling = False, histPath = ""):
        """Resample every control gene against its experimental counterpart.

        Parameters:
            G_ctrl: iterable of control genes (each exposing ``orf``, ``name``,
                ``desc``, ``n``, ``k`` and ``reads``).
            G_exp: container of experimental genes supporting ``in`` and
                lookup by ORF id.
                NOTE(review): the default ``None`` is not usable -- the
                membership test below would raise TypeError -- so callers
                presumably always pass it; confirm.
            doLibraryResampling: when true, use the dict-based statistic and
                the library-aware shuffle; otherwise the flat variants.
            histPath: directory receiving one ``<orf>.png`` per gene when
                ``self.doHistogram`` is set (created if missing).

        Returns:
            ``(rows, qval)``: ``rows`` is the sorted list of per-gene result
            lists and ``qval`` the output of
            ``stat_tools.BH_fdr_correction`` on their last column.  Returns
            ``([], [])`` after reporting an error when, with
            ``self.diffStrains`` false, a control gene is missing from
            ``G_exp`` or the two genes have different ``n``.
        """
        rows = []
        total = len(G_ctrl)
        done = 0
        self.progress_range(total)

        for ctrl_gene in G_ctrl:
            if ctrl_gene.orf not in G_exp:
                if not self.diffStrains:
                    self.transit_error("Error: Gene in ctrl data not present in exp data")
                    self.transit_error("Make sure all .wig files come from the same strain.")
                    return ([], [])
                # Different strains: genes without a counterpart are skipped.
                continue

            exp_gene = G_exp[ctrl_gene.orf]
            done += 1

            if not self.diffStrains and ctrl_gene.n != exp_gene.n:
                self.transit_error("Error: No. of TA sites in Exp and Ctrl data are different")
                self.transit_error("Make sure all .wig files come from the same strain.")
                return ([], [])

            if (ctrl_gene.k != 0 or exp_gene.k != 0) and ctrl_gene.n != 0 and exp_gene.n != 0:
                if self.includeZeros:
                    keep_ctrl = numpy.ones(ctrl_gene.n, dtype=bool)
                    keep_exp = numpy.ones(exp_gene.n, dtype=bool)
                else:
                    # Keep only sites with a non-zero column sum.
                    keep_ctrl = numpy.sum(ctrl_gene.reads, 0) > 0
                    keep_exp = numpy.sum(exp_gene.reads, 0) > 0

                # Pseudocounts are no longer added per observation here; they
                # are forwarded to the resampling routine through ``PC``.
                data1 = ctrl_gene.reads[:, keep_ctrl].flatten()
                data2 = exp_gene.reads[:, keep_exp].flatten()

                # Only the statistic/shuffle pair differs between the modes.
                if doLibraryResampling:
                    stat_func = stat_tools.F_mean_diff_dict
                    shuffle_func = stat_tools.F_shuffle_dict_libraries
                else:
                    stat_func = stat_tools.F_mean_diff_flat
                    shuffle_func = stat_tools.F_shuffle_flat

                outcome = stat_tools.resampling(
                    data1,
                    data2,
                    S=self.samples,
                    testFunc=stat_func,
                    permFunc=shuffle_func,
                    adaptive=self.adaptive,
                    lib_str1=self.ctrl_lib_str,
                    lib_str2=self.exp_lib_str,
                    PC=self.pseudocount)
                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail, pval_2tail, testlist) = outcome
            else:
                # No insertions in either condition, or no sites at all:
                # report zeros and p-values of 1.
                test_obs = mean1 = mean2 = log2FC = 0
                pval_ltail = pval_utail = pval_2tail = 1.00
                testlist = []
                data1 = [0]
                data2 = [0]

            if self.doHistogram:
                import matplotlib.pyplot as plt
                samples_to_plot = testlist if testlist else [0,0]
                plt.hist(samples_to_plot, density=1, facecolor='c', alpha=0.75, bins=100)
                plt.xlabel('Delta Mean')
                plt.ylabel('Probability')
                plt.title('%s - Histogram of Delta Mean' % ctrl_gene.orf)
                plt.axvline(test_obs, color='r', linestyle='dashed', linewidth=3)
                plt.grid(True)
                png_path = os.path.join(histPath, ctrl_gene.orf +".png")
                if not os.path.exists(histPath):
                    os.makedirs(histPath)
                plt.savefig(png_path)
                plt.clf()

            rows.append([
                ctrl_gene.orf, ctrl_gene.name, ctrl_gene.desc, ctrl_gene.n,
                mean1, mean2, numpy.sum(data1), numpy.sum(data2),
                test_obs, log2FC, pval_2tail,
            ])

            # Update progress
            self.progress_update("Running Resampling Method... %5.1f%%" % (100.0*done/total), done)

        self.transit_message("") # Printing empty line to flush stdout
        self.transit_message("Performing Benjamini-Hochberg Correction")
        # Sort before correcting so qval[i] corresponds to rows[i].
        rows.sort()
        pvalues = [row[-1] for row in rows]
        qval = stat_tools.BH_fdr_correction(pvalues)

        return (rows, qval)
Esempio n. 4
0
    def Run(self):
        """Run the Tn5 gaps analysis and write the report to ``self.output``.

        The replicates in ``self.ctrldata`` are combined with
        ``tnseq_tools.combine_replicates`` and binarized (values below
        ``self.minread`` become 0, every remaining positive value becomes 1).
        Runs are taken from ``tnseq_tools.runs_w_info``; every gene
        overlapping a run keeps the run with the largest
        ``intersect_size + 1`` together with that run's length and its p-value
        ``1 - GumbelCDF(length, u, B)``.  Genes that overlap no run keep an
        overlap of 0, a run length of 0 and a p-value of 1.  P-values are
        adjusted with ``stat_tools.BH_fdr_correction`` and genes with an
        adjusted p-value below 0.05 are called ``Essential``.

        The header reports the minimum significant run length as ``N/A`` when
        no gene is significant, and the expected run length as 0 when there
        are no runs at all.
        """
        self.transit_message("Starting Tn5 gaps method")
        start_time = time.time()

        self.transit_message("Getting data (May take a while)")

        # Combine all wigs
        (data, position) = transit_tools.get_validated_data(self.ctrldata,
                                                            wxobj=self.wxobj)
        counts = tnseq_tools.combine_replicates(data, method=self.replicates)
        # Binarize in place: sites below the read threshold count as empty,
        # everything else as an insertion.
        counts[counts < self.minread] = 0
        counts[counts > 0] = 1
        num_sites = counts.size

        genes_obj = tnseq_tools.Genes(self.ctrldata,
                                      self.annotation_path,
                                      ignoreCodon=self.ignoreCodon,
                                      nterm=self.NTerminus,
                                      cterm=self.CTerminus,
                                      data=data,
                                      position=position)

        pins = numpy.mean(counts)
        pnon = 1.0 - pins

        # Reported in the header as the expected maximum run length.
        exprunmax = tnseq_tools.ExpectedRuns(num_sites, pnon)

        # Get the runs
        self.transit_message("Getting non-insertion runs in genome")
        run_arr = tnseq_tools.runs_w_info(counts)
        pos_hash = transit_tools.get_pos_hash(self.annotation_path)

        # Finally, calculate the results
        self.transit_message("Running Tn5 gaps method")
        # Row layout: orf, name, desc, k, n, r, overlap size, run length, pval
        results_per_gene = {}
        for gene in genes_obj.genes:
            results_per_gene[gene.orf] = [
                gene.orf, gene.name, gene.desc, gene.k, gene.n, gene.r, 0, 0, 1
            ]

        N = len(run_arr)
        accum = 0
        self.progress_range(N)
        for count, run in enumerate(run_arr, 1):
            run_len = run['length']
            accum += run_len
            genes = tnseq_tools.get_genes_in_range(pos_hash, run['start'],
                                                   run['end'])
            for gene_orf in genes:
                gene = genes_obj[gene_orf]
                inter_sz = self.intersect_size([run['start'], run['end']],
                                               [gene.start, gene.end]) + 1
                B = 1.0 / math.log(1.0 / pnon)
                u = math.log(num_sites * pins, 1.0 / pnon)
                pval = 1.0 - tnseq_tools.GumbelCDF(run_len, u, B)

                # Keep only the run that overlaps this gene the most.
                if inter_sz > results_per_gene[gene.orf][6]:
                    results_per_gene[gene.orf] = [
                        gene.orf, gene.name, gene.desc, gene.k, gene.n, gene.r,
                        inter_sz, run_len, pval
                    ]

            # Update Progress
            text = "Running Tn5Gaps method... %1.1f%%" % (100.0 * count / N)
            self.progress_update(text, count)

        data = list(results_per_gene.values())
        # Guard the average: with no runs the division would raise.
        exp_run_len = float(accum) / N if N else 0.0

        # None (rather than infinity) marks "no significant gene": formatting
        # float('inf') with %d raises OverflowError.
        min_sig_len = None
        sig_genes_count = 0
        padj = stat_tools.BH_fdr_correction([row[-1] for row in data])
        for i, row in enumerate(data):
            significant = padj[i] < 0.05
            if significant:
                sig_genes_count += 1
                run_len = row[7]
                if min_sig_len is None or run_len < min_sig_len:
                    min_sig_len = run_len
            row.append(padj[i])
            row.append('Essential' if significant else 'Non-essential')
        data.sort(key=lambda row: row[0])

        # Output results
        # NOTE(review): on Python 3 ``.encode('utf-8')`` returns bytes, which
        # "%s" renders as b'...' in the header lines below -- confirm the
        # target interpreter before changing these calls.
        self.output.write("#Tn5 Gaps\n")
        if self.wxobj:
            self.output.write(
                "#GUI with: ctrldata=%s, annotation=%s, output=%s\n" %
                (",".join(self.ctrldata).encode('utf-8'),
                 self.annotation_path.encode('utf-8'),
                 self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python %s\n" % " ".join(sys.argv))

        self.output.write("#Data: %s\n" %
                          (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" %
                          self.annotation_path.encode('utf-8'))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        self.output.write("#Essential gene count: %d\n" % (sig_genes_count))
        self.output.write("#Minimum reads: %d\n" % (self.minread))
        self.output.write("#Replicate combination method: %s\n" %
                          (self.replicates))
        if min_sig_len is None:
            self.output.write("#Minimum significant run length: N/A\n")
        else:
            self.output.write("#Minimum significant run length: %d\n" %
                              (min_sig_len))
        self.output.write("#Expected run length: %1.5f\n" % (exp_run_len))
        self.output.write("#Expected max run length: %s\n" % (exprunmax))
        self.output.write("#%s\n" % "\t".join(columns))

        for res in data:
            self.output.write(
                "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%1.5f\t%1.5f\t%s\n" %
                (res[0], res[1], res[2], res[3], res[4], res[5], res[6],
                 res[7], res[8], res[9], res[10]))
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="Tn5 Gaps")
        self.finish()
        self.transit_message("Finished Tn5Gaps Method")
Esempio n. 5
0
    def Run(self):
        """Run the rank-product analysis and write the report to ``self.output``.

        Control and experimental datasets are loaded together and normalized
        unless ``self.normalization`` is ``"none"``.  For every gene the
        per-replicate mean counts give a log2 fold change (experimental over
        control, each offset by 0.0001); the fold changes are ranked within
        each replicate and the observed rank product is the product of a
        gene's ranks raised to ``1 / Kctrl``.  ``self.samples`` random rank
        permutations provide the null distribution.  Per gene, the E-value is
        the number of permuted rank products not larger than the observed
        one divided by ``self.samples``, and the reported q-value is the
        E-value divided by the gene's rank among the observed rank products.
        One tab-separated line per gene is written after a header and
        ``self.output`` is closed.
        """
        self.transit_message("Starting rankproduct Method")
        start_time = time.time()

        Kctrl = len(self.ctrldata)
        Kexp = len(self.expdata)
        all_datasets = self.ctrldata + self.expdata

        # Get orf data
        self.transit_message("Getting Data")
        (data, position) = transit_tools.get_validated_data(all_datasets,
                                                            wxobj=self.wxobj)
        # NOTE(review): other analysis methods use "nonorm" as the
        # "no normalization" sentinel; confirm "none" is intended here.
        if self.normalization != "none":
            self.transit_message("Normalizing using: %s" % self.normalization)

            (data, factors) = norm_tools.normalize_data(data,
                                                        self.normalization,
                                                        all_datasets,
                                                        self.annotation_path)

        # The first Kctrl rows are control replicates, the rest experimental.
        Gctrl = tnseq_tools.Genes(all_datasets,
                                  self.annotation_path,
                                  ignoreCodon=self.ignoreCodon,
                                  nterm=self.NTerminus,
                                  cterm=self.CTerminus,
                                  data=data[:Kctrl, :],
                                  position=position)

        Gexp = tnseq_tools.Genes(all_datasets,
                                 self.annotation_path,
                                 ignoreCodon=self.ignoreCodon,
                                 nterm=self.NTerminus,
                                 cterm=self.CTerminus,
                                 data=data[Kctrl:, :],
                                 position=position)

        Ngenes = len(Gctrl)

        # Get the average counts for all the genes, in each replicate.
        # Genes without any reads keep the zero the arrays start with.
        meanCtrl = numpy.zeros((Kctrl, Ngenes))
        meanExp = numpy.zeros((Kexp, Ngenes))

        for i in range(Ngenes):
            if numpy.any(Gctrl[i].reads):
                meanCtrl[:, i] = numpy.mean(Gctrl[i].reads, 1)
            if numpy.any(Gexp[i].reads):
                meanExp[:, i] = numpy.mean(Gexp[i].reads, 1)

        # Calculate a logFC2 between Experimental and Control,
        # then its rank within each replicate, and the observed rank product.
        logFC2 = numpy.log2((meanExp + 0.0001) / (meanCtrl + 0.0001))
        rank = numpy.array([scipy.stats.rankdata(Lvec) for Lvec in logFC2])
        obsRP = numpy.power(numpy.prod(rank, 0), 1.0 / Kctrl)

        # Null distribution: rank products of independently permuted ranks.
        # numpy.array replaces the deprecated scipy.array alias.
        permutations = numpy.zeros((self.samples, Ngenes))
        tempranks = numpy.array(
            [numpy.arange(1, Ngenes + 1) for rep in range(Kctrl)])
        for s in range(self.samples):
            rankperm = numpy.array(
                [numpy.random.permutation(tr) for tr in tempranks])
            permutations[s] = numpy.power(numpy.prod(rankperm, 0), 1.0 / Kctrl)

        # 1-based rank of every gene's observed rank product.  A single
        # argsort yields the sorting permutation, not ranks; applying it
        # twice converts that permutation into per-gene ranks.
        rankRP = numpy.argsort(numpy.argsort(obsRP)) + 1

        # Count, for every gene at once, the permuted values <= its observed
        # rank product: sort the null values once and binary-search them
        # instead of scanning the whole matrix per gene.  The matrix is not
        # needed in its original order any more, so it is sorted in place.
        sorted_perm = permutations.reshape(-1)
        sorted_perm.sort()
        countbetter_all = numpy.searchsorted(sorted_perm, obsRP, side="right")

        # rankproduct
        data = []
        self.progress_range(Ngenes)
        for i, gene in enumerate(Gctrl):
            count = i + 1

            meanctrl = numpy.mean(Gctrl[i].reads)
            meanexp = numpy.mean(Gexp[i].reads)
            log2fc = numpy.log2((meanexp + 0.0001) / (meanctrl + 0.0001))
            countbetter = countbetter_all[i]

            pval = countbetter / float(self.samples * Ngenes)
            e_val = countbetter / float(self.samples)
            q_paper = e_val / float(rankRP[i])

            data.append([
                gene.orf, gene.name, gene.desc, gene.n, meanctrl, meanexp,
                log2fc, obsRP[i], e_val, q_paper, pval
            ])

            # Update Progress
            text = "Running rankproduct Method... %5.1f%%" % (100.0 * count /
                                                              Ngenes)
            self.progress_update(text, count)

        self.transit_message("")  # Printing empty line to flush stdout
        self.transit_message("Performing Benjamini-Hochberg Correction")
        data.sort()
        # NOTE(review): q_bh is computed but never written to the report;
        # only q_paper is output -- confirm whether that is intended.
        q_bh = stat_tools.BH_fdr_correction([row[-1] for row in data])

        # NOTE(review): on Python 3 ``.encode('utf-8')`` returns bytes, which
        # "%s" renders as b'...' in the header lines below -- confirm the
        # target interpreter before changing these calls.
        self.output.write("#RankProduct\n")
        if self.wxobj:
            self.output.write(
                "#GUI with: ctrldata=%s, annotation=%s, output=%s\n" %
                (",".join(self.ctrldata).encode('utf-8'),
                 self.annotation_path.encode('utf-8'),
                 self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python %s\n" % " ".join(sys.argv))

        self.output.write("#Data: %s\n" %
                          (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" %
                          self.annotation_path.encode('utf-8'))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        # NOTE(review): ``columns`` is formatted directly here rather than
        # tab-joined; if it is a list this writes its repr -- confirm.
        self.output.write("#%s\n" % (columns))

        for row in data:
            (orf, name, desc, n, mean1, mean2, log2FCgene, obsRPgene, e_val,
             q_paper, pval) = row
            self.output.write(
                "%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.8f\t%1.1f\t%1.8f\n" %
                (orf, name, desc, n, mean1, mean2, log2FCgene, obsRPgene,
                 e_val, q_paper))
        self.output.close()

        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="RankProduct")
        self.finish()
        self.transit_message("Finished rankproduct Method")
Esempio n. 6
0
    def Run(self):
        """Run the Mann-Whitney U-test analysis and write the report.

        Control and experimental datasets are loaded together, normalized
        unless ``self.normalization`` is ``"nonorm"`` and LOESS-corrected
        when ``self.LOESS`` is set.  For every gene with ``k != 0`` and
        ``n != 0`` a two-sided ``scipy.stats.mannwhitneyu`` test compares the
        control reads with the experimental reads; if the test raises
        ``ValueError`` the gene gets a U statistic of 0 and a p-value of 1,
        as do genes without insertions or sites.  P-values are adjusted with
        ``stat_tools.BH_fdr_correction``; one tab-separated line per gene is
        written to ``self.output`` after a header, and the file is closed.
        """
        self.transit_message("Starting Mann-Whitney U-test Method")
        start_time = time.time()

        # Rows [0:Kctrl] of each gene's reads are control replicates, the
        # remaining rows are experimental (datasets are concatenated below).
        Kctrl = len(self.ctrldata)
        all_datasets = self.ctrldata+self.expdata

        #Get orf data
        self.transit_message("Getting Data")
        (data, position) = transit_tools.get_validated_data(all_datasets, wxobj=self.wxobj)

        K = data.shape[0]

        if self.normalization != "nonorm":
            self.transit_message("Normalizing using: %s" % self.normalization)
            (data, factors) = norm_tools.normalize_data(data, self.normalization, all_datasets, self.annotation_path)

        if self.LOESS:
            self.transit_message("Performing LOESS Correction")
            for j in range(K):
                data[j] = stat_tools.loess_correction(position, data[j])

        G = tnseq_tools.Genes(all_datasets, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data, position=position)

        #u-test
        rows = []
        N = len(G)
        self.progress_range(N)
        for count, gene in enumerate(G, 1):
            if gene.k == 0 or gene.n == 0:
                # Nothing to test: report zeros and a p-value of 1.
                (mean1, mean2, log2FC, u_stat, pval_2tail) = (0, 0, 0, 0.0, 1.00)
            else:
                if not self.includeZeros:
                    # Keep only sites with a non-zero column sum.
                    ii = numpy.sum(gene.reads,0) > 0
                else:
                    ii = numpy.ones(gene.n) == 1

                data1 = gene.reads[:Kctrl,ii].flatten()
                data2 = gene.reads[Kctrl:,ii].flatten()
                try:
                    u_stat, pval_2tail = scipy.stats.mannwhitneyu(data1, data2,
                        alternative="two-sided")
                except ValueError:
                    # The test could not be computed for these samples.
                    u_stat, pval_2tail = 0.0, 1.00

                mean1 = numpy.mean(data1) if len(data1) > 0 else 0
                mean2 = numpy.mean(data2) if len(data2) > 0 else 0

                try:
                    # Only adjust log2FC if one of the means is zero
                    if mean1 > 0 and mean2 > 0:
                        log2FC = math.log((mean2)/(mean1),2)
                    else:
                        log2FC = math.log((mean2+1.0)/(mean1+1.0),2)
                except (ValueError, ArithmeticError):
                    # math domain error (non-positive ratio) or a failed
                    # division: fall back to "no change".
                    log2FC = 0.0

            rows.append([gene.orf, gene.name, gene.desc, gene.n, mean1, mean2, log2FC, u_stat, pval_2tail])

            # Update Progress
            text = "Running Mann-Whitney U-test Method... %1.1f%%" % (100.0*count/N)
            self.progress_update(text, count)

        self.transit_message("") # Printing empty line to flush stdout
        self.transit_message("Performing Benjamini-Hochberg Correction")
        # Sort first so that qval[i] lines up with rows[i] when writing.
        rows.sort()
        qval = stat_tools.BH_fdr_correction([row[-1] for row in rows])

        # NOTE(review): on Python 3 ``.encode('utf-8')`` returns bytes, which
        # "%s" renders as b'...' in the header lines below -- confirm the
        # target interpreter before changing these calls.
        self.output.write("#utest\n")
        if self.wxobj:
            self.output.write("#GUI with: norm=%s, includeZeros=%s, output=%s\n" % (self.normalization, self.includeZeros, self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python %s\n" % " ".join(sys.argv))
        self.output.write("#Control Data: %s\n" % (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Experimental Data: %s\n" % (",".join(self.expdata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" % (self.annotation_path.encode('utf-8')))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        self.output.write("#%s\n" % "\t".join(columns))

        for i,row in enumerate(rows):
            (orf, name, desc, n, mean1, mean2, log2FC, u_stat, pval_2tail) = row
            self.output.write("%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.2f\t%1.5f\t%1.5f\n" % (orf, name, desc, n, mean1, mean2, log2FC, u_stat, pval_2tail, qval[i]))
        self.output.close()

        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="utest")
        self.finish()
        self.transit_message("Finished Mann-Whitney U-test Method")