예제 #1
0
    def Run(self):
        """Export the normalized read counts of the input datasets as an IGV table.

        Reads the datasets listed in ``self.ctrldata``, normalizes them with
        ``self.normalization`` and writes to ``self.output``:

        * a commented header (normalization method and factors, input files,
          column names), then
        * one tab-separated row per position: chromosome name, start, end,
          a ``TA<position>`` feature label, one count per dataset and a
          trailing ``1``.

        ``self.output`` is closed before the method returns, including when
        writing fails part-way; the exception still propagates in that case.
        """
        self.transit_message("Starting IGV Export")

        # Get orf data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(
            fulldata, self.normalization, self.ctrldata, self.annotation_path)
        position = position.astype(int)

        self.transit_message("Normalizing")
        try:
            self.output.write("#Converted to IGV with TRANSIT.\n")
            if self.normalization != "nonorm":
                self.output.write("#Reads normalized using '%s'\n" %
                                  self.normalization)
                # isinstance rather than an exact type comparison: NumPy
                # float64 scalars subclass float but are not *exactly* float,
                # so a flat array of factors must be recognised here instead
                # of being iterated element-by-element below.
                if isinstance(factors[0], float):
                    factor_str = "\t".join(["%s" % f for f in factors])
                else:
                    # One comma-separated group of factors per dataset.
                    factor_str = " ".join(
                        [",".join(["%s" % bx for bx in b]) for b in factors])
                self.output.write("#Normalization Factors: %s\n" % factor_str)

            self.output.write("#Files:\n")
            for f in self.ctrldata:
                self.output.write("#%s\n" % f)

            dataset_str = "\t".join(
                [transit_tools.fetch_name(F) for F in self.ctrldata])
            self.output.write("#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" %
                              dataset_str)
            chrom = transit_tools.fetch_name(self.annotation_path)

            (_, N) = fulldata.shape
            self.progress_range(N)
            for i, pos in enumerate(position):
                # Column i of every dataset row holds the count at this position.
                counts_str = "\t".join(["%1.1f" % row[i] for row in fulldata])
                self.output.write("%s\t%s\t%s\tTA%s\t%s\t1\n" %
                                  (chrom, pos, pos + 1, pos, counts_str))

                # Update progress
                text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
                self.progress_update(text, i)
        finally:
            self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #2
0
파일: igv.py 프로젝트: mad-lab/transit
    def Run(self):
        """Write the normalized counts of ``self.ctrldata`` to ``self.output`` in IGV format.

        The output starts with ``#``-prefixed header lines (normalization
        method, normalization factors, the input file names and the column
        names) and continues with one tab-separated line per position:
        chromosome, start, end (start + 1), ``TA<position>``, the count of
        each dataset at that position, and a final ``1``.

        The output handle is always closed, even when an error interrupts the
        export; the error is re-raised to the caller.
        """
        self.transit_message("Starting IGV Export")

        #Get orf data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(fulldata, self.normalization,
            self.ctrldata, self.annotation_path)
        position = position.astype(int)

        self.transit_message("Normalizing")
        try:
            self.output.write("#Converted to IGV with TRANSIT.\n")
            if self.normalization != "nonorm":
                self.output.write("#Reads normalized using '%s'\n" % self.normalization)
                if isinstance(factors[0], float):
                    # Flat sequence of scalar factors. isinstance also accepts
                    # NumPy float64 (a float subclass), which an exact
                    # type() comparison rejects.
                    joined_factors = "\t".join(["%s" % f for f in factors])
                else:
                    # Nested factors: comma-join each group, space-join the groups.
                    joined_factors = " ".join([",".join(["%s" % bx for bx in b]) for b in factors])
                self.output.write("#Normalization Factors: %s\n" % joined_factors)

            self.output.write("#Files:\n")
            for f in self.ctrldata:
                self.output.write("#%s\n" % f)

            dataset_str = "\t".join([transit_tools.fetch_name(F) for F in self.ctrldata])
            self.output.write("#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" % dataset_str)
            chrom = transit_tools.fetch_name(self.annotation_path)

            (_, N) = fulldata.shape
            self.progress_range(N)
            for i, pos in enumerate(position):
                per_dataset = "\t".join(["%1.1f" % dataset[i] for dataset in fulldata])
                self.output.write("%s\t%s\t%s\tTA%s\t%s\t1\n" % (chrom, pos, pos+1, pos, per_dataset))

                # Update progress
                text = "Running Export Method... %5.1f%%" % (100.0*i/N)
                self.progress_update(text, i)
        finally:
            self.output.close()

        self.transit_message("") # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #3
0
 def displayHistogram(self, displayFrame, event):
         """Open the histogram image for the gene in the frame's current row.

         The gene identifier is read from column 0 of ``displayFrame.grid`` at
         ``displayFrame.row``. The image is expected at
         ``<dir of displayFrame.path>/<fetch_name(displayFrame.path)>/<gene>.png``.
         If that file is missing, an error dialog is shown and a message is
         printed instead. ``event`` is not used.
         """
         orf_id = displayFrame.grid.GetCellValue(displayFrame.row, 0)

         # NOTE(review): the resampling Run snippets alongside this one save
         # histograms under "<name>_histograms"; confirm this directory (no
         # suffix) is the one the images are actually written to.
         results_dir = ntpath.dirname(displayFrame.path)
         results_name = transit_tools.fetch_name(displayFrame.path)
         image_path = os.path.join(os.path.join(results_dir, results_name), orf_id + ".png")

         if not os.path.exists(image_path):
             transit_tools.ShowError(MSG="Error Displaying File. Histogram image not found. Make sure results were obtained with the histogram option turned on.")
             print("Error Displaying File. Histogram image does not exist.")
             return

         pytransit.fileDisplay.ImgFrame(None, image_path).Show()
예제 #4
0
 def displayHistogram(self, displayFrame, event):
         """Show the saved histogram image of the gene selected in ``displayFrame``.

         The gene identifier comes from column 0 of the grid row
         ``displayFrame.row``; the image looked up is ``<gene>.png`` inside the
         directory ``<dir of displayFrame.path>/<fetch_name(displayFrame.path)>``.
         When the image does not exist an error dialog is shown and a message
         is printed. ``event`` is not used.
         """
         gene = displayFrame.grid.GetCellValue(displayFrame.row, 0)
         filepath = os.path.join(ntpath.dirname(displayFrame.path), transit_tools.fetch_name(displayFrame.path))
         filename = os.path.join(filepath, gene + ".png")
         if os.path.exists(filename):
             imgWindow = pytransit.fileDisplay.ImgFrame(None, filename)
             imgWindow.Show()
         else:
             transit_tools.ShowError(MSG="Error Displaying File. Histogram image not found. Make sure results were obtained with the histogram option turned on.")
             # Function-call form: valid on Python 3 and, for a single
             # argument, prints the same text on Python 2.
             print("Error Displaying File. Histogram image does not exist.")
예제 #5
0
    def Run(self):
        """Export the mean read count of every gene, per dataset, to ``self.output``.

        The datasets in ``self.ctrldata`` are loaded and normalized so the
        normalization factors can be reported in the ``#``-prefixed header.
        The per-gene values come from ``tnseq_tools.Genes`` built with the
        same normalization. Each data row holds the gene's orf, name, number
        of sites (``gene.n``) and one mean per dataset; genes with no sites
        get ``0.00`` for every dataset.

        ``self.output`` is closed before returning, including when the export
        fails part-way (the exception still propagates).
        """
        self.transit_message("Starting Gene Mean Counts Export")

        #Get orf data
        self.transit_message("Getting Data")
        (fulldata, _) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(fulldata, self.normalization,
            self.ctrldata, self.annotation_path)

        self.transit_message("Normalizing")
        try:
            self.output.write("#Summarized to Mean Gene Counts with TRANSIT.\n")
            if self.normalization != "nonorm":
                self.output.write("#Reads normalized using '%s'\n" % self.normalization)
                # isinstance instead of an exact type comparison so that NumPy
                # float64 scalars (a float subclass) are handled as a flat
                # list of factors rather than iterated as nested groups.
                if isinstance(factors[0], float):
                    factor_str = "\t".join(["%s" % f for f in factors])
                else:
                    factor_str = " ".join([",".join(["%s" % bx for bx in b]) for b in factors])
                self.output.write("#Normalization Factors: %s\n" % factor_str)

            self.output.write("#Files:\n")
            for f in self.ctrldata:
                self.output.write("#%s\n" % f)

            (K, _) = fulldata.shape
            # Get Gene objects
            G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, norm=self.normalization)
            N = len(G)
            self.progress_range(N)
            dataset_header = "\t".join([transit_tools.fetch_name(D) for D in self.ctrldata])
            self.output.write("#Orf\tName\tNumber of TA sites\t%s\n" % dataset_header)

            # Row used for genes without any sites: one zero per dataset.
            zero_str = "\t".join(["%1.2f" % (Z) for Z in numpy.zeros(K)])
            for i, gene in enumerate(G):
                if gene.n > 0:
                    # Mean over axis 1 of gene.reads gives one value per dataset row.
                    data_str = "\t".join(["%1.2f" % (M) for M in numpy.mean(gene.reads, 1)])
                else:
                    data_str = zero_str
                self.output.write("%s\t%s\t%s\t%s\n" % (gene.orf, gene.name, gene.n, data_str))

                # Update progress
                text = "Running Export Method... %5.1f%%" % (100.0*i/N)
                self.progress_update(text, i)
        finally:
            self.output.close()

        self.transit_message("") # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #6
0
    def Run(self):
        """Run the resampling test for every gene and write the results table.

        Steps, in order:

        1. Load the control and experimental datasets together, optionally
           normalize them (``self.normalization``) and apply LOESS correction
           (``self.LOESS``).
        2. For each gene, compare the control rows (the first
           ``len(self.ctrldata)`` rows of ``gene.reads``) against the
           remaining rows with ``stat_tools.resampling``. Genes with
           ``gene.k == 0`` or ``gene.n == 0`` get neutral values and a
           p-value of 1 without being tested.
        3. When ``self.doHistogram`` is set, save one histogram PNG per gene
           into a ``<output name>_histograms`` directory next to the output.
        4. Sort the rows, adjust the two-tailed p-values with
           ``stat_tools.BH_fdr_correction`` and write header plus rows to
           ``self.output``, which is then closed.

        The column header line is built from ``columns``, a name that is not
        defined in this method and must be provided by the enclosing module.
        """
        # Probe for matplotlib once; if it cannot be imported, histograms are
        # switched off instead of aborting the analysis.
        try:
            import matplotlib.pyplot as plt
        except Exception:
            # Function-call form works on Python 3 and prints the same text
            # on Python 2 for a single argument.
            print("Error: cannot do histograms")
            self.doHistogram = False

        self.transit_message("Starting resampling Method")
        start_time = time.time()

        if self.doHistogram:
            histPath = os.path.join(
                os.path.dirname(self.output.name),
                transit_tools.fetch_name(self.output.name) + "_histograms")
            if not os.path.isdir(histPath):
                os.makedirs(histPath)
        else:
            histPath = ""

        Kctrl = len(self.ctrldata)
        all_datasets = self.ctrldata + self.expdata

        #Get orf data
        self.transit_message("Getting Data")
        (data, position) = transit_tools.get_validated_data(all_datasets,
                                                            wxobj=self.wxobj)

        (K, _) = data.shape

        if self.normalization != "nonorm":
            self.transit_message("Normalizing using: %s" % self.normalization)
            (data, _) = norm_tools.normalize_data(data, self.normalization,
                                                  all_datasets,
                                                  self.annotation_path)

        if self.LOESS:
            self.transit_message("Performing LOESS Correction")
            for j in range(K):
                data[j] = stat_tools.loess_correction(position, data[j])

        G = tnseq_tools.Genes(all_datasets,
                              self.annotation_path,
                              ignoreCodon=self.ignoreCodon,
                              nterm=self.NTerminus,
                              cterm=self.CTerminus,
                              data=data,
                              position=position)

        #Resampling
        data = []
        N = len(G)
        self.progress_range(N)
        for count, gene in enumerate(G, 1):
            if gene.k == 0 or gene.n == 0:
                # Nothing to test: neutral statistics, p-values of 1.
                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,
                 pval_2tail, testlist, data1, data2) = (0, 0, 0, 0, 1.00, 1.00,
                                                        1.00, [], [0], [0])
            else:
                if not self.includeZeros:
                    # Keep only sites with at least one read in any dataset.
                    ii = numpy.sum(gene.reads, 0) > 0
                else:
                    ii = numpy.ones(gene.n) == 1

                data1 = gene.reads[:Kctrl, ii].flatten() + self.pseudocount
                data2 = gene.reads[Kctrl:, ii].flatten() + self.pseudocount

                (test_obs, mean1, mean2, log2FC, pval_ltail, pval_utail,
                 pval_2tail, testlist) = stat_tools.resampling(
                     data1,
                     data2,
                     S=self.samples,
                     testFunc=stat_tools.F_mean_diff_flat,
                     adaptive=self.adaptive)

            if self.doHistogram:
                # plt is bound here: doHistogram is forced to False above
                # whenever the import failed.
                plt.hist(testlist if testlist else [0, 0],
                         density=1,
                         facecolor='c',
                         alpha=0.75,
                         bins=100)
                plt.xlabel('Delta Mean')
                plt.ylabel('Probability')
                plt.title('%s - Histogram of Delta Mean' % gene.orf)
                plt.axvline(test_obs,
                            color='r',
                            linestyle='dashed',
                            linewidth=3)
                plt.grid(True)
                genePath = os.path.join(histPath, gene.orf + ".png")
                if not os.path.exists(histPath):
                    os.makedirs(histPath)
                plt.savefig(genePath)
                plt.clf()

            sum1 = numpy.sum(data1)
            sum2 = numpy.sum(data2)
            data.append([
                gene.orf, gene.name, gene.desc, gene.n, mean1, mean2, sum1,
                sum2, test_obs, log2FC, pval_2tail
            ])

            # Update progress
            text = "Running Resampling Method... %5.1f%%" % (100.0 * count / N)
            self.progress_update(text, count)

        #
        self.transit_message("")  # Printing empty line to flush stdout
        self.transit_message("Performing Benjamini-Hochberg Correction")
        data.sort()
        qval = stat_tools.BH_fdr_correction([row[-1] for row in data])

        # NOTE(review): the .encode('utf-8') calls below are kept unchanged;
        # under Python 3 they produce bytes, which '%s' renders as b'...'.
        # Confirm the intended interpreter before altering them.
        self.output.write("#Resampling\n")
        if self.wxobj:
            self.output.write(
                "#GUI with: norm=%s, samples=%s, pseudocounts=%1.2f, adaptive=%s, histogram=%s, includeZeros=%s, output=%s\n"
                % (self.normalization, self.samples, self.pseudocount,
                   self.adaptive, self.doHistogram, self.includeZeros,
                   self.output.name.encode('utf-8')))
        else:
            self.output.write("#Console: python %s\n" % " ".join(sys.argv))
        self.output.write("#Control Data: %s\n" %
                          (",".join(self.ctrldata).encode('utf-8')))
        self.output.write("#Experimental Data: %s\n" %
                          (",".join(self.expdata).encode('utf-8')))
        self.output.write("#Annotation path: %s\n" %
                          (self.annotation_path.encode('utf-8')))
        self.output.write("#Time: %s\n" % (time.time() - start_time))
        self.output.write("#%s\n" % "\t".join(columns))

        for i, row in enumerate(data):
            (orf, name, desc, n, mean1, mean2, sum1, sum2, test_obs, log2FC,
             pval_2tail) = row
            self.output.write(
                "%s\t%s\t%s\t%d\t%1.1f\t%1.1f\t%1.2f\t%1.1f\t%1.2f\t%1.1f\t%1.5f\t%1.5f\n"
                % (orf, name, desc, n, mean1, mean2, log2FC, sum1, sum2,
                   test_obs, pval_2tail, qval[i]))
        self.output.close()

        self.transit_message("Adding File: %s" % (self.output.name))
        self.add_file(filetype="Resampling")
        self.finish()
        self.transit_message("Finished resampling Method")
예제 #7
0
    def Run(self):
        """Run the resampling analysis of control versus experimental datasets.

        Data come either from a combined wig file plus samples metadata
        (``self.combinedWigParams``; the first listed condition is the
        control, the second the experimental) or from the individual files
        in ``self.ctrldata`` / ``self.expdata``. Both groups are preprocessed
        with ``self.preprocess_data``, wrapped in ``tnseq_tools.Genes`` and
        handed to ``self.run_resampling``; the results are written by
        ``self.write_output``.

        Per-library resampling is enabled only when library strings are given
        and control and experimental use the same set of two or more letters.
        If the letter sets differ, an error is reported, both library strings
        are cleared and plain resampling is used.

        Returns early, without calling ``self.finish()``, when the two groups
        have different numbers of sites and ``self.diffStrains`` is false.
        """
        # Probe for matplotlib; if it cannot be imported, histograms are
        # disabled instead of aborting the analysis.
        try:
            import matplotlib.pyplot as plt
        except Exception:
            print("Error: cannot do histograms")
            self.doHistogram = False

        self.transit_message("Starting resampling Method")
        start_time = time.time()

        histPath = ""
        if self.doHistogram:
            histPath = os.path.join(os.path.dirname(self.output.name), transit_tools.fetch_name(self.output.name)+"_histograms")
            if not os.path.isdir(histPath):
                os.makedirs(histPath)

        #Get orf data
        self.transit_message("Getting Data")
        if self.diffStrains:
            self.transit_message("Multiple annotation files found")
            self.transit_message("Mapping ctrl data to {0}, exp data to {1}".format(self.annotation_path, self.annotation_path_exp))

        if self.combinedWigParams:
            (position, data, filenamesInCombWig) = tnseq_tools.read_combined_wig(self.combinedWigParams['combined_wig'])
            conditionsByFile, _, _, _ = tnseq_tools.read_samples_metadata(self.combinedWigParams['samples_metadata'])
            conditions = self.wigs_to_conditions(conditionsByFile, filenamesInCombWig)
            data, conditions = self.filter_wigs_by_conditions(data, conditions, self.combinedWigParams['conditions'])
            # Split the rows by condition: first listed condition is the
            # control group, second is the experimental group.
            ctrl_condition = self.combinedWigParams['conditions'][0]
            exp_condition = self.combinedWigParams['conditions'][1]
            data_ctrl = numpy.array([d for i, d in enumerate(data) if conditions[i].lower() == ctrl_condition])
            data_exp = numpy.array([d for i, d in enumerate(data) if conditions[i].lower() == exp_condition])
            position_ctrl, position_exp = position, position
        else:
            (data_ctrl, position_ctrl) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
            (data_exp, position_exp) = transit_tools.get_validated_data(self.expdata, wxobj=self.wxobj)
        (_, N_ctrl) = data_ctrl.shape
        (_, N_exp) = data_exp.shape

        if not self.diffStrains and (N_ctrl != N_exp):
            self.transit_error("Error: Ctrl and Exp wig files don't have the same number of sites.")
            self.transit_error("Make sure all .wig files come from the same strain.")
            return

        self.transit_message("Preprocessing Ctrl data...")
        data_ctrl = self.preprocess_data(position_ctrl, data_ctrl)

        self.transit_message("Preprocessing Exp data...")
        data_exp = self.preprocess_data(position_exp, data_exp)

        G_ctrl = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data_ctrl, position=position_ctrl)
        G_exp = tnseq_tools.Genes(self.expdata, self.annotation_path_exp, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data_exp, position=position_exp)

        doLibraryResampling = False
        # If library string not empty
        if self.ctrl_lib_str or self.exp_lib_str:
            letters_ctrl = set(self.ctrl_lib_str)
            letters_exp = set(self.exp_lib_str)

            if letters_ctrl != letters_exp:
                # Some library is missing from one of the two conditions.
                transit_tools.transit_error("Error: Library Strings (Ctrl = %s, Exp = %s) do not use the same letters. Make sure every letter / library is represented in both Control and Experimental Conditions. Proceeding with resampling assuming all datasets belong to the same library." % (self.ctrl_lib_str, self.exp_lib_str))
                self.ctrl_lib_str = ""
                self.exp_lib_str = ""
            elif len(letters_ctrl) > 1:
                # Same letters on both sides and more than one library.
                doLibraryResampling = True
            # Otherwise both sides use one and the same letter, i.e. there
            # are no different libraries: plain resampling applies.

        (data, qval) = self.run_resampling(G_ctrl, G_exp, doLibraryResampling, histPath)
        self.write_output(data, qval, start_time)

        self.finish()
        self.transit_message("Finished resampling Method")
예제 #8
0
    def Run(self):
        """Compare control and experimental datasets with the resampling method.

        Input is taken from ``self.combinedWigParams`` when it is set (a
        combined wig file and samples metadata, with the first listed
        condition as control and the second as experimental); otherwise from
        the files in ``self.ctrldata`` and ``self.expdata``. Each group is
        passed through ``self.preprocess_data``, turned into a
        ``tnseq_tools.Genes`` collection, and the pair is given to
        ``self.run_resampling``. ``self.write_output`` writes the results.

        Library handling: per-library resampling is used only when library
        strings were supplied and both conditions use the same set of at
        least two letters. Differing letter sets trigger an error message,
        clear both library strings and fall back to plain resampling.

        If the groups differ in number of sites while ``self.diffStrains`` is
        false, errors are reported and the method returns without calling
        ``self.finish()``.
        """
        # Check that matplotlib is importable; otherwise turn histograms off
        # rather than failing the whole run.
        try:
            import matplotlib.pyplot as plt
        except Exception:
            # Function-call form: valid on Python 3, same output on Python 2
            # for a single argument.
            print("Error: cannot do histograms")
            self.doHistogram = False

        self.transit_message("Starting resampling Method")
        start_time = time.time()

        histPath = ""
        if self.doHistogram:
            histPath = os.path.join(os.path.dirname(self.output.name), transit_tools.fetch_name(self.output.name)+"_histograms")
            if not os.path.isdir(histPath):
                os.makedirs(histPath)

        #Get orf data
        self.transit_message("Getting Data")
        if self.diffStrains:
            self.transit_message("Multiple annotation files found")
            self.transit_message("Mapping ctrl data to {0}, exp data to {1}".format(self.annotation_path, self.annotation_path_exp))

        if self.combinedWigParams:
            (position, data, filenamesInCombWig) = tnseq_tools.read_combined_wig(self.combinedWigParams['combined_wig'])
            conditionsByFile, _, _, _ = tnseq_tools.read_samples_metadata(self.combinedWigParams['samples_metadata'])
            conditions = self.wigs_to_conditions(conditionsByFile, filenamesInCombWig)
            data, conditions = self.filter_wigs_by_conditions(data, conditions, self.combinedWigParams['conditions'])
            # Rows whose condition matches the first listed condition form the
            # control group; the second listed condition is the experimental group.
            wanted_ctrl = self.combinedWigParams['conditions'][0]
            wanted_exp = self.combinedWigParams['conditions'][1]
            data_ctrl = numpy.array([d for i, d in enumerate(data) if conditions[i].lower() == wanted_ctrl])
            data_exp = numpy.array([d for i, d in enumerate(data) if conditions[i].lower() == wanted_exp])
            position_ctrl, position_exp = position, position
        else:
            (data_ctrl, position_ctrl) = transit_tools.get_validated_data(self.ctrldata, wxobj=self.wxobj)
            (data_exp, position_exp) = transit_tools.get_validated_data(self.expdata, wxobj=self.wxobj)
        (_, N_ctrl) = data_ctrl.shape
        (_, N_exp) = data_exp.shape

        if not self.diffStrains and (N_ctrl != N_exp):
            self.transit_error("Error: Ctrl and Exp wig files don't have the same number of sites.")
            self.transit_error("Make sure all .wig files come from the same strain.")
            return

        self.transit_message("Preprocessing Ctrl data...")
        data_ctrl = self.preprocess_data(position_ctrl, data_ctrl)

        self.transit_message("Preprocessing Exp data...")
        data_exp = self.preprocess_data(position_exp, data_exp)

        G_ctrl = tnseq_tools.Genes(self.ctrldata, self.annotation_path, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data_ctrl, position=position_ctrl)
        G_exp = tnseq_tools.Genes(self.expdata, self.annotation_path_exp, ignoreCodon=self.ignoreCodon, nterm=self.NTerminus, cterm=self.CTerminus, data=data_exp, position=position_exp)

        doLibraryResampling = False
        # If library string not empty
        if self.ctrl_lib_str or self.exp_lib_str:
            letters_ctrl = set(self.ctrl_lib_str)
            letters_exp = set(self.exp_lib_str)

            if letters_ctrl == letters_exp:
                # A single shared letter means there are no different
                # libraries, so only two or more letters enable per-library
                # resampling.
                if len(letters_ctrl) > 1:
                    doLibraryResampling = True
            else:
                # At least one library appears in only one of the conditions.
                transit_tools.transit_error("Error: Library Strings (Ctrl = %s, Exp = %s) do not use the same letters. Make sure every letter / library is represented in both Control and Experimental Conditions. Proceeding with resampling assuming all datasets belong to the same library." % (self.ctrl_lib_str, self.exp_lib_str))
                self.ctrl_lib_str = ""
                self.exp_lib_str = ""

        (data, qval) = self.run_resampling(G_ctrl, G_exp, doLibraryResampling, histPath)
        self.write_output(data, qval, start_time)

        self.finish()
        self.transit_message("Finished resampling Method")