예제 #1
0
    def Run(self):
        """Normalize the input dataset(s) and write the result to self.outfile.

        Reads counts either from a combined wig (self.combined_wig) or from
        the individual wig files in self.ctrldata, applies
        norm_tools.normalize_data with self.normalization, and writes the
        normalized counts back out in the matching format.
        """
        self.transit_message("Starting Normalization")
        start_time = time.time()

        infile = self.infile
        outputPath = self.outfile # output file exists, should I require -overwrite flag?

        # Determine ref genome from the first wig header; assume all files
        # share it and that wigs have 2 header lines.
        line2 = "variableStep chrom=" # fallback if no header line is found
        with open(infile) as wig:  # 'with' closes the handle (the original leaked it)
            for line in wig:
                if line.startswith("variableStep"):
                    line2 = line.rstrip()
                    break

        if self.combined_wig == True:
            (sites, data, files) = tnseq_tools.read_combined_wig(self.ctrldata[0])
        else:
            (data, sites) = tnseq_tools.get_data(self.ctrldata)
        (data, factors) = norm_tools.normalize_data(data, self.normalization)

        # Parenthesized single-argument print works identically under py2 and py3
        print("writing " + outputPath)
        # 'out' avoids shadowing the builtin 'file'; 'with' guarantees close
        with open(outputPath, "w") as out:
            out.write("# %s normalization of %s\n" % (self.normalization, infile))
            if self.combined_wig == True:
                for f in files:
                    out.write("#File: %s\n" % f)
                for i in range(len(sites)):
                    out.write('\t'.join([str(sites[i])] + ["%0.1f" % x for x in list(data[..., i])]) + "\n")
            else:
                out.write(line2 + "\n")
                for j in range(len(sites)):
                    out.write("%s %s\n" % (sites[j], int(data[0, j])))

        self.finish()
        self.transit_message("Finished Normalization")
예제 #2
0
def convertToIGV(self, dataset_list, annotationPath, path, normchoice=None):
    """Normalize the given wig datasets and write them to `path` in IGV format.

    Arguments:
        dataset_list (list): Paths to datasets in .wig format.
        annotationPath (str): Path to annotation (.prot_table or GFF3).
        path (str): Desired output path.
        normchoice (str): Normalization method; None/falsy means "nonorm".
    """
    if not normchoice:
        normchoice = "nonorm"

    (fulldata, position) = tnseq_tools.get_data(dataset_list)
    (fulldata, factors) = norm_tools.normalize_data(fulldata, normchoice,
                                                    dataset_list,
                                                    annotationPath)
    position = position.astype(int)

    # 'with' guarantees the output file is closed even if a write raises
    with open(path, "w") as output:
        output.write("#Converted to IGV with TRANSIT.\n")
        if normchoice != "nonorm":
            output.write("#Reads normalized using '%s'\n" % normchoice)

        output.write("#Files:\n#%s\n" % "\n#".join(dataset_list))
        output.write(
            "#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" %
            ("\t".join([transit_tools.fetch_name(D) for D in dataset_list])))
        chrom = transit_tools.fetch_name(annotationPath)

        # One row per TA site; per-dataset counts are tab-joined
        for i, pos in enumerate(position):
            output.write(
                "%s\t%s\t%s\tTA%s\t%s\t1\n" %
                (chrom, pos, pos + 1, pos, "\t".join(
                    ["%1.1f" % fulldata[j][i] for j in range(len(fulldata))])))
예제 #3
0
 def test_TTR(self):
     """TTR normalization should yield non-unit factors and shift every mean."""
     counts, coords = tnseq_tools.get_data(all_data_list)
     normed, factors = norm_tools.normalize_data(counts, "TTR")
     num_sets = len(all_data_list)
     self.assertFalse((factors == numpy.ones(num_sets)).all())
     for idx in range(num_sets):
         self.assertNotEqual(numpy.mean(normed[idx]), raw_means[idx])
예제 #4
0
 def test_nonorm(self):
     """With "nonorm" the factors are all 1 and the data is left untouched."""
     counts, coords = tnseq_tools.get_data(all_data_list)
     normed, factors = norm_tools.normalize_data(counts, "nonorm")
     self.assertTrue((factors == numpy.array([1.0])).all())
     for idx in range(len(all_data_list)):
         self.assertEqual(numpy.mean(normed[idx]), raw_means[idx])
예제 #5
0
 def test_nonorm(self):
     """"nonorm" must be an identity transform: unit factors, unchanged means."""
     counts, coords = tnseq_tools.get_data(all_data_list)
     normed, factors = norm_tools.normalize_data(counts, "nonorm")
     self.assertTrue((factors == numpy.array([1.0])).all())
     for idx in range(len(all_data_list)):
         self.assertEqual(numpy.mean(normed[idx]), raw_means[idx])
예제 #6
0
    def Run(self):
        """Normalize the input dataset(s) and write the result to self.outfile.

        Reads counts either from a combined wig (self.combined_wig) or from
        the individual wig files in self.ctrldata, applies
        norm_tools.normalize_data with self.normalization, and writes the
        normalized counts back out in the matching format.
        """
        self.transit_message("Starting Normalization")
        start_time = time.time()

        infile = self.infile
        outputPath = self.outfile # output file exists, should I require -overwrite flag?

        # Determine ref genome from the first wig header; assume all files
        # share it and that wigs have 2 header lines.
        line2 = "variableStep chrom=" # fallback if no header line is found
        with open(infile) as wig:  # 'with' closes the handle (the original leaked it)
            for line in wig:
                if line.startswith("variableStep"):
                    line2 = line.rstrip()
                    break

        if self.combined_wig == True:
            (sites, data, files) = tnseq_tools.read_combined_wig(self.ctrldata[0])
        else:
            (data, sites) = tnseq_tools.get_data(self.ctrldata)
        (data, factors) = norm_tools.normalize_data(data, self.normalization)

        # Parenthesized single-argument print works identically under py2 and py3
        print("writing " + outputPath)
        # 'out' avoids shadowing the builtin 'file'; 'with' guarantees close
        with open(outputPath, "w") as out:
            out.write("# %s normalization of %s\n" % (self.normalization, infile))
            if self.combined_wig == True:
                for f in files:
                    out.write("#File: %s\n" % f)
                for i in range(len(sites)):
                    out.write('\t'.join([str(sites[i])] + ["%0.1f" % x for x in list(data[..., i])]) + "\n")
            else:
                out.write(line2 + "\n")
                for j in range(len(sites)):
                    out.write("%s %s\n" % (sites[j], int(data[0, j])))

        self.finish()
        self.transit_message("Finished Normalization")
예제 #7
0
 def test_TTR(self):
     """TTR must actually rescale: factors differ from 1, means change."""
     counts, coords = tnseq_tools.get_data(all_data_list)
     normed, factors = norm_tools.normalize_data(counts, "TTR")
     num_sets = len(all_data_list)
     self.assertFalse((factors == numpy.ones(num_sets)).all())
     for idx in range(num_sets):
         self.assertNotEqual(numpy.mean(normed[idx]), raw_means[idx])
예제 #8
0
    def __init__(self,
                 parent,
                 dataset_list=None,
                 annotation="H37Rv.prot_table",
                 gene="",
                 scale=None,
                 feature_hashes=None,
                 feature_data=None):
        """Build the track-view frame: load annotation, read-count data, and
        a normalized copy of the data, then wire up the dataset chooser.

        Arguments:
            parent: wx parent window.
            dataset_list (list): wig file paths (default: the glycerol sample).
            annotation (str): prot_table/GFF3 annotation path.
            gene (str): if given, immediately search/jump to this gene.
            scale (list): per-dataset display scale; defaults to 150 each.
            feature_hashes (list): extra feature position hashes.
            feature_data (list): extra feature data.

        The list defaults are None sentinels: mutable default arguments are
        shared across calls, so instances could otherwise leak state into
        each other. Passing the old literal values still behaves the same.
        """
        if dataset_list is None:
            dataset_list = ["H37Rv_Sassetti_glycerol.wig"]
        if feature_hashes is None:
            feature_hashes = []
        if feature_data is None:
            feature_data = []

        view_trash.MainFrame.__init__(self, parent)

        self.parent = parent
        self.size = wx.Size(1500, 800)
        self.start = 1
        self.end = 10000

        self.orf2data = transit_tools.get_gene_info(annotation)
        self.hash = transit_tools.get_pos_hash(annotation)

        self.features = []

        # Data to facilitate search: lowercase gene name -> list of ORF ids
        self.name2id = {}
        for orf, (name, desc, start, end, strand) in self.orf2data.items():
            name = name.lower()
            if name not in self.name2id: self.name2id[name] = []
            self.name2id[name].append(orf)

        self.lowerid2id = dict([(x.lower(), x) for x in self.orf2data.keys()])
        self.labels = [fetch_name(d) for d in dataset_list]
        (self.fulldata, self.position) = tnseq_tools.get_data(dataset_list)

        # Save a normalized copy of the data alongside the raw counts
        (self.fulldata_norm,
         self.factors) = norm_tools.normalize_data(self.fulldata,
                                                   method="nzmean")
        self.wasNorm = False

        self.feature_hashes = feature_hashes
        self.feature_data = feature_data

        if not scale:
            scale = [150] * len(dataset_list)  # default display scale per dataset
        self.scale = scale
        self.globalScale = False

        self.datasetChoice.SetItems(self.labels)
        self.datasetChoice.SetSelection(0)

        if gene:
            # Jump straight to the requested gene
            self.searchText.SetValue(gene)
            self.searchFunc(gene)

        self.updateFunc(parent)
        self.Fit()
예제 #9
0
    def Run(self):
        """Export the control datasets to IGV format on self.output.

        Normalizes the data with self.normalization, writes header comments
        (including the normalization factors when normalizing), then one
        row per TA site with per-dataset counts.
        """
        self.transit_message("Starting IGV Export")
        start_time = time.time()

        # Get and normalize the read-count data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata,
         factors) = norm_tools.normalize_data(fulldata, self.normalization,
                                              self.ctrldata,
                                              self.annotation_path)
        position = position.astype(int)

        # NOTE: the original also built get_pos_hash/get_gene_info lookups
        # here, but never used them — that dead work (two extra annotation
        # parses) has been removed.

        self.transit_message("Normalizing")
        self.output.write("#Converted to IGV with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write("#Reads normalized using '%s'\n" %
                              self.normalization)
            # factors may be a flat vector or a list of per-dataset sequences.
            # NOTE(review): type(x) == type(0.0) is deliberately kept;
            # isinstance() would also accept numpy float subclasses and could
            # flip which branch runs.
            if type(factors[0]) == type(0.0):
                self.output.write(
                    "#Normalization Factors: %s\n" %
                    "\t".join(["%s" % f for f in factors.flatten()]))
            else:
                self.output.write("#Normalization Factors: %s\n" % " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors]))

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        dataset_str = "\t".join(
            [transit_tools.fetch_name(F) for F in self.ctrldata])
        self.output.write("#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" %
                          dataset_str)
        chrom = transit_tools.fetch_name(self.annotation_path)

        (K, N) = fulldata.shape
        self.progress_range(N)
        for i, pos in enumerate(position):
            self.output.write(
                "%s\t%s\t%s\tTA%s\t%s\t1\n" %
                (chrom, pos, pos + 1, pos, "\t".join(
                    ["%1.1f" % fulldata[j][i] for j in range(len(fulldata))])))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #10
0
    def Run(self):
        """Export mean read-counts per gene for the control datasets.

        Writes header comments (normalization factors, file list) and then
        one row per gene: orf, name, number of TA sites, and the mean count
        per dataset.
        """
        self.transit_message("Starting Gene Mean Counts Export")
        start_time = time.time()

        # Get and normalize the read-count data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(fulldata, self.normalization, 
            self.ctrldata, self.annotation_path)
        position = position.astype(int)

        # NOTE: the original also built get_pos_hash/get_gene_info lookups
        # here but never used them (Genes below does its own parse) — that
        # dead work has been removed.

        self.transit_message("Normalizing")
        self.output.write("#Summarized to Mean Gene Counts with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write("#Reads normalized using '%s'\n" % self.normalization)
            # NOTE(review): type(x) == type(0.0) deliberately kept; isinstance
            # would also match numpy float subclasses and flip the branch taken.
            if type(factors[0]) == type(0.0):
                self.output.write("#Normalization Factors: %s\n" % "\t".join(["%s" % f for f in factors.flatten()]))
            else:
                self.output.write("#Normalization Factors: %s\n" % " ".join([",".join(["%s" % bx for bx in b]) for b in factors]))

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        K, Nsites = fulldata.shape
        # Get Gene objects (re-read and normalized the same way)
        G = tnseq_tools.Genes(self.ctrldata, self.annotation_path, norm=self.normalization)
        N = len(G)
        self.progress_range(N)
        dataset_header = "\t".join([transit_tools.fetch_name(D) for D in self.ctrldata])
        self.output.write("#Orf\tName\tNumber of TA sites\t%s\n" % dataset_header)
        for i, gene in enumerate(G):
            if gene.n > 0:
                # Mean across TA sites, one value per dataset
                data_str = "\t".join(["%1.2f" % (M) for M in numpy.mean(gene.reads, 1)])
            else:
                data_str = "\t".join(["%1.2f" % (Z) for Z in numpy.zeros(K)])
            self.output.write("%s\t%s\t%s\t%s\n" % (gene.orf, gene.name, gene.n, data_str))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #11
0
    def Run(self):
        """Write the normalized control datasets as a single combined wig."""
        self.transit_message("Starting Combined Wig Export")
        start_time = time.time()

        # Load and normalize the read counts
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata,
         factors) = norm_tools.normalize_data(fulldata, self.normalization,
                                              self.ctrldata,
                                              self.annotation_path)
        position = position.astype(int)

        pos_hash = transit_tools.get_pos_hash(self.annotation_path)
        gene_info = transit_tools.get_gene_info(self.annotation_path)

        self.transit_message("Normalizing")
        self.output.write("#Converted to CombinedWig with TRANSIT.\n")
        self.output.write("#normalization method: %s\n" % self.normalization)
        if self.normalization != "nonorm":
            # factors can be a flat vector or a list of per-dataset sequences
            if type(factors[0]) == type(0.0):
                factor_text = "\t".join(["%s" % f for f in factors.flatten()])
            else:
                factor_text = " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors])
            self.output.write("#Normalization Factors: %s\n" % factor_text)

        (num_sets, num_sites) = fulldata.shape
        for fname in self.ctrldata:
            self.output.write("#File: %s\n" % fname)
        self.output.write("#TAcoord\t%s\n" % ('\t'.join(self.ctrldata)))

        raw_counts = self.normalization == 'nonorm'
        for idx, coord in enumerate(position):
            if raw_counts:
                vals = "\t".join(["%d" % c for c in fulldata[:, idx]])  # no decimals if raw counts
            else:
                vals = "\t".join(["%1.1f" % c for c in fulldata[:, idx]])
            annot = ",".join([
                "%s (%s)" % (orf, gene_info.get(orf, ["-"])[0])
                for orf in pos_hash.get(coord, [])
            ])
            self.output.write("%d\t%s\t%s\n" % (coord, vals, annot))
            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * idx / num_sites)
            self.progress_update(text, idx)
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #12
0
def convertToCombinedWig(dataset_list,
                         annotationPath,
                         outputPath,
                         normchoice="nonorm"):
    """Normalizes the input datasets and outputs the result in CombinedWig format.

    Arguments:
        dataset_list (list): List of paths to datasets in .wig format
        annotationPath (str): Path to annotation in .prot_table or GFF3 format.
        outputPath (str): Desired output path.
        normchoice (str): Choice for normalization method.
    """

    (fulldata, position) = tnseq_tools.get_data(dataset_list)
    (fulldata, factors) = norm_tools.normalize_data(fulldata, normchoice,
                                                    dataset_list,
                                                    annotationPath)
    position = position.astype(int)

    hash = get_pos_hash(annotationPath)
    rv2info = get_gene_info(annotationPath)

    # 'with' guarantees the file is closed even if a write raises
    with open(outputPath, "w") as output:
        output.write("#Converted to CombinedWig with TRANSIT.\n")
        if normchoice != "nonorm":
            output.write("#Reads normalized using '%s'\n" % normchoice)
            # factors can be a flat vector or a list of per-dataset sequences.
            # NOTE(review): type(x) == type(0.0) deliberately kept; isinstance
            # would also match numpy float subclasses and flip the branch.
            if type(factors[0]) == type(0.0):
                output.write("#Normalization Factors: %s\n" %
                             "\t".join(["%s" % f for f in factors.flatten()]))
            else:
                output.write(
                    "#Normalization Factors: %s\n" %
                    " ".join([",".join(["%s" % bx for bx in b]) for b in factors]))

        output.write("#Files:\n")
        for f in dataset_list:
            output.write("#%s\n" % f)

        # One row per TA site: coordinate, per-dataset counts, overlapping genes
        for i, pos in enumerate(position):
            output.write(
                "%d\t%s\t%s\n" %
                (pos, "\t".join(["%1.1f" % c for c in fulldata[:, i]]),
                 ",".join(["%s (%s)" % (orf, rv2info.get(orf, ["-"])[0])
                           for orf in hash.get(pos, [])])))
예제 #13
0
def convertToGeneCountSummary(dataset_list,
                              annotationPath,
                              outputPath,
                              normchoice="nonorm"):
    """Normalizes the input datasets and outputs mean read-counts per gene.

    (The previous docstring said "CombinedWig format", a copy/paste error;
    this function writes a gene-count summary table.)

    Arguments:
        dataset_list (list): List of paths to datasets in .wig format
        annotationPath (str): Path to annotation in .prot_table or GFF3 format.
        outputPath (str): Desired output path.
        normchoice (str): Choice for normalization method.
    """

    (fulldata, position) = tnseq_tools.get_data(dataset_list)
    (fulldata, factors) = norm_tools.normalize_data(fulldata, normchoice,
                                                    dataset_list,
                                                    annotationPath)
    # 'with' guarantees the file is closed even if a write raises
    with open(outputPath, "w") as output:
        output.write("#Summarized to Mean Gene Counts with TRANSIT.\n")
        if normchoice != "nonorm":
            output.write("#Reads normalized using '%s'\n" % normchoice)
            # factors can be a flat vector or a list of per-dataset sequences.
            # NOTE(review): type(x) == type(0.0) deliberately kept; isinstance
            # would also match numpy float subclasses and flip the branch.
            if type(factors[0]) == type(0.0):
                output.write("#Normalization Factors: %s\n" %
                             "\t".join(["%s" % f for f in factors.flatten()]))
            else:
                output.write(
                    "#Normalization Factors: %s\n" %
                    " ".join([",".join(["%s" % bx for bx in b]) for b in factors]))

        (K, N) = fulldata.shape
        output.write("#Files:\n")
        for f in dataset_list:
            output.write("#%s\n" % f)

        # Get Gene objects (re-read and normalized the same way)
        G = tnseq_tools.Genes(dataset_list, annotationPath, norm=normchoice)

        dataset_header = "\t".join([os.path.basename(D) for D in dataset_list])
        output.write("#Orf\tName\tNumber of TA sites\t%s\n" % dataset_header)
        for i, gene in enumerate(G):
            if gene.n > 0:
                # Mean across TA sites, one value per dataset
                data_str = "\t".join(
                    ["%1.2f" % (M) for M in numpy.mean(gene.reads, 1)])
            else:
                data_str = "\t".join(["%1.2f" % (Z) for Z in numpy.zeros(K)])
            output.write("%s\t%s\t%s\t%s\n" %
                         (gene.orf, gene.name, gene.n, data_str))
예제 #14
0
def get_validated_data(wig_list, wxobj=None):
    """Validate the wig files and load them with the appropriate reader.

    Arguments:
        wig_list (list): List of paths to wig files.
        wxobj (object): wxPython GUI object for warnings.

    Returns:
        tuple: (data, position) — a matrix of raw read-counts and the list
        of TA-site coordinates, as produced by the chosen tnseq_tools reader.

    The reader is selected from the status reported by validate_wig_format:
    0 = regular file with empty sites, 1 = no empty sites / treat as Himar1,
    2 = no empty sites / treat as Tn5, anything else = no choice was made.

    .. seealso:: :class:`get_file_types` :class:`combine_replicates` :class:`get_data_zero_fill` :class:`pytransit.norm_tools.normalize_data`
    """
    (status, genome) = validate_wig_format(wig_list, wxobj=wxobj)

    if status == 0:
        # Regular file with empty sites
        return tnseq_tools.get_data(wig_list)
    if status == 1:
        # No empty sites, user decided to proceed as Himar1
        return tnseq_tools.get_data_w_genome(wig_list, genome)
    if status == 2:
        # No empty sites, user decided to proceed as Tn5
        return tnseq_tools.get_data_zero_fill(wig_list)
    # The user didn't choose either option
    return tnseq_tools.get_data([])
예제 #15
0
파일: igv.py 프로젝트: mad-lab/transit
    def Run(self):
        """Export the control datasets to IGV format on self.output.

        Normalizes the data with self.normalization, writes header comments
        (including normalization factors), then one row per TA site.
        """
        self.transit_message("Starting IGV Export")
        start_time = time.time()

        # Get and normalize the read-count data
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(fulldata, self.normalization, 
            self.ctrldata, self.annotation_path)
        position = position.astype(int)

        # NOTE: the original also built get_pos_hash/get_gene_info lookups
        # here, but never used them — that dead work (two extra annotation
        # parses) has been removed.

        self.transit_message("Normalizing")
        self.output.write("#Converted to IGV with TRANSIT.\n")
        if self.normalization != "nonorm":
            self.output.write("#Reads normalized using '%s'\n" % self.normalization)
            # NOTE(review): type(x) == type(0.0) deliberately kept; isinstance
            # would also match numpy float subclasses and flip the branch taken.
            if type(factors[0]) == type(0.0):
                self.output.write("#Normalization Factors: %s\n" % "\t".join(["%s" % f for f in factors.flatten()]))
            else:
                self.output.write("#Normalization Factors: %s\n" % " ".join([",".join(["%s" % bx for bx in b]) for b in factors]))

        self.output.write("#Files:\n")
        for f in self.ctrldata:
            self.output.write("#%s\n" % f)

        dataset_str = "\t".join([transit_tools.fetch_name(F) for F in self.ctrldata])
        self.output.write("#Chromosome\tStart\tEnd\tFeature\t%s\tTAs\n" % dataset_str)
        chrom = transit_tools.fetch_name(self.annotation_path)

        (K, N) = fulldata.shape
        self.progress_range(N)
        for i, pos in enumerate(position):
            self.output.write("%s\t%s\t%s\tTA%s\t%s\t1\n" % (chrom, pos, pos + 1, pos, "\t".join(["%1.1f" % fulldata[j][i] for j in range(len(fulldata))])))

            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * i / N)
            self.progress_update(text, i)
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #16
0
    def test_genes_creation_fromdata(self):
        """Genes built from an in-memory matrix support dict and list access."""
        counts, coords = tnseq_tools.get_data(all_data_list)
        n_reps, n_sites = counts.shape
        genes = tnseq_tools.Genes([], annotation, data=counts, position=coords)
        expected_orf = "Rv0001"
        expected_name = "dnaA"

        # The annotation should yield well over 3000 genes
        self.assertGreater(len(genes), 3000)

        # Dictionary-style lookup returns the right gene
        self.assertEqual(genes[expected_orf].orf, expected_orf)
        self.assertEqual(genes[expected_orf].name, expected_name)

        # List-style lookup returns the same first gene
        self.assertEqual(genes[0].orf, expected_orf)
        self.assertEqual(genes[0].name, expected_name)
예제 #17
0
    def Run(self):
        """Compute per-dataset summary statistics and write them as a table.

        For each dataset (individual wigs or a combined wig) writes density,
        mean, non-zero mean/median, max, total, skewness, kurtosis, and
        optionally the Pickands tail index, to self.outfile or stdout.
        """
        self.transit_message("Starting TnseqStats")
        start_time = time.time()

        datasets = self.wigs
        if self.combined_wig is None:  # 'is None' instead of '== None'
            (data, sites) = tnseq_tools.get_data(self.wigs)
        else:
            (sites, data,
             datasets) = tnseq_tools.read_combined_wig(self.combined_wig)

        # Write table of stats (saturation, NZmean, ...).
        # 'out' avoids shadowing the builtin 'file'.
        out = sys.stdout
        if self.outfile is not None:
            out = open(self.outfile, "w")
        PTI = True  # include the Pickands tail index column
        if PTI:
            out.write(
                "dataset\tdensity\tmean_ct\tNZmean\tNZmedian\tmax_ct\ttotal_cts\tskewness\tkurtosis\tpickands_tail_index\n"
            )
        else:
            out.write(
                "dataset\tdensity\tmean_ct\tNZmean\tNZmedian\tmax_ct\ttotal_cts\tskewness\tkurtosis\n"
            )
        for i in range(data.shape[0]):
            density, meanrd, nzmeanrd, nzmedianrd, maxrd, totalrd, skew, kurtosis = tnseq_tools.get_data_stats(
                data[i, :])
            # A NaN non-zero median (e.g. all-zero dataset) is reported as 0
            nzmedianrd = int(nzmedianrd) if not numpy.isnan(nzmedianrd) else 0
            pti = self.pickands_tail_index(data[i, :])
            vals = [
                datasets[i],
                "%0.3f" % density,
                "%0.1f" % meanrd,
                "%0.1f" % nzmeanrd,
                "%d" % nzmedianrd, maxrd,
                int(totalrd),
                "%0.1f" % skew,
                "%0.1f" % kurtosis
            ]
            if PTI:
                vals.append("%0.3f" % pti)
            out.write('\t'.join([str(x) for x in vals]) + '\n')
        # Only close what we opened; never close sys.stdout
        if self.outfile is not None:
            out.close()

        self.finish()
        self.transit_message("Finished TnseqStats")
예제 #18
0
    def test_genes_creation_fromdata(self):
        """Building Genes from raw data/position arrays preserves lookups."""
        counts, coords = tnseq_tools.get_data(all_data_list)
        n_reps, n_sites = counts.shape
        genes = tnseq_tools.Genes([], annotation, data=counts, position=coords)
        expected_orf = "Rv0001"
        expected_name = "dnaA"

        # List creation: the annotation yields well over 3000 genes
        self.assertGreater(len(genes), 3000)

        # Dictionary lookup + data
        self.assertEqual(genes[expected_orf].orf, expected_orf)
        self.assertEqual(genes[expected_orf].name, expected_name)

        # List lookup + data
        self.assertEqual(genes[0].orf, expected_orf)
        self.assertEqual(genes[0].name, expected_name)
예제 #19
0
    def Run(self):
        """Export the normalized control datasets as a combined wig file."""
        self.transit_message("Starting Combined Wig Export")
        start_time = time.time()

        # Load and normalize the read counts
        self.transit_message("Getting Data")
        (fulldata, position) = tnseq_tools.get_data(self.ctrldata)
        (fulldata, factors) = norm_tools.normalize_data(fulldata, self.normalization,
            self.ctrldata, self.annotation_path)
        position = position.astype(int)

        pos_hash = transit_tools.get_pos_hash(self.annotation_path)
        gene_info = transit_tools.get_gene_info(self.annotation_path)

        self.transit_message("Normalizing")
        self.output.write("#Converted to CombinedWig with TRANSIT.\n")
        self.output.write("#normalization method: %s\n" % self.normalization)
        if self.normalization != "nonorm":
            # factors can be a flat vector or a list of per-dataset sequences
            if type(factors[0]) == type(0.0):
                factor_text = "\t".join(["%s" % f for f in factors.flatten()])
            else:
                factor_text = " ".join(
                    [",".join(["%s" % bx for bx in b]) for b in factors])
            self.output.write("#Normalization Factors: %s\n" % factor_text)

        (num_sets, num_sites) = fulldata.shape
        for fname in self.ctrldata:
            self.output.write("#File: %s\n" % fname)
        self.output.write("#TAcoord\t%s\n" % ('\t'.join(self.ctrldata)))

        raw_counts = self.normalization == 'nonorm'
        for idx, coord in enumerate(position):
            if raw_counts:
                vals = "\t".join(["%d" % c for c in fulldata[:, idx]])  # no decimals if raw counts
            else:
                vals = "\t".join(["%1.1f" % c for c in fulldata[:, idx]])
            annot = ",".join([
                "%s (%s)" % (orf, gene_info.get(orf, ["-"])[0])
                for orf in pos_hash.get(coord, [])
            ])
            self.output.write("%d\t%s\t%s\n" % (coord, vals, annot))
            # Update progress
            text = "Running Export Method... %5.1f%%" % (100.0 * idx / num_sites)
            self.progress_update(text, idx)
        self.output.close()

        self.transit_message("")  # Printing empty line to flush stdout
        self.finish()
        self.transit_message("Finished Export")
예제 #20
0
    def test_read_data(self):
        """get_data returns one row per wig file and >70k TA sites."""
        counts, coords = tnseq_tools.get_data(all_data_list)
        n_files, n_sites = counts.shape

        self.assertEqual(n_files, 5)
        self.assertGreater(n_sites, 70000)
예제 #21
0
파일: tnseq_GI.py 프로젝트: abelew/tnseq_GI
def main(args, kwargs, quite=False, jumble=False):

    missingArgs = False
    if "a1" not in kwargs:
        missingArgs = True
        error("Missing -a1 argument")
    if "a2" not in kwargs:
        missingArgs = True
        error("Missing -a2 argument")
    if "b1" not in kwargs:
        missingArgs = True
        error("Missing -b1 argument")
    if "b2" not in kwargs:
        missingArgs = True
        error("Missing -b2 argument")
    if "pt" not in kwargs:
        missingArgs = True
        error("Missing -pt argument")

    if missingArgs:
        usage()
        sys.exit()

    A_1list = kwargs["a1"].split(",")
    A_2list = kwargs["a2"].split(",")
    B_1list = kwargs["b1"].split(",")
    B_2list = kwargs["b2"].split(",")

    annotation = kwargs["pt"]
    rope = float(kwargs.get("rope", 0.5))
    S = int(kwargs.get("s", 100000))
    norm_method = kwargs.get("n", "TTR")
    label = kwargs.get("l", "debug")
    onlyNZ = kwargs.get("-nz", False)
    doBFDR = kwargs.get("-bfdr", False)
    doFWER = kwargs.get("-fwer", False)
    DEBUG = []
    if "debug" in kwargs:
        DEBUG = kwargs["debug"].split(",")

    wiglist = A_1list + B_1list + A_2list + B_2list

    Nwig = len(wiglist)
    Na1 = len(A_1list)
    Nb1 = len(A_1list)
    Na2 = len(B_2list)
    Nb2 = len(B_2list)

    (data, position) = tnseq_tools.get_data(wiglist)

    ######### FILTER EMTPY SITES #########
    if onlyNZ:
        ii_good = numpy.sum(data, 0) > 0
        data = data[:, ii_good]
        position = position[ii_good]
    ######################################

    (data, factors) = norm_tools.normalize_data(data, norm_method, wiglist,
                                                sys.argv[1])

    if jumble:
        numpy.random.shuffle(data.flat)
        numpy.random.shuffle(data.flat)

    G_A1 = tnseq_tools.Genes([],
                             annotation,
                             data=data[:Na1],
                             position=position)
    G_B1 = tnseq_tools.Genes([],
                             annotation,
                             data=data[Na1:(Na1 + Nb1)],
                             position=position)
    G_A2 = tnseq_tools.Genes([],
                             annotation,
                             data=data[(Na1 + Nb1):(Na1 + Nb1 + Na2)],
                             position=position)
    G_B2 = tnseq_tools.Genes([],
                             annotation,
                             data=data[(Na1 + Nb1 + Na2):],
                             position=position)

    means_list_a1 = []
    means_list_b1 = []
    means_list_a2 = []
    means_list_b2 = []

    var_list_a1 = []
    var_list_a2 = []
    var_list_b1 = []
    var_list_b2 = []

    # Base priors on empirical observations accross genes.
    for gene in sorted(G_A1):
        if gene.n > 1:
            A1_data = G_A1[gene.orf].reads.flatten()
            B1_data = G_B1[gene.orf].reads.flatten()
            A2_data = G_A2[gene.orf].reads.flatten()
            B2_data = G_B2[gene.orf].reads.flatten()

            means_list_a1.append(numpy.mean(A1_data))
            var_list_a1.append(numpy.var(A1_data))

            means_list_b1.append(numpy.mean(B1_data))
            var_list_b1.append(numpy.var(B1_data))

            means_list_a2.append(numpy.mean(A2_data))
            var_list_a2.append(numpy.var(A2_data))

            means_list_b2.append(numpy.mean(B2_data))
            var_list_b2.append(numpy.var(B2_data))

    # Priors
    mu0_A1 = scipy.stats.trim_mean(means_list_a1, 0.01)
    mu0_B1 = scipy.stats.trim_mean(means_list_b1, 0.01)
    mu0_A2 = scipy.stats.trim_mean(means_list_a2, 0.01)
    mu0_B2 = scipy.stats.trim_mean(means_list_b2, 0.01)

    s20_A1 = scipy.stats.trim_mean(var_list_a1, 0.01)
    s20_B1 = scipy.stats.trim_mean(var_list_b1, 0.01)
    s20_A2 = scipy.stats.trim_mean(var_list_a2, 0.01)
    s20_B2 = scipy.stats.trim_mean(var_list_b2, 0.01)

    k0 = 1.0
    nu0 = 1.0

    data = []
    postprob = []

    if not quite:
        print "# Created with '%s'.  Copyright 2016-2017. Michael A. DeJesus & Thomas R. Ioerger" % (
            sys.argv[0])
        print "# Version %1.2f; http://saclab.tamu.edu/essentiality/GI" % __version__
        print "#"
        print "# python %s" % " ".join(sys.argv)
        print "# Samples = %d, k0=%1.1f, nu0=%1.1f" % (S, k0, nu0)
        print "# Mean Prior:       Variance Prior:"
        print "# mu0_A1 = %1.2f    s20_A1 = %1.1f" % (mu0_A1, s20_A1)
        print "# mu0_B1 = %1.2f    s20_B1 = %1.1f" % (mu0_B1, s20_B1)
        print "# mu0_A2 = %1.2f    s20_A2 = %1.1f" % (mu0_A2, s20_A2)
        print "# mu0_B2 = %1.2f    s20_B2 = %1.1f" % (mu0_B2, s20_B2)
        print "# ROPE:", rope
        print "# TTR Factors:", ", ".join(
            ["%1.4f" % x for x in numpy.array(factors).flatten()])
    for gene in sorted(G_A1):

        if len(DEBUG) > 0:
            if gene.orf not in DEBUG: continue

        if gene.n > 0:
            A1_data = G_A1[gene.orf].reads.flatten()
            B1_data = G_B1[gene.orf].reads.flatten()
            A2_data = G_A2[gene.orf].reads.flatten()
            B2_data = G_B2[gene.orf].reads.flatten()

            #            Time-1   Time-2
            #
            #  Strain-A     A       C
            #
            #  Strain-B     B       D

            try:
                muA1_post, varA1_post = sample_post(A1_data, S, mu0_A1, s20_A1,
                                                    k0, nu0)
                muB1_post, varB1_post = sample_post(B1_data, S, mu0_B1, s20_B1,
                                                    k0, nu0)
                muA2_post, varA2_post = sample_post(A2_data, S, mu0_A2, s20_A2,
                                                    k0, nu0)
                muB2_post, varB2_post = sample_post(B2_data, S, mu0_B2, s20_B2,
                                                    k0, nu0)
            except Exception as e:
                muA1_post = varA1_post = numpy.ones(S)
                muB1_post = varB1_post = numpy.ones(S)
                muA2_post = varA2_post = numpy.ones(S)
                muB2_post = varB2_post = numpy.ones(S)

            logFC_A_post = numpy.log2(muA2_post / muA1_post)
            logFC_B_post = numpy.log2(muB2_post / muB1_post)
            delta_logFC_post = logFC_B_post - logFC_A_post

            alpha = 0.05

            # Get Bounds of the HDI
            l_logFC_A, u_logFC_A = HDI_from_MCMC(logFC_A_post, 1 - alpha)

            l_logFC_B, u_logFC_B = HDI_from_MCMC(logFC_B_post, 1 - alpha)

            l_delta_logFC, u_delta_logFC = HDI_from_MCMC(
                delta_logFC_post, 1 - alpha)

            mean_logFC_A = numpy.mean(logFC_A_post)
            mean_logFC_B = numpy.mean(logFC_B_post)
            mean_delta_logFC = numpy.mean(delta_logFC_post)

            # Is HDI significantly different than ROPE?
            not_HDI_overlap_bit = l_delta_logFC > rope or u_delta_logFC < -rope

            # Probability of posterior overlaping with ROPE
            probROPE = numpy.mean(
                numpy.logical_and(delta_logFC_post >= 0.0 - rope,
                                  delta_logFC_post <= 0.0 + rope))

        else:
            A1_data = [0, 0]
            B1_data = [0, 0]
            A2_data = [0, 0]
            B2_data = [0, 0]

            mean_logFC_A = 0
            mean_logFC_B = 0
            mean_delta_logFC = 0
            l_logFC_A = 0
            u_logFC_A = 0
            l_logFC_B = 0
            u_logFC_B = 0
            l_delta_logFC = 0
            u_delta_logFC = 0
            probROPE = 1.0

        if numpy.isnan(l_logFC_A):
            l_logFC_A = -10
            u_logFC_A = 10
        if numpy.isnan(l_logFC_B):
            l_logFC_B = -10
            u_logFC_B = 10
        if numpy.isnan(l_delta_logFC):
            l_delta_logFC = -10
            u_delta_logFC = 10

        if DEBUG:

            out = open("%s.%s_muA1_post" % (label, gene.orf), "w")
            for x in muA1_post:
                print >> out, x

            out = open("%s.%s_muA2_post" % (label, gene.orf), "w")
            for x in muA2_post:
                print >> out, x

            out = open("%s.%s_logFC_A_post" % (label, gene.orf), "w")
            for x in logFC_A_post:
                print >> out, x

            out = open("%s.%s_muB1_post" % (label, gene.orf), "w")
            for x in muB1_post:
                print >> out, x

            out = open("%s.%s_muB2_post" % (label, gene.orf), "w")
            for x in muB2_post:
                print >> out, x

            out = open("%s.%s_logFC_B_post" % (label, gene.orf), "w")
            for x in logFC_A_post:
                print >> out, x

            out = open("%s.%s_delta_logFC_post" % (label, gene.orf), "w")
            for x in delta_logFC_post:
                print >> out, x

        postprob.append(probROPE)
        data.append((gene.orf, gene.name, gene.n, numpy.mean(muA1_post),
                     numpy.mean(muA2_post), numpy.mean(muB1_post),
                     numpy.mean(muB2_post), mean_logFC_A, mean_logFC_B,
                     mean_delta_logFC, l_delta_logFC, u_delta_logFC, probROPE,
                     not_HDI_overlap_bit))

    if doBFDR or not doFWER:
        postprob = numpy.array(postprob)
        postprob.sort()
        bfdr = numpy.cumsum(postprob) / numpy.arange(1, len(postprob) + 1)
        adjusted_prob = bfdr
        adjusted_label = "BFDR"
        if doBFDR:
            data.sort(key=lambda x: x[-2])
        else:
            data.sort(key=lambda x: x[-1], reverse=True)
    elif doFWER:
        fwer = FWER_Bayes(postprob)
        fwer.sort()
        adjusted_prob = fwer
        adjusted_label = "FWER"
        data.sort(key=lambda x: x[-2])

    return (data, adjusted_prob, adjusted_label)
Example #22
0
 def test_normalization(self):
     """TTR normalization must yield non-trivial per-dataset factors."""
     num_datasets = len(all_data_list)
     raw_counts, coords = tnseq_tools.get_data(all_data_list)
     normed, scale_factors = norm_tools.normalize_data(raw_counts, "TTR")
     self.assertFalse((scale_factors == numpy.ones(num_datasets)).all())
Example #23
0
    def __init__( self, parent, datasets):
        """Build the Quality Control window for a set of wig datasets.

        Loads the raw (un-normalized) insertion counts for the given wig
        files, lays out the plot area, the normalization selector and the
        per-file statistics table, then renders the initial ("nonorm")
        view via self.refresh().

        Args:
            parent: parent wx window.
            datasets: list of paths to .wig files to inspect.
        """
        try:
            self.qc_prefix = "[QualityControl]"  # prefix for console messages
            self.index_stats = 0
            self.plots_list = []

            self.wigList = datasets

            wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = "Quality Control", pos = wx.DefaultPosition, size = wx.Size( 1560, 900 ), style = wx.DEFAULT_FRAME_STYLE|wx.TAB_TRAVERSAL )

            #self.SetSizeHints( wx.DefaultSize, wx.DefaultSize )

            bSizer9 = wx.BoxSizer( wx.VERTICAL )

            self.m_scrolledWindow1 = wx.ScrolledWindow( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.m_scrolledWindow1.SetScrollRate( 5, 5 )
            bSizer10 = wx.BoxSizer( wx.VERTICAL )

            # Top area: three side-by-side bitmaps that will hold QC figures.
            self.plotsScrolledWindow = wx.ScrolledWindow( self.m_scrolledWindow1, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.plotsScrolledWindow.SetScrollRate( 5, 5 )
            self.plotsScrolledWindow.SetMinSize( wx.Size( -1, 515 ) )

            #plotsSizer = wx.BoxSizer( wx.VERTICAL )
            plotsSizer = wx.BoxSizer( wx.HORIZONTAL )

            self.plotsBitmap1 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )
            self.plotsBitmap2 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )
            self.plotsBitmap3 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )

            plotsSizer.Add( self.plotsBitmap1, 0, wx.ALL, 5 )
            plotsSizer.Add( self.plotsBitmap2, 0, wx.ALL, 5 )
            plotsSizer.Add( self.plotsBitmap3, 0, wx.ALL, 5 )

            #self.plotsBitmap.SetMaxSize( wx.Size( 400,400 ) )
            #self.plotsFigure = Figure()
            #self.plotsAxes = self.plotsFigure.add_subplot(111)
            #self.plotsCanvas = FigureCanvas(self, -1, self.plotsFigure)
            #plotsSizer.Add( self.plotsCanvas, 0, wx.ALL, 5 )

            self.plotsScrolledWindow.SetSizer( plotsSizer )
            self.plotsScrolledWindow.Layout()
            plotsSizer.Fit( self.plotsScrolledWindow )
            bSizer10.Add( self.plotsScrolledWindow, 0, wx.EXPAND |wx.ALL, 5 )

            # Bottom area: note text, normalization selector, stats table.
            self.statsScrolledWindow = wx.ScrolledWindow( self.m_scrolledWindow1, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.statsScrolledWindow.SetScrollRate( 5, 5 )
            self.statsScrolledWindow.SetMaxSize( wx.Size( -1, -1 ) )
            self.statsScrolledWindow.SetMinSize( wx.Size( -1, 300 ) )


            NoteText = """*Note: Plot 1 and 2 truncate the top 1% of reads for readability.
 Selecting a normalization method from the drop down will normalize the data and refresh the figures and table.
 This may take a long time depending on the normalization method chosen."""

            #self.noticeLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, u"*Note: Plot 1 and 2 truncate the top 1% of reads for readability.", wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_CENTRE)
            self.noticeLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, NoteText, wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_LEFT)

            # Normalization choices are discovered from norm_tools at runtime.
            normChoices = sorted(norm_tools.methods.keys()) #[ u"nonorm", "TTR", "betageom"]
            self.normChoice = wx.Choice( self.statsScrolledWindow, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, normChoices, 0 )
            #self.normChoice.SetSelection( 0 )
            self.normChoice.SetStringSelection("nonorm")
            #noteSizer = wx.BoxSizer( wx.VERTICAL )
            #noteSizer.Add(self.noticeLabel, wx.ALL|wx.EXPAND, 5 )


            self.normLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, u"Normalization:", wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_CENTRE)
            statsSizer = wx.BoxSizer( wx.VERTICAL )
            normSizer = wx.BoxSizer( wx.HORIZONTAL )

            self.statsListCtrl = wx.ListCtrl( self.statsScrolledWindow, wx.ID_ANY, wx.DefaultPosition, wx.Size( -1, 140 ), wx.LC_REPORT |wx.LC_SINGLE_SEL )


            normSizer.Add(self.normLabel, 1, wx.ALL|wx.ALIGN_CENTER_VERTICAL, 5)
            normSizer.Add(self.normChoice, 0, wx.ALL, 5)

            statsSizer.Add( self.noticeLabel, 0, wx.EXPAND, 5 )
            #statsSizer.Add( self.normChoice, 0, wx.ALL, 5 )
            statsSizer.Add(normSizer, 0, wx.ALL, 5)
            statsSizer.Add( self.statsListCtrl, 1, wx.ALL|wx.EXPAND, 5 )



            self.statsScrolledWindow.SetSizer( statsSizer )
            self.statsScrolledWindow.Layout()
            statsSizer.Fit( self.statsScrolledWindow )
            bSizer10.Add( self.statsScrolledWindow, 0, wx.EXPAND |wx.ALL, 5 )


            self.m_scrolledWindow1.SetSizer( bSizer10 )
            self.m_scrolledWindow1.Layout()
            bSizer10.Fit( self.m_scrolledWindow1 )
            bSizer9.Add( self.m_scrolledWindow1, 1, wx.EXPAND |wx.ALL, 5 )


            self.SetSizer( bSizer9 )
            self.Layout()

            self.Centre( wx.BOTH )

            ########################
            # Connect Events
            self.statsListCtrl.Bind( wx.EVT_LIST_ITEM_SELECTED, self.onStatsItemSelect)
            self.normChoice.Bind( wx.EVT_CHOICE, self.onNormSelect )
            self.Bind(wx.EVT_CLOSE, self.OnClose)


            #######
            # Columns of the per-file statistics table.
            self.index_stats = 0
            self.statsListCtrl.InsertColumn(0, 'File', width=250)
            self.statsListCtrl.InsertColumn(1, 'Density', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(2, 'Mean Read', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(3, 'NZMean Read', wx.LIST_FORMAT_CENTRE, width=115)
            self.statsListCtrl.InsertColumn(4, 'NZMedian Read', wx.LIST_FORMAT_CENTRE, width=125)
            self.statsListCtrl.InsertColumn(5, 'Max Read', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(6, 'Total Reads', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(7, 'Skew', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(8, 'Kurtosis', wx.LIST_FORMAT_CENTRE, width=85)




            ############################
            # Load the raw counts; normalization is applied later on demand.
            self.norm = "nonorm"
            (self.data, self.position) = tnseq_tools.get_data(self.wigList)


            self.refresh()
            #self.updateFiles()
            #self.addPlots()
            #self.statsListCtrl.Select(0)
            #self.onStatsItemSelect(None)
            ###########################
            #self.bSizer9.Fit()

        except Exception as e:
            # Broad catch: report construction failures on the console
            # instead of crashing the host application.
            print self.qc_prefix, "Error:", e
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
Example #24
0
    def __init__( self, parent, datasets):
        """Build the Quality Control window for a set of wig datasets.

        Loads the raw (un-normalized) insertion counts for the given wig
        files, lays out the plot area, the normalization selector and the
        per-file statistics table, then renders the initial ("nonorm")
        view via self.refresh().

        Args:
            parent: parent wx window.
            datasets: list of paths to .wig files to inspect.
        """
        try:
            self.qc_prefix = "[QualityControl]"  # prefix for console messages
            self.index_stats = 0
            self.plots_list = []

            self.wigList = datasets

            wx.Frame.__init__ ( self, parent, id = wx.ID_ANY, title = "Quality Control", pos = wx.DefaultPosition, size = wx.Size( 1560, 900 ), style = wx.DEFAULT_FRAME_STYLE|wx.TAB_TRAVERSAL )

            #self.SetSizeHints( wx.DefaultSize, wx.DefaultSize )

            bSizer9 = wx.BoxSizer( wx.VERTICAL )

            self.m_scrolledWindow1 = wx.ScrolledWindow( self, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.m_scrolledWindow1.SetScrollRate( 5, 5 )
            bSizer10 = wx.BoxSizer( wx.VERTICAL )

            # Top area: three side-by-side bitmaps that will hold QC figures.
            self.plotsScrolledWindow = wx.ScrolledWindow( self.m_scrolledWindow1, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.plotsScrolledWindow.SetScrollRate( 5, 5 )
            self.plotsScrolledWindow.SetMinSize( wx.Size( -1, 515 ) )

            #plotsSizer = wx.BoxSizer( wx.VERTICAL )
            plotsSizer = wx.BoxSizer( wx.HORIZONTAL )

            self.plotsBitmap1 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )
            self.plotsBitmap2 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )
            self.plotsBitmap3 = wx.StaticBitmap( self.plotsScrolledWindow, wx.ID_ANY, wx.NullBitmap, wx.DefaultPosition, wx.DefaultSize, 0 )

            plotsSizer.Add( self.plotsBitmap1, 0, wx.ALL, 5 )
            plotsSizer.Add( self.plotsBitmap2, 0, wx.ALL, 5 )
            plotsSizer.Add( self.plotsBitmap3, 0, wx.ALL, 5 )

            #self.plotsBitmap.SetMaxSize( wx.Size( 400,400 ) )
            #self.plotsFigure = Figure()
            #self.plotsAxes = self.plotsFigure.add_subplot(111)
            #self.plotsCanvas = FigureCanvas(self, -1, self.plotsFigure)
            #plotsSizer.Add( self.plotsCanvas, 0, wx.ALL, 5 )

            self.plotsScrolledWindow.SetSizer( plotsSizer )
            self.plotsScrolledWindow.Layout()
            plotsSizer.Fit( self.plotsScrolledWindow )
            bSizer10.Add( self.plotsScrolledWindow, 0, wx.EXPAND |wx.ALL, 5 )

            # Bottom area: note text, normalization selector, stats table.
            self.statsScrolledWindow = wx.ScrolledWindow( self.m_scrolledWindow1, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, wx.HSCROLL|wx.VSCROLL )
            self.statsScrolledWindow.SetScrollRate( 5, 5 )
            self.statsScrolledWindow.SetMaxSize( wx.Size( -1, -1 ) )
            self.statsScrolledWindow.SetMinSize( wx.Size( -1, 300 ) )


            NoteText = """*Note: Plot 1 and 2 truncate the top 1% of reads for readability.
 Selecting a normalization method from the drop down will normalize the data and refresh the figures and table.
 This may take a long time depending on the normalization method chosen."""

            #self.noticeLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, u"*Note: Plot 1 and 2 truncate the top 1% of reads for readability.", wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_CENTRE)
            self.noticeLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, NoteText, wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_LEFT)

            # Normalization choices are discovered from norm_tools at runtime.
            normChoices = sorted(norm_tools.methods.keys()) #[ u"nonorm", "TTR", "betageom"]
            self.normChoice = wx.Choice( self.statsScrolledWindow, wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, normChoices, 0 )
            #self.normChoice.SetSelection( 0 )
            self.normChoice.SetStringSelection("nonorm")
            #noteSizer = wx.BoxSizer( wx.VERTICAL )
            #noteSizer.Add(self.noticeLabel, wx.ALL|wx.EXPAND, 5 )


            self.normLabel = wx.StaticText( self.statsScrolledWindow, wx.ID_ANY, u"Normalization:", wx.DefaultPosition, wx.DefaultSize, wx.ALIGN_CENTRE)
            statsSizer = wx.BoxSizer( wx.VERTICAL )
            normSizer = wx.BoxSizer( wx.HORIZONTAL )

            self.statsListCtrl = wx.ListCtrl( self.statsScrolledWindow, wx.ID_ANY, wx.DefaultPosition, wx.Size( -1, 140 ), wx.LC_REPORT |wx.LC_SINGLE_SEL )


            normSizer.Add(self.normLabel, 1, wx.ALL|wx.ALIGN_CENTER_VERTICAL, 5)
            normSizer.Add(self.normChoice, 0, wx.ALL, 5)

            statsSizer.Add( self.noticeLabel, 0, wx.EXPAND, 5 )
            #statsSizer.Add( self.normChoice, 0, wx.ALL, 5 )
            statsSizer.Add(normSizer, 0, wx.ALL, 5)
            statsSizer.Add( self.statsListCtrl, 1, wx.ALL|wx.EXPAND, 5 )



            self.statsScrolledWindow.SetSizer( statsSizer )
            self.statsScrolledWindow.Layout()
            statsSizer.Fit( self.statsScrolledWindow )
            bSizer10.Add( self.statsScrolledWindow, 0, wx.EXPAND |wx.ALL, 5 )


            self.m_scrolledWindow1.SetSizer( bSizer10 )
            self.m_scrolledWindow1.Layout()
            bSizer10.Fit( self.m_scrolledWindow1 )
            bSizer9.Add( self.m_scrolledWindow1, 1, wx.EXPAND |wx.ALL, 5 )


            self.SetSizer( bSizer9 )
            self.Layout()

            self.Centre( wx.BOTH )

            ########################
            # Connect Events
            self.statsListCtrl.Bind( wx.EVT_LIST_ITEM_SELECTED, self.onStatsItemSelect)
            self.normChoice.Bind( wx.EVT_CHOICE, self.onNormSelect )
            self.Bind(wx.EVT_CLOSE, self.OnClose)


            #######
            # Columns of the per-file statistics table.
            self.index_stats = 0
            self.statsListCtrl.InsertColumn(0, 'File', width=250)
            self.statsListCtrl.InsertColumn(1, 'Density', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(2, 'Mean Read', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(3, 'NZMean Read', wx.LIST_FORMAT_CENTRE, width=115)
            self.statsListCtrl.InsertColumn(4, 'NZMedian Read', wx.LIST_FORMAT_CENTRE, width=125)
            self.statsListCtrl.InsertColumn(5, 'Max Read', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(6, 'Total Reads', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(7, 'Skew', wx.LIST_FORMAT_CENTRE, width=85)
            self.statsListCtrl.InsertColumn(8, 'Kurtosis', wx.LIST_FORMAT_CENTRE, width=85)




            ############################
            # Load the raw counts; normalization is applied later on demand.
            self.norm = "nonorm"
            (self.data, self.position) = tnseq_tools.get_data(self.wigList)


            self.refresh()
            #self.updateFiles()
            #self.addPlots()
            #self.statsListCtrl.Select(0)
            #self.onStatsItemSelect(None)
            ###########################
            #self.bSizer9.Fit()

        except Exception as e:
            # Broad catch: report construction failures on the console
            # instead of crashing the host application.
            print(self.qc_prefix, "Error:", e)
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
Example #25
0
 def test_normalization(self):
     """TTR normalization must yield non-trivial per-dataset factors."""
     num_datasets = len(all_data_list)
     raw_counts, coords = tnseq_tools.get_data(all_data_list)
     normed, scale_factors = norm_tools.normalize_data(raw_counts, "TTR")
     self.assertFalse((scale_factors == numpy.ones(num_datasets)).all())
Example #26
0
    def test_read_data(self):
        """Loading every sample wig yields 5 datasets over >70k TA sites."""
        counts, coords = tnseq_tools.get_data(all_data_list)
        n_datasets, n_sites = counts.shape
        self.assertEqual(n_datasets, 5)
        self.assertGreater(n_sites, 70000)