Esempio n. 1
0
    def __call__(self,**kw):
        """Write the score ratio of two tracks to a new track and register it.

        Keyword arguments read from ``kw``:
          - 'numerator', 'denominator': inputs handed to ``track`` (required).
          - 'assembly': passed to ``track`` as ``chrmeta``; 'guess' if missing/empty.
          - 'output': output format; the numerator track's format if missing/empty.
          - 'window_size': smoothing window; ``size_def`` if missing/empty.
          - 'log', 'distribution': flags; a string counts as true when it is one of
            '1', 'true', 't', 'on' (case-insensitive).
          - 'pseudo', 'threshold': floats; ``pseudo_def`` / ``threshold_def`` are used
            when the value is missing or cannot be converted.

        Returns whatever ``self.display_time()`` returns.
        """
        truthy = ('1', 'true', 't', 'on')
        assembly = kw.get('assembly') or 'guess'
        t1 = track(kw['numerator'],chrmeta=assembly)
        t2 = track(kw['denominator'],chrmeta=assembly)
        # Named out_format so the builtin format() is not shadowed.
        out_format = kw.get('output') or t1.format
        wsize = int(kw.get('window_size') or size_def)
        self.log = kw.get('log',False)
        if isinstance(self.log, basestring):
            self.log = self.log.lower() in truthy
        # float(None) raises TypeError and float('junk') raises ValueError; only
        # those two mean "no usable value", anything else should propagate.
        try:
            self.pseudo = float(kw.get('pseudo'))
        except (TypeError, ValueError):
            self.pseudo = pseudo_def
        # NOTE(review): a non-positive pseudo count would presumably make log()
        # fail here -- confirm whether the input should be validated.
        self.baseline = -log(self.pseudo,2)
        try:
            self.threshold = float(kw.get('threshold'))
        except (TypeError, ValueError):
            self.threshold = threshold_def
        distribution = kw.get('distribution',False)
        if isinstance(distribution, basestring):
            distribution = distribution.lower() in truthy
        if distribution:
            # State reset for this run; presumably consumed by self._sample_stream.
            genome_length = sum(v['length'] for v in t1.chrmeta.values())
            self.shifts = list(poisson(float(genome_length)/float(self.sample_num),self.sample_num))
            self.ratios = []

        output = self.temporary_path(fname='ratios_%s-%s.%s'%(t1.name,t2.name,out_format))
        track_info = {'datatype': 'quantitative',
                      'log': self.log,
                      'pseudocounts': self.pseudo,
                      'threshold': self.threshold,
                      'window_size': wsize}
        with track(output, chrmeta=t1.chrmeta, fields=t1.fields, info=track_info) as tout:
            for chrom,vchr in t1.chrmeta.iteritems():
                if wsize > 1:
                    s1 = window_smoothing(t1.read(chrom),window_size=wsize,step_size=1,featurewise=False)
                    s2 = window_smoothing(t2.read(chrom),window_size=wsize,step_size=1,featurewise=False)
                else:
                    # A window of 1 (or less) means no smoothing: use the raw streams.
                    s1 = t1.read(chrom)
                    s2 = t2.read(chrom)
                s3 = merge_scores([s1,s2],method=self._divide)
                if distribution:
                    s3 = FeatureStream(self._sample_stream(s3,vchr['length']),fields=s3.fields)
                tout.write(s3, chrom=chrom, clip=True)
        self.new_file(output, 'ratios')

        if distribution:
            pdf = self.temporary_path(fname='%s-%s_ratios_distribution.pdf'%(t1.name,t2.name))
            density_boxplot(self.ratios,output=pdf,
                            name="%s/%s (median=%.2f)" %(t1.name,t2.name,median(self.ratios)))
            self.new_file(pdf, 'boxplot')
        return self.display_time()
Esempio n. 2
0
 def __call__(self, **kw):
     """Smooth the scores of a track with a sliding window and register the result.

     Keyword arguments read from ``kw``:
       - 'track': input handed to ``track``.
       - 'assembly': passed to ``track`` as ``chrmeta`` (None if missing/empty).
       - 'format': output format; defaults to the input track's format.
       - 'window_size': defaults to ``size_def``; a falsy value becomes 10.
       - 'window_step': defaults to ``step_def``; a falsy value becomes 1.
       - 'by_feature': flag; a string counts as true when it is one of
         '1', 'true', 't', 'on' (case-insensitive).

     Returns whatever ``self.display_time()`` returns.
     """
     tinput = track(kw.get('track'), chrmeta=kw.get('assembly') or None)
     outformat = kw.get('format',tinput.format)
     wsize = int(kw.get('window_size', size_def) or 10)
     wstep = int(kw.get('window_step', step_def) or 1)
     featurewise = kw.get('by_feature', False)
     if isinstance(featurewise, basestring):
         featurewise = featurewise.lower() in ('1', 'true', 't', 'on')
     output = self.temporary_path(fname=tinput.name+'_smoothed', ext=outformat)
     if featurewise:
         # Feature-wise smoothing keeps the input's own fields.
         outfields = tinput.fields
         datatype = "qualitative"
     else:
         outfields = ["chr","start", "end", "score"]
         datatype = "quantitative"
     tout = track(output, format=outformat, fields=outfields, chrmeta=tinput.chrmeta, info={'datatype': datatype})
     # try/finally so the output track is closed even when reading, smoothing
     # or writing one of the chromosomes raises.
     try:
         for chrom in tout.chrmeta.keys():
             s = window_smoothing(
                 tinput.read(selection=chrom, fields=outfields),
                 window_size=wsize, step_size=wstep,
                 featurewise=featurewise)
             tout.write(s, chrom=chrom, clip=True)
     finally:
         tout.close()
     self.new_file(output, 'smoothed_track')
     return self.display_time()
Esempio n. 3
0
 def __call__(self, **kw):
     """Smooth the scores of a track with a sliding window and register the result.

     Keyword arguments read from ``kw``:
       - "track": input handed to ``track``.
       - "assembly": passed to ``track`` as ``chrmeta`` (None if missing/empty).
       - "output": output format; defaults to the input track's format.
       - "window_size": defaults to ``size_def``; a falsy value becomes 10.
       - "window_step": defaults to ``step_def``; a falsy value becomes 1.
       - "by_feature": flag; a string counts as true when it is one of
         "1", "true", "t", "on" (case-insensitive).

     Returns whatever ``self.display_time()`` returns.
     """
     tinput = track(kw.get("track"), chrmeta=kw.get("assembly") or None)
     outformat = kw.get("output", tinput.format)
     wsize = int(kw.get("window_size", size_def) or 10)
     wstep = int(kw.get("window_step", step_def) or 1)
     featurewise = kw.get("by_feature", False)
     if isinstance(featurewise, basestring):
         featurewise = featurewise.lower() in ("1", "true", "t", "on")
     output = self.temporary_path(fname=tinput.name + "_smoothed", ext=outformat)
     if featurewise:
         # Feature-wise smoothing keeps the input's own fields.
         outfields = tinput.fields
         datatype = "qualitative"
     else:
         outfields = ["chr", "start", "end", "score"]
         datatype = "quantitative"
     tout = track(output, format=outformat, fields=outfields, chrmeta=tinput.chrmeta, info={"datatype": datatype})
     # Guarantee the output track is closed even if a chromosome fails to be
     # read, smoothed or written.
     try:
         for chrom in tout.chrmeta.keys():
             s = window_smoothing(
                 tinput.read(selection=chrom, fields=outfields),
                 window_size=wsize,
                 step_size=wstep,
                 featurewise=featurewise,
             )
             tout.write(s, chrom=chrom, clip=True)
     finally:
         tout.close()
     self.new_file(output, "smoothed_track")
     return self.display_time()
Esempio n. 4
0
 def test_window_smoothing(self):
     # One feature chr1:4-5 scoring 10, smoothed with a 2-wide window moved by
     # steps of 1, is expected to come out as two features scoring 5 each.
     columns = ['chr', 'start', 'end', 'score']
     features = fstream([('chr1', 4, 5, 10.)], fields=columns)
     smoothed = window_smoothing(features, window_size=2, step_size=1)
     self.assertListEqual(
         list(smoothed),
         [('chr1', 4, 5, 5.), ('chr1', 5, 6, 5.)],
     )
Esempio n. 5
0
 def test_window_smoothing(self):
     # Input: a single feature on chr1 spanning 4-5 with score 10.
     single_feature = [('chr1', 4, 5, 10.)]
     source = fstream(single_feature, fields=['chr', 'start', 'end', 'score'])
     # Expected: the score split evenly over positions 4-5 and 5-6.
     wanted = [('chr1', 4, 5, 5.), ('chr1', 5, 6, 5.)]
     got = [feat for feat in window_smoothing(source, window_size=2, step_size=1)]
     self.assertListEqual(got, wanted)
Esempio n. 6
0
    def __call__(self, **kw):
        """Write the score ratio of two tracks to a new track and register it.

        Keyword arguments read from ``kw``:
          - 'numerator', 'denominator': inputs handed to ``track`` (required).
          - 'assembly': passed to ``track`` as ``chrmeta``; 'guess' if missing/empty.
          - 'format': output format; the numerator track's format if missing/empty.
          - 'window_size': smoothing window; ``size_def`` if missing/empty.
          - 'log', 'distribution': flags; a string counts as true when it is one of
            '1', 'true', 't', 'on' (case-insensitive).
          - 'pseudo', 'threshold': floats; ``pseudo_def`` / ``threshold_def`` are used
            when the value is missing or cannot be converted.

        Returns whatever ``self.display_time()`` returns.
        """
        truthy = ('1', 'true', 't', 'on')
        assembly = kw.get('assembly') or 'guess'
        t1 = track(kw['numerator'], chrmeta=assembly)
        t2 = track(kw['denominator'], chrmeta=assembly)
        # Named out_format so the builtin format() is not shadowed.
        out_format = kw.get('format') or t1.format
        wsize = int(kw.get('window_size') or size_def)
        self.log = kw.get('log', False)
        if isinstance(self.log, basestring):
            self.log = self.log.lower() in truthy
        # Only a missing value (TypeError from float(None)) or an unparsable
        # one (ValueError) selects the default; other errors propagate.
        try:
            self.pseudo = float(kw.get('pseudo'))
        except (TypeError, ValueError):
            self.pseudo = pseudo_def
        # NOTE(review): a non-positive pseudo count would presumably make log()
        # fail here -- confirm whether the input should be validated.
        self.baseline = -log(self.pseudo, 2)
        try:
            self.threshold = float(kw.get('threshold'))
        except (TypeError, ValueError):
            self.threshold = threshold_def
        distribution = kw.get('distribution', False)
        if isinstance(distribution, basestring):
            distribution = distribution.lower() in truthy
        if distribution:
            # State reset for this run; presumably consumed by self._sample_stream.
            genome_length = sum(v['length'] for v in t1.chrmeta.values())
            self.shifts = list(
                poisson(
                    float(genome_length) / float(self.sample_num),
                    self.sample_num))
            self.ratios = []

        output = self.temporary_path(fname='ratios_%s-%s.%s' %
                                     (t1.name, t2.name, out_format))
        track_info = {
            'datatype': 'quantitative',
            'log': self.log,
            'pseudocounts': self.pseudo,
            'threshold': self.threshold,
            'window_size': wsize
        }
        with track(output,
                   chrmeta=t1.chrmeta,
                   fields=t1.fields,
                   info=track_info) as tout:
            for chrom, vchr in t1.chrmeta.iteritems():
                if wsize > 1:
                    s1 = window_smoothing(t1.read(chrom),
                                          window_size=wsize,
                                          step_size=1,
                                          featurewise=False)
                    s2 = window_smoothing(t2.read(chrom),
                                          window_size=wsize,
                                          step_size=1,
                                          featurewise=False)
                else:
                    # A window of 1 (or less) means no smoothing: use the raw streams.
                    s1 = t1.read(chrom)
                    s2 = t2.read(chrom)
                s3 = merge_scores([s1, s2], method=self._divide)
                if distribution:
                    s3 = FeatureStream(self._sample_stream(s3, vchr['length']),
                                       fields=s3.fields)
                tout.write(s3, chrom=chrom, clip=True)
        self.new_file(output, 'ratios')

        if distribution:
            pdf = self.temporary_path(fname='%s-%s_ratios_distribution.pdf' %
                                      (t1.name, t2.name))
            density_boxplot(self.ratios,
                            output=pdf,
                            name="%s/%s (median=%.2f)" %
                            (t1.name, t2.name, median(self.ratios)))
            self.new_file(pdf, 'boxplot')
        return self.display_time()