def __call__(self, **kw):
    """Write the numerator/denominator ratio track and register the outputs.

    Options read from ``kw``:
      * ``numerator``, ``denominator`` (required): handed to ``track()``.
      * ``assembly``: used as ``chrmeta`` for both tracks ('guess' if empty).
      * ``output``: output format; the numerator's format if empty.
      * ``window_size``: ``size_def`` if empty; both inputs are smoothed
        only when the value is greater than 1.
      * ``log``, ``distribution``: booleans, or strings where
        '1', 'true', 't', 'on' (case-insensitive) mean True.
      * ``pseudo``, ``threshold``: floats; ``pseudo_def`` / ``threshold_def``
        are used when the value is missing or cannot be parsed.

    Sets ``self.log``, ``self.pseudo``, ``self.baseline``, ``self.threshold``
    and, when ``distribution`` is on, ``self.shifts`` and ``self.ratios``.

    Returns the value of ``self.display_time()``.
    """
    truthy = ['1', 'true', 't', 'on']

    assembly = kw.get('assembly') or 'guess'
    t1 = track(kw['numerator'], chrmeta=assembly)
    t2 = track(kw['denominator'], chrmeta=assembly)
    # Named out_format (not `format`) to avoid shadowing the builtin.
    out_format = kw.get('output') or t1.format
    wsize = int(kw.get('window_size') or size_def)

    self.log = kw.get('log', False)
    if isinstance(self.log, basestring):
        self.log = self.log.lower() in truthy

    # float(None) raises TypeError, float('junk') raises ValueError; only
    # those mean "use the default". Anything else should propagate.
    try:
        self.pseudo = float(kw.get('pseudo'))
    except (TypeError, ValueError):
        self.pseudo = pseudo_def
    self.baseline = -log(self.pseudo, 2)
    try:
        self.threshold = float(kw.get('threshold'))
    except (TypeError, ValueError):
        self.threshold = threshold_def

    distribution = kw.get('distribution', False)
    if isinstance(distribution, basestring):
        distribution = distribution.lower() in truthy
    if distribution:
        genome_length = sum(v['length'] for v in t1.chrmeta.values())
        # NOTE(review): presumably numpy.random.poisson(mean, size), i.e.
        # self.sample_num draws with mean genome_length/sample_num - confirm.
        self.shifts = list(poisson(float(genome_length) / float(self.sample_num),
                                   self.sample_num))
        self.ratios = []

    output = self.temporary_path(
        fname='ratios_%s-%s.%s' % (t1.name, t2.name, out_format))
    info = {'datatype': 'quantitative',
            'log': self.log,
            'pseudocounts': self.pseudo,
            'threshold': self.threshold,
            'window_size': wsize}
    with track(output, chrmeta=t1.chrmeta, fields=t1.fields, info=info) as tout:
        for chrom, vchr in t1.chrmeta.iteritems():
            if wsize > 1:
                s1 = window_smoothing(t1.read(chrom), window_size=wsize,
                                      step_size=1, featurewise=False)
                s2 = window_smoothing(t2.read(chrom), window_size=wsize,
                                      step_size=1, featurewise=False)
            else:
                s1 = t1.read(chrom)
                s2 = t2.read(chrom)
            s3 = merge_scores([s1, s2], method=self._divide)
            if distribution:
                # Route the stream through the sampler before it is written.
                s3 = FeatureStream(self._sample_stream(s3, vchr['length']),
                                   fields=s3.fields)
            tout.write(s3, chrom=chrom, clip=True)
    self.new_file(output, 'ratios')

    if distribution:
        pdf = self.temporary_path(
            fname='%s-%s_ratios_distribution.pdf' % (t1.name, t2.name))
        density_boxplot(self.ratios, output=pdf,
                        name="%s/%s (median=%.2f)"
                        % (t1.name, t2.name, median(self.ratios)))
        self.new_file(pdf, 'boxplot')
    return self.display_time()
def __call__(self, **kw):
    """Smooth the input track with a sliding window and register the result.

    Options read from ``kw``:
      * ``track``: input handed to ``track()``.
      * ``assembly``: used as ``chrmeta`` for the input (None if empty).
      * ``format``: output format; the input track's format if missing
        or empty.
      * ``window_size``: ``size_def`` if the key is absent, 10 if the value
        is empty or zero.
      * ``window_step``: ``step_def`` if the key is absent, 1 if the value
        is empty or zero.
      * ``by_feature``: boolean, or a string where '1', 'true', 't', 'on'
        (case-insensitive) mean True. When on, the input's own fields are
        kept and the output is tagged 'qualitative'; otherwise the output
        has chr/start/end/score and is tagged 'quantitative'.

    Returns the value of ``self.display_time()``.
    """
    tinput = track(kw.get('track'), chrmeta=kw.get('assembly') or None)
    # `or` rather than a .get() default: a key that is present but empty/None
    # must fall back to the input format too, like the options below.
    outformat = kw.get('format') or tinput.format
    wsize = int(kw.get('window_size', size_def) or 10)
    wstep = int(kw.get('window_step', step_def) or 1)
    featurewise = kw.get('by_feature', False)
    if isinstance(featurewise, basestring):
        featurewise = featurewise.lower() in ['1', 'true', 't', 'on']

    output = self.temporary_path(fname=tinput.name+'_smoothed', ext=outformat)
    if featurewise:
        outfields = tinput.fields
        datatype = "qualitative"
    else:
        outfields = ["chr", "start", "end", "score"]
        datatype = "quantitative"

    # Context manager so the output track is closed even when smoothing or
    # writing raises part-way through.
    with track(output, format=outformat, fields=outfields,
               chrmeta=tinput.chrmeta, info={'datatype': datatype}) as tout:
        for chrom in tout.chrmeta.keys():
            s = window_smoothing(
                tinput.read(selection=chrom, fields=outfields),
                window_size=wsize, step_size=wstep, featurewise=featurewise)
            tout.write(s, chrom=chrom, clip=True)
    self.new_file(output, 'smoothed_track')
    return self.display_time()
def __call__(self, **kw):
    """Run window smoothing over the input track and register the output file.

    Options read from ``kw``:
      * ``track``: input handed to ``track()``.
      * ``assembly``: used as ``chrmeta`` for the input (None if empty).
      * ``output``: output format; the input track's format if missing
        or empty.
      * ``window_size``: ``size_def`` if the key is absent, 10 if the value
        is empty or zero.
      * ``window_step``: ``step_def`` if the key is absent, 1 if the value
        is empty or zero.
      * ``by_feature``: boolean, or a string where "1", "true", "t", "on"
        (case-insensitive) mean True. It selects feature-wise smoothing,
        which keeps the input fields and tags the output "qualitative";
        otherwise the output is chr/start/end/score, tagged "quantitative".

    Returns the value of ``self.display_time()``.
    """
    tinput = track(kw.get("track"), chrmeta=kw.get("assembly") or None)
    # A present-but-empty "output" value must also fall back to the input
    # format, hence `or` instead of a .get() default.
    outformat = kw.get("output") or tinput.format
    wsize = int(kw.get("window_size", size_def) or 10)
    wstep = int(kw.get("window_step", step_def) or 1)

    featurewise = kw.get("by_feature", False)
    if isinstance(featurewise, basestring):
        featurewise = featurewise.lower() in ["1", "true", "t", "on"]
    if featurewise:
        outfields = tinput.fields
        datatype = "qualitative"
    else:
        outfields = ["chr", "start", "end", "score"]
        datatype = "quantitative"

    output = self.temporary_path(fname=tinput.name + "_smoothed", ext=outformat)
    # `with` guarantees the output track is closed if a chromosome fails
    # half-way; the previous trailing close() was skipped on error.
    with track(
        output,
        format=outformat,
        fields=outfields,
        chrmeta=tinput.chrmeta,
        info={"datatype": datatype},
    ) as tout:
        for chrom in tout.chrmeta.keys():
            s = window_smoothing(
                tinput.read(selection=chrom, fields=outfields),
                window_size=wsize,
                step_size=wstep,
                featurewise=featurewise,
            )
            tout.write(s, chrom=chrom, clip=True)
    self.new_file(output, "smoothed_track")
    return self.display_time()
def test_window_smoothing(self):
    """One feature scoring 10, smoothed with window 2 / step 1, yields two features scoring 5."""
    columns = ['chr', 'start', 'end', 'score']
    features = [('chr1', 4, 5, 10.)]
    smoothed = window_smoothing(
        fstream(features, fields=columns),
        window_size=2,
        step_size=1,
    )
    self.assertListEqual(
        list(smoothed),
        [('chr1', 4, 5, 5.), ('chr1', 5, 6, 5.)],
    )
def test_window_smoothing(self):
    """Check window_smoothing on a single-feature stream (window_size=2, step_size=1).

    The input feature ('chr1', 4, 5, 10.) must come out as two features,
    (4, 5) and (5, 6), each with score 5.
    """
    source = fstream(
        [('chr1', 4, 5, 10.)],
        fields=['chr', 'start', 'end', 'score'],
    )
    # Materialise the result so it can be compared as a list.
    observed = [feature for feature in
                window_smoothing(source, window_size=2, step_size=1)]
    wanted = [
        ('chr1', 4, 5, 5.),
        ('chr1', 5, 6, 5.),
    ]
    self.assertListEqual(observed, wanted)
def __call__(self, **kw):
    """Compute the numerator/denominator ratio track and register the outputs.

    Options read from ``kw``:
      * ``numerator``, ``denominator`` (required): handed to ``track()``.
      * ``assembly``: used as ``chrmeta`` for both tracks ('guess' if empty).
      * ``format``: output format; the numerator's format if empty.
      * ``window_size``: ``size_def`` if empty; inputs are smoothed only
        when the value is greater than 1.
      * ``log``, ``distribution``: booleans, or strings where
        '1', 'true', 't', 'on' (case-insensitive) mean True.
      * ``pseudo``, ``threshold``: floats; ``pseudo_def`` / ``threshold_def``
        are used when the value is missing or cannot be parsed.

    Sets ``self.log``, ``self.pseudo``, ``self.baseline``, ``self.threshold``
    and, when ``distribution`` is on, ``self.shifts`` and ``self.ratios``.

    Returns the value of ``self.display_time()``.
    """
    def _flag(key):
        # Accept real booleans as well as their string spellings.
        value = kw.get(key, False)
        if isinstance(value, basestring):
            return value.lower() in ['1', 'true', 't', 'on']
        return value

    def _number(key, default):
        # Only "missing" (TypeError on None) and "unparseable" (ValueError)
        # select the default; a bare except would also hide
        # KeyboardInterrupt and SystemExit.
        try:
            return float(kw.get(key))
        except (TypeError, ValueError):
            return default

    assembly = kw.get('assembly') or 'guess'
    t1 = track(kw['numerator'], chrmeta=assembly)
    t2 = track(kw['denominator'], chrmeta=assembly)
    # Named out_format (not `format`) to avoid shadowing the builtin.
    out_format = kw.get('format') or t1.format
    wsize = int(kw.get('window_size') or size_def)

    self.log = _flag('log')
    self.pseudo = _number('pseudo', pseudo_def)
    self.baseline = -log(self.pseudo, 2)
    self.threshold = _number('threshold', threshold_def)

    distribution = _flag('distribution')
    if distribution:
        genome_length = sum(v['length'] for v in t1.chrmeta.values())
        # NOTE(review): presumably numpy.random.poisson(mean, size) - confirm.
        self.shifts = list(
            poisson(float(genome_length) / float(self.sample_num),
                    self.sample_num))
        self.ratios = []

    output = self.temporary_path(
        fname='ratios_%s-%s.%s' % (t1.name, t2.name, out_format))
    with track(output,
               chrmeta=t1.chrmeta,
               fields=t1.fields,
               info={
                   'datatype': 'quantitative',
                   'log': self.log,
                   'pseudocounts': self.pseudo,
                   'threshold': self.threshold,
                   'window_size': wsize
               }) as tout:
        for chrom, vchr in t1.chrmeta.iteritems():
            if wsize > 1:
                s1 = window_smoothing(t1.read(chrom),
                                      window_size=wsize,
                                      step_size=1,
                                      featurewise=False)
                s2 = window_smoothing(t2.read(chrom),
                                      window_size=wsize,
                                      step_size=1,
                                      featurewise=False)
            else:
                s1 = t1.read(chrom)
                s2 = t2.read(chrom)
            s3 = merge_scores([s1, s2], method=self._divide)
            if distribution:
                # Route the stream through the sampler before it is written.
                s3 = FeatureStream(self._sample_stream(s3, vchr['length']),
                                   fields=s3.fields)
            tout.write(s3, chrom=chrom, clip=True)
    self.new_file(output, 'ratios')

    if distribution:
        pdf = self.temporary_path(
            fname='%s-%s_ratios_distribution.pdf' % (t1.name, t2.name))
        density_boxplot(self.ratios,
                        output=pdf,
                        name="%s/%s (median=%.2f)" %
                        (t1.name, t2.name, median(self.ratios)))
        self.new_file(pdf, 'boxplot')
    return self.display_time()