예제 #1
0
    def run(self):
        """Call peaks from the pooled alignment spans and write them as GFF3.

        Spans returned by ``self._load_bam`` for every entry of
        ``self.filenames`` are pooled per ``(rname, strand)`` key.  For each
        key a per-position depth profile is built and passed to
        ``self._find_spans``; every span it yields that is still non-empty
        after ``self.lap`` is removed from its end is written as one
        annotation to ``self.prefix + '.gff'``.  The number of peaks written
        is recorded via ``self.log.datum``.
        """
        spans = collections.defaultdict(list)

        for item in legion.parallel_imap(self._load_bam, self.filenames):
            for key, value in item.items():
                spans[key].extend(value)

        grace.status('Calling peaks')

        n = 0

        # The context manager closes the output file even if peak calling
        # raises part-way through.
        with open(self.prefix+'.gff', 'wb') as f:
            annotation.write_gff3_header(f)

            for (rname, strand), span_list in spans.items():
                # A key can exist with an empty list (extend() with nothing
                # still creates it); max() below would raise on it.
                if not span_list:
                    continue

                # Difference array: +1 where a span starts, -1 where it ends.
                depth = [ 0.0 ] * (1+max( item[1] for item in span_list ))
                for start, end in span_list:
                    depth[start] += 1.0
                    depth[end] -= 1.0

                # Running sum turns the difference array into actual depth.
                for i in xrange(1,len(depth)):
                    depth[i] += depth[i-1]

                for start, end in self._find_spans(depth):
                    # Skip spans that vanish once self.lap is trimmed off.
                    if end-self.lap-start <= 0: continue

                    n += 1

                    peak_id = 'peak%d' % n

                    ann = annotation.Annotation()
                    ann.source = 'nesoni'
                    ann.type = self.type
                    ann.seqid = rname
                    ann.start = start
                    ann.end = end - self.lap
                    ann.strand = strand
                    ann.score = None
                    ann.phase = None
                    ann.attr = {
                        'id' : peak_id,
                        # Green for forward, blue for reverse, teal for unstranded.
                        'color' : '#00ff00' if strand > 0 else '#0000ff' if strand < 0 else '#008080',
                        }
                    print >> f, ann.as_gff()
                f.flush()

        self.log.datum('-','called peaks',n)

        grace.status('')
예제 #2
0
파일: peaks.py 프로젝트: mscook/nesoni
    def run(self):
        """Call peaks from the pooled alignment spans and write them as GFF3.

        Spans returned by ``self._load_bam`` for every entry of
        ``self.filenames`` are pooled per ``(rname, strand)`` key.  For each
        key a per-position depth profile is built, optionally reduced by
        ``self.crosstalk`` times the depth of the opposite strand, and passed
        to ``self._find_spans``.  Every span it yields that is still non-empty
        after ``self.lap`` is removed from its end is written as one
        annotation to ``self.prefix + '.gff'``.  The number of peaks written
        is recorded via ``self.log.datum``.

        Raises:
            AssertionError: if ``self.what`` is not one of ``'fragment'``,
                ``'5prime'`` or ``'3prime'``.
        """
        assert self.what in ('fragment','5prime','3prime'), 'Unknown option for --what.'

        spans = collections.defaultdict(list)

        for item in legion.parallel_imap(self._load_bam, self.filenames):
            for key, value in item.items():
                spans[key].extend(value)

        grace.status('Calling peaks')

        n = 0

        # The context manager closes the output file even if peak calling
        # raises part-way through.
        with open(self.prefix+'.gff', 'wb') as f:
            annotation.write_gff3_header(f)

            for (rname, strand), span_list in spans.items():
                # A key can exist with an empty list (extend() with nothing
                # still creates it); max() below would raise on it.
                if not span_list:
                    continue

                # Difference array: +1 where a span starts, -1 where it ends.
                depth = [ 0.0 ] * (1+max( item[1] for item in span_list ))
                for start, end in span_list:
                    depth[start] += 1.0
                    depth[end] -= 1.0

                # Subtract a scaled copy of the opposite strand's coverage.
                # Positions past the end of this strand's profile are ignored.
                if self.crosstalk and strand and (rname,-strand) in spans:
                    for start, end in spans[(rname,-strand)]:
                        if start < len(depth): depth[start] -= self.crosstalk
                        if end < len(depth): depth[end] += self.crosstalk

                # Running sum turns the difference array into actual depth.
                for i in xrange(1,len(depth)):
                    depth[i] += depth[i-1]

                # The crosstalk subtraction can push depth below zero; clamp it.
                if self.crosstalk:
                    for i in xrange(len(depth)):
                        depth[i] = max(0.0,depth[i])

                for start, end in self._find_spans(depth):
                    # Skip spans that vanish once self.lap is trimmed off.
                    if end-self.lap-start <= 0: continue

                    n += 1

                    peak_id = 'peak%d' % n

                    ann = annotation.Annotation()
                    ann.source = 'nesoni'
                    ann.type = self.type
                    ann.seqid = rname
                    ann.start = start
                    ann.end = end - self.lap
                    ann.strand = strand
                    ann.score = None
                    ann.phase = None
                    ann.attr = {
                        'id' : peak_id,
                        # Green for forward, blue for reverse, teal for unstranded.
                        'color' : '#00ff00' if strand > 0 else '#0000ff' if strand < 0 else '#008080',
                        }
                    print >> f, ann.as_gff()
                f.flush()

        self.log.datum('-','called peaks',n)

        grace.status('')