def run(self):
    """Call peaks from the loaded spans and write them to a GFF3 file.

    Spans are gathered by running self._load_bam over self.filenames via
    legion.parallel_imap.  Each result is a mapping whose keys are
    (rname, strand) pairs and whose values are sequences of (start, end)
    pairs; results from all files are concatenated per key.

    For every (rname, strand) a depth profile is built in which each span
    contributes 1.0 to positions start .. end-1.  The profile is handed to
    self._find_spans, and each (start, end) it yields is written to
    self.prefix + '.gff' as an annotation of type self.type, with its end
    reduced by self.lap.  Candidates that are empty after that reduction
    are dropped.

    The number of peaks written is recorded with self.log.datum.
    """
    # Merge the per-file results into one list of spans per (rname, strand).
    spans = collections.defaultdict(list)
    for item in legion.parallel_imap(self._load_bam, self.filenames):
        for key, value in item.items():
            spans[key].extend(value)

    grace.status('Calling peaks')

    n = 0
    f = open(self.prefix+'.gff', 'wb')
    try:
        annotation.write_gff3_header(f)

        for (rname, strand), span_list in spans.items():
            # extend() with an empty sequence still creates the key, and
            # max() of an empty sequence raises ValueError.  There is
            # nothing to call on such an entry, so skip it.
            if not span_list:
                continue

            # Difference array: +1 where a span begins, -1 just past where
            # it ends.  One extra slot so that the largest end is a valid
            # index.
            depth = [0.0] * (1 + max(item[1] for item in span_list))
            for start, end in span_list:
                depth[start] += 1.0
                depth[end] -= 1.0

            # Running sum converts the differences into per-position depth.
            for i in xrange(1, len(depth)):
                depth[i] += depth[i-1]

            for start, end in self._find_spans(depth):
                # Nothing would remain once self.lap is removed from the end.
                if end - self.lap - start <= 0:
                    continue

                n += 1
                peak_id = 'peak%d' % n

                ann = annotation.Annotation()
                ann.source = 'nesoni'
                ann.type = self.type
                ann.seqid = rname
                ann.start = start
                ann.end = end - self.lap
                ann.strand = strand
                ann.score = None
                ann.phase = None
                ann.attr = {
                    'id': peak_id,
                    'color': '#00ff00' if strand > 0 else '#0000ff' if strand < 0 else '#008080',
                }
                print >> f, ann.as_gff()
                f.flush()
    finally:
        # Release the file handle even if peak calling fails part-way.
        f.close()

    self.log.datum('-', 'called peaks', n)
    grace.status('')
def run(self):
    """Call peaks from the loaded spans and write them to a GFF3 file.

    Spans are gathered by running self._load_bam over self.filenames via
    legion.parallel_imap.  Each result is a mapping whose keys are
    (rname, strand) pairs and whose values are sequences of (start, end)
    pairs; results from all files are concatenated per key.

    For every (rname, strand) a depth profile is built in which each span
    contributes 1.0 to positions start .. end-1.  If self.crosstalk is
    non-zero and strand is non-zero, every span recorded for
    (rname, -strand) subtracts self.crosstalk from the profile over the
    part of its extent that falls inside the profile.  Whenever
    self.crosstalk is non-zero the profile is then clamped so that no
    position is below 0.0.

    The profile is handed to self._find_spans, and each (start, end) it
    yields is written to self.prefix + '.gff' as an annotation of type
    self.type, with its end reduced by self.lap.  Candidates that are
    empty after that reduction are dropped.

    The number of peaks written is recorded with self.log.datum.

    Raises:
        AssertionError: if self.what is not one of 'fragment', '5prime'
            or '3prime'.
    """
    assert self.what in ('fragment','5prime','3prime'), 'Unknown option for --what.'

    # Merge the per-file results into one list of spans per (rname, strand).
    spans = collections.defaultdict(list)
    for item in legion.parallel_imap(self._load_bam, self.filenames):
        for key, value in item.items():
            spans[key].extend(value)

    grace.status('Calling peaks')

    n = 0
    f = open(self.prefix+'.gff', 'wb')
    try:
        annotation.write_gff3_header(f)

        for (rname, strand), span_list in spans.items():
            # extend() with an empty sequence still creates the key, and
            # max() of an empty sequence raises ValueError.  There is
            # nothing to call on such an entry, so skip it.
            if not span_list:
                continue

            # Difference array: +1 where a span begins, -1 just past where
            # it ends.  One extra slot so that the largest end is a valid
            # index.
            depth = [0.0] * (1 + max(item[1] for item in span_list))
            for start, end in span_list:
                depth[start] += 1.0
                depth[end] -= 1.0

            # Opposite-strand spans enter the same difference array with
            # the reverse sign, scaled by self.crosstalk.  Boundaries that
            # lie beyond this strand's profile are ignored.
            if self.crosstalk and strand and (rname, -strand) in spans:
                for start, end in spans[(rname, -strand)]:
                    if start < len(depth):
                        depth[start] -= self.crosstalk
                    if end < len(depth):
                        depth[end] += self.crosstalk

            # Running sum converts the differences into per-position depth.
            for i in xrange(1, len(depth)):
                depth[i] += depth[i-1]

            # The subtraction above can push positions below zero.
            if self.crosstalk:
                for i in xrange(len(depth)):
                    depth[i] = max(0.0, depth[i])

            for start, end in self._find_spans(depth):
                # Nothing would remain once self.lap is removed from the end.
                if end - self.lap - start <= 0:
                    continue

                n += 1
                peak_id = 'peak%d' % n

                ann = annotation.Annotation()
                ann.source = 'nesoni'
                ann.type = self.type
                ann.seqid = rname
                ann.start = start
                ann.end = end - self.lap
                ann.strand = strand
                ann.score = None
                ann.phase = None
                ann.attr = {
                    'id': peak_id,
                    'color': '#00ff00' if strand > 0 else '#0000ff' if strand < 0 else '#008080',
                }
                print >> f, ann.as_gff()
                f.flush()
    finally:
        # Release the file handle even if peak calling fails part-way.
        f.close()

    self.log.datum('-', 'called peaks', n)
    grace.status('')