コード例 #1
0
ファイル: rnaseqtrack.py プロジェクト: mkiyer/pytrackfactory
 def __init__(self, hdf_group, pe=False, strand=False, allele=False):
     '''Set up the junction and coverage sub-tracks under ``hdf_group``.

     Each sub-track lives in a child group of ``hdf_group`` (named by
     ``JUNCTION_GROUP`` and ``COVERAGE_GROUP``).  A child group that is
     not present yet is created through the owning HDF file; otherwise
     the existing child is reused.

     hdf_group: group handed to the base-class constructor
     pe, strand, allele: forwarded unchanged to ``VectorTrack``
     '''
     super(RnaseqTrack, self).__init__(hdf_group)
     hdf_file = self._get_hdf_file()
     parent = self.hdf_group

     # splice junction data: reuse the child group when present
     if JUNCTION_GROUP in parent:
         junction_node = parent._f_getChild(JUNCTION_GROUP)
     else:
         junction_node = hdf_file.createGroup(parent, JUNCTION_GROUP)
     self.junc_track = IntervalTrack(junction_node,
                                     junction_dtype,
                                     junction_expectedrows)

     # coverage data: same get-or-create logic as above
     if COVERAGE_GROUP in parent:
         coverage_node = parent._f_getChild(COVERAGE_GROUP)
     else:
         coverage_node = hdf_file.createGroup(parent, COVERAGE_GROUP)
     self.cov_track = VectorTrack(coverage_node,
                                  pe=pe,
                                  strand=strand,
                                  allele=allele)
コード例 #2
0
ファイル: rnaseqtrack.py プロジェクト: mkiyer/pytrackfactory
class RnaseqTrack(Track):
    '''Contains both genomic coverage data (array track) and splice
    junction data (interval track).

    The two sub-tracks are stored in child groups of ``hdf_group`` named
    by ``JUNCTION_GROUP`` and ``COVERAGE_GROUP``.
    '''
    def __init__(self, hdf_group, pe=False, strand=False, allele=False):
        '''Open (or create) the junction and coverage sub-tracks.

        hdf_group: group handed to the base-class constructor
        pe, strand, allele: forwarded unchanged to ``VectorTrack``
        '''
        super(RnaseqTrack, self).__init__(hdf_group)
        h5file = self._get_hdf_file()
        junc_group = self._require_group(h5file, JUNCTION_GROUP)
        self.junc_track = IntervalTrack(junc_group, junction_dtype,
                                        junction_expectedrows)
        cov_group = self._require_group(h5file, COVERAGE_GROUP)
        self.cov_track = VectorTrack(cov_group, pe=pe, strand=strand,
                                     allele=allele)

    def _require_group(self, h5file, name):
        '''Return the child group ``name`` of this track's group,
        creating it through ``h5file`` when it does not exist yet.
        '''
        if name in self.hdf_group:
            return self.hdf_group._f_getChild(name)
        return h5file.createGroup(self.hdf_group, name)

    def get_junction_track(self):
        '''Return a new ``IntervalTrack`` wrapping the junction group.

        Only the group is passed to the constructor (no dtype or
        expected-rows arguments), unlike the track built in ``__init__``.
        '''
        junc_group = self.hdf_group._f_getChild(JUNCTION_GROUP)
        return IntervalTrack(junc_group)

    def get_coverage_track(self):
        '''Return a new ``VectorTrack`` wrapping the coverage group.

        Only the group is passed to the constructor; the ``pe``,
        ``strand`` and ``allele`` options are left at their defaults.
        '''
        cov_group = self.hdf_group._f_getChild(COVERAGE_GROUP)
        return VectorTrack(cov_group)

    def fromtophat(self, accepted_hits_bam, junctions_bed,
                   max_multimaps=None,
                   flip_read2_strand=True):
        '''Populate the junction and coverage tracks from tophat output.

        accepted_hits_bam: path of the BAM file, opened with
            ``pysam.Samfile`` in "rb" mode
        junctions_bed: path of the junctions BED file, parsed with
            ``tophat_bed_to_juncs``
        max_multimaps: forwarded to ``BamCoverageIterator``
        flip_read2_strand: forwarded to ``BamCoverageIterator``

        Junctions whose reference name is not among ``get_rnames()``
        are skipped (logged at debug level).  Both input files are
        closed before returning, even when an error occurs.
        '''
        # insert splice junction track
        track_name = self.hdf_group._v_name
        rnames = set(self.get_rnames())
        logging.info("[RnaseqTrack] adding junctions")
        # keep the BED file open only while junctions are being consumed
        with open(junctions_bed) as bedfh:
            for junc in tophat_bed_to_juncs(track_name, bedfh):
                if junc[REF_COL_NAME] not in rnames:
                    logging.debug('Skipping junc %s', str(junc))
                    continue
                # ids are assigned sequentially from the current count
                junc['id'] = self.junc_track.num_intervals
                self.junc_track.add(junc)
        self.junc_track.index(persist=True)
        # insert coverage track
        logging.info("creating coverage track")
        bamfh = pysam.Samfile(accepted_hits_bam, "rb")
        # NOTE(review): an earlier draft considered reading
        # --max-multihits from the BAM header command line; for now the
        # caller supplies max_multimaps explicitly.
        try:
            intervalcoviter = BamCoverageIterator(
                bamfh,
                norm_rlen=True,
                num_hits_tag="NH",
                hit_prob_tag=None,
                # honor the caller's arguments instead of fixed values
                max_multimaps=max_multimaps,
                keep_dup=True,
                keep_qcfail=False,
                flip_read2_strand=flip_read2_strand)
            self.cov_track.fromintervals(intervalcoviter)
            # report coverage statistics to allow normalization
            # calculations
            stats = intervalcoviter.stats
            logging.debug("\tProcessed '%d' valid reads", stats.num_reads)
            logging.debug("\tTotal coverage '%f'", stats.total_cov)
        finally:
            # release the BAM handle even if coverage loading failed
            bamfh.close()