def __init__(self, bamname, outname, track_name,
                 window_size, extend, pe, pseudo,
                 ends, smooth, norm_by_mean, no_norm, output_type):
        """Open the BAM input and the requested output, and store run settings.

        Args:
            bamname: Path of the BAM file; opened with pysam in 'rb' mode. It
                is also the prefix of the optional "<bamname>.chr_means_0"
                file of per-chromosome values.
            outname: Output path; opened as a text file for "wig" output or
                through tb.openFile in append mode for "h5" output.
            track_name: Node name passed to track_util.checkIfNodeExists for
                "h5" output.
            window_size: Converted with atoi and stored.
            extend: Converted with int and stored.
            pe: When truthy, the mapped-read count is halved (normalizing to
                fragments) unless bamname contains "plus" or "minus".
            pseudo, ends, smooth, norm_by_mean, no_norm: Stored unchanged.
            output_type: "wig", "h5" or "none"; any other value also leaves
                self.outfile as the empty string.
        """
        self.bamname = bamname
        self.bamfile = pysam.AlignmentFile(bamname, 'rb')

        self.outname = outname
        self.output_type = output_type
        self.track_name = track_name

        # "none" (and any unrecognised output type) keeps this placeholder.
        self.outfile = ""
        if output_type == "wig":
            self.outfile = open(outname, 'w')
        elif output_type == "h5":
            self.outfile = tb.openFile(outname, "a")
            test = track_util.checkIfNodeExists(self.outfile, self.track_name,
                                                create=True, accept_existence=False)
            # NOTE(review): presumably a truthy result means the track is
            # already present; the process then exits with status 0 and no
            # message -- confirm against track_util.
            if test:
                sys.exit()

        self.window_size = atoi(window_size)
        self.pe = pe
        self.extend = int(extend)

        self.pseudo = pseudo
        self.ends = ends
        self.smooth = smooth
        self.norm_by_mean = norm_by_mean
        self.no_norm = no_norm

        # Optional table: first whitespace-separated field is the key, the
        # second is parsed as a float.
        self.window_correct = {}
        self.norm_path = ".".join([bamname, "chr_means_0"])
        if os.path.exists(self.norm_path):
            # Context manager so the handle is closed even if a line is bad.
            with open(self.norm_path, 'r') as norm_file:
                for line in norm_file:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    self.window_correct[fields[0]] = float(fields[1])

        mapped = self.bamfile.mapped
        strand_separated = "plus" in bamname or "minus" in bamname
        if pe and not strand_separated:
            # Normalize to number of fragments. Floor division keeps the
            # count an integer under both Python 2 and Python 3.
            self.nreads = mapped // 2
        else:
            self.nreads = mapped
        print(self.nreads)

        self.chr_lengths = self.bamfile.lengths
        # (reference name, reference length) pairs in BAM header order.
        self.chrs_queue = list(zip(self.bamfile.references, self.bamfile.lengths))
 def run(self):
     """Apply self.int_normalize to every child node of the input track.

     The output track takes the input track's name. For each child node the
     result of self.int_normalize is stored with createArray under
     "/<track name>/<child name>" in self.out_track, and every attribute of
     the input node is copied onto the new array.

     Returns early, writing nothing, when tutil.checkIfNodeExists returns a
     truthy value for the output track.
     """
     out_track_name = self.in_track._v_name
     print(out_track_name)

     test = tutil.checkIfNodeExists(self.out_track, out_track_name)
     if test:
         return

     track_path = "/" + out_track_name
     # The iterator already yields the child nodes, so they are used
     # directly instead of being fetched again by name. The loop variable no
     # longer shadows the builtin chr().
     for chrom in self.in_track._f_iterNodes():
         chr_name = chrom._v_name
         print(chr_name)

         out_track_chr = self.int_normalize(chrom)
         self.out_track.createArray(track_path, chr_name, out_track_chr)

         # Copy the source node's attributes onto the new array.
         chr_path = "/" + "/".join([out_track_name, chr_name])
         attrs = chrom._v_attrs
         for name in attrs._f_list():
             self.out_track.setNodeAttr(chr_path, name, attrs[name])
 def run(self):
     """Write a smoothed copy of every child node of the input track.

     The output track is named "<input name>_smooth<self.smooth>". Each child
     node except the one named "unknown" is read in full, passed to
     signal_utils.smooth with self.smooth and window="flat", and stored with
     createArray under "/<output name>/<child name>" in self.out_track. Every
     attribute of the input node is copied onto the new array.

     Returns early, writing nothing, when tutil.checkIfNodeExists returns a
     truthy value for the output track.
     """
     out_track_name = self.in_track._v_name + "_smooth" + str(self.smooth)
     print(out_track_name)

     test = tutil.checkIfNodeExists(self.out_track, out_track_name, True, False)
     if test:
         return

     track_path = "/" + out_track_name
     # The iterator already yields the child nodes, so they are used
     # directly instead of being fetched again by name.
     for chrom in self.in_track._f_iterNodes():
         chr_name = chrom._v_name
         print(chr_name)
         if chr_name == "unknown":
             continue

         # chrom[:] reads the whole array for this child node.
         out_track_chr = signal_utils.smooth(chrom[:], self.smooth, window="flat")
         self.out_track.createArray(track_path, chr_name, out_track_chr)

         # Copy the source node's attributes onto the new array.
         chr_path = "/" + "/".join([out_track_name, chr_name])
         attrs = chrom._v_attrs
         for name in attrs._f_list():
             self.out_track.setNodeAttr(chr_path, name, attrs[name])
    def run(self):
        """Write a copy of the input track with outlying values replaced.

        The output name inserts "dampen<self.dampen>" before the last
        "_"-separated component of the input track name. For each child node,
        cutoff = self.dampen * std(values); every value whose absolute
        deviation from the mean is not below cutoff is overwritten with
        cutoff. The result is stored with createArray under
        "/<output name>/<child name>" in self.out_track, and every attribute
        of the input node is copied onto the new array.

        Returns early, writing nothing, when tutil.checkIfNodeExists returns
        a truthy value for the output track.
        """
        # Keep the last name component last. A name without "_" yields an
        # empty prefix and therefore a leading underscore.
        name_parts = self.in_track._v_name.split("_")
        out_track_name = "_".join(["_".join(name_parts[:-1]),
                                   "dampen" + str(self.dampen),
                                   name_parts[-1]])
        print(out_track_name)

        test = tutil.checkIfNodeExists(self.out_track, out_track_name, True, False)
        if test:
            return

        track_path = "/" + out_track_name
        for chrom in self.in_track._f_iterNodes():
            chr_name = chrom._v_name
            print(chr_name)

            # Read the array once; the original read it four times.
            values = chrom[:]
            cutoff = self.dampen * np.std(values)
            # Written as ~(... < cutoff) so NaN entries are selected as well.
            mask = ~(np.abs(values - np.mean(values)) < cutoff)
            # NOTE(review): values on both sides of the mean are replaced with
            # +cutoff, not mean +/- cutoff -- confirm this is intended.
            values[mask] = cutoff
            self.out_track.createArray(track_path, chr_name, values)

            # Copy the source node's attributes onto the new array.
            chr_path = "/" + "/".join([out_track_name, chr_name])
            attrs = chrom._v_attrs
            for name in attrs._f_list():
                self.out_track.setNodeAttr(chr_path, name, attrs[name])
Example #5
0
    def __init__(self, bamname, outname, track_name, window_size, extend, pe,
                 pseudo, ends, smooth, norm_by_mean, no_norm, output_type):
        """Open the BAM input and the requested output, and store run settings.

        Args:
            bamname: Path of the BAM file; opened with pysam in 'rb' mode. It
                is also the prefix of the optional "<bamname>.chr_means_0"
                file of per-chromosome values.
            outname: Output path; opened as a text file for "wig" output or
                through tb.openFile in append mode for "h5" output.
            track_name: Node name passed to track_util.checkIfNodeExists for
                "h5" output.
            window_size: Converted with atoi and stored.
            extend: Converted with int and stored.
            pe: When truthy, the mapped-read count is halved (normalizing to
                fragments) unless bamname contains "plus" or "minus".
            pseudo, ends, smooth, norm_by_mean, no_norm: Stored unchanged.
            output_type: "wig", "h5" or "none"; any other value also leaves
                self.outfile as the empty string.
        """
        self.bamname = bamname
        # NOTE(review): pysam.Samfile looks like the legacy name for
        # AlignmentFile -- confirm the pinned pysam version before switching.
        self.bamfile = pysam.Samfile(bamname, 'rb')
        self.outname = outname
        self.output_type = output_type
        self.track_name = track_name

        # "none" (and any unrecognised output type) keeps this placeholder.
        self.outfile = ""
        if output_type == "wig":
            self.outfile = open(outname, 'w')
        elif output_type == "h5":
            self.outfile = tb.openFile(outname, "a")
            test = track_util.checkIfNodeExists(self.outfile,
                                                self.track_name,
                                                create=True,
                                                accept_existence=False)
            # NOTE(review): presumably a truthy result means the track is
            # already present; the process then exits with status 0 and no
            # message -- confirm against track_util.
            if test:
                sys.exit()

        self.window_size = atoi(window_size)
        self.pe = pe
        self.extend = int(extend)

        self.pseudo = pseudo
        self.ends = ends
        self.smooth = smooth
        self.norm_by_mean = norm_by_mean
        self.no_norm = no_norm

        # Optional table: first whitespace-separated field is the key, the
        # second is parsed as a float.
        self.window_correct = {}
        self.norm_path = ".".join([bamname, "chr_means_0"])
        if os.path.exists(self.norm_path):
            # Context manager so the handle is closed even if a line is bad.
            with open(self.norm_path, 'r') as norm_file:
                for line in norm_file:
                    fields = line.split()
                    if not fields:
                        # Tolerate blank lines (e.g. a trailing newline).
                        continue
                    self.window_correct[fields[0]] = float(fields[1])

        mapped = self.bamfile.mapped
        strand_separated = "plus" in bamname or "minus" in bamname
        if pe and not strand_separated:
            # Normalize to number of fragments. Floor division keeps the
            # count an integer under both Python 2 and Python 3.
            self.nreads = mapped // 2
        else:
            self.nreads = mapped
        print(self.nreads)

        self.chr_lengths = self.bamfile.lengths
        # (reference name, reference length) pairs in BAM header order.
        self.chrs_queue = list(
            zip(self.bamfile.references, self.bamfile.lengths))