def __init__(self, bamname, outname, track_name, window_size, extend, pe,
             pseudo, ends, smooth, norm_by_mean, no_norm, output_type):
    """Open the input BAM and the requested output, and record the settings.

    Args:
        bamname: Path of the BAM file, opened with ``pysam.AlignmentFile``.
        outname: Path of the output file.
        track_name: Node name checked/created in the HDF5 output.
        window_size: Window size; converted with ``atoi``.
        extend: Extension length; converted with ``int``.
        pe: Truthy for paired-end data (affects ``self.nreads``, see below).
        pseudo, ends, smooth, norm_by_mean, no_norm: Stored unchanged on the
            instance.
        output_type: ``"wig"`` opens ``outname`` for text writing, ``"h5"``
            opens it through ``tb.openFile`` in append mode; any other value
            (e.g. ``"none"``) leaves ``self.outfile`` as ``""``.

    Side effects:
        Calls ``sys.exit()`` when ``output_type`` is ``"h5"`` and
        ``track_util.checkIfNodeExists`` returns a true value.
        Prints ``self.nreads``.
    """
    self.bamname = bamname
    self.bamfile = pysam.AlignmentFile(bamname, 'rb')
    self.outname = outname
    self.output_type = output_type
    self.outfile = ""
    self.track_name = track_name
    if output_type == "wig":
        self.outfile = open(outname, 'w')
    elif output_type == "h5":
        self.outfile = tb.openFile(outname, "a")
        test = track_util.checkIfNodeExists(self.outfile, self.track_name,
                                            create=True,
                                            accept_existence=False)
        if test:
            sys.exit()
    # Any other output_type (including "none") keeps outfile as "".

    self.window_size = atoi(window_size)
    self.pe = pe
    self.extend = int(extend)
    # Read the mapped-read count once and reuse it below.
    mapped_reads = self.bamfile.mapped
    self.pseudo = pseudo
    self.ends = ends
    self.smooth = smooth
    self.norm_by_mean = norm_by_mean
    self.no_norm = no_norm

    # Optional per-key correction values from "<bamname>.chr_means_0":
    # first whitespace-separated field is the key, second is a float.
    # (Keys are presumably chromosome names -- confirm against the writer.)
    self.window_correct = {}
    self.norm_path = ".".join([bamname, "chr_means_0"])
    if os.path.exists(self.norm_path):
        # "with" guarantees the handle is closed even if parsing fails.
        with open(self.norm_path, 'r') as norm_file:
            for line in norm_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                self.window_correct[fields[0]] = float(fields[1])

    # Paired-end input: normalize to the number of fragments, unless the
    # BAM is strand separated ("plus"/"minus" in its name), in which case
    # the mapped count is used as is.
    if pe and not (re.search("plus", bamname) or re.search("minus", bamname)):
        self.nreads = mapped_reads / 2
    else:
        self.nreads = mapped_reads
    print(self.nreads)

    self.chr_lengths = self.bamfile.lengths
    # (reference name, reference length) pairs, in header order.
    self.chrs_queue = list(zip(self.bamfile.references, self.bamfile.lengths))
def run(self):
    """Write a normalized copy of every child node of the input track.

    The output group is named after the input track itself. Nothing is
    written when ``tutil.checkIfNodeExists`` returns a true value for that
    name. Otherwise each child node is passed through
    ``self.int_normalize``, stored as an array under the same node name,
    and given a copy of every attribute of the source node.
    """
    out_track_name = self.in_track._v_name
    print(out_track_name)
    if tutil.checkIfNodeExists(self.out_track, out_track_name):
        return

    group_path = "/" + out_track_name
    for node in self.in_track._f_iterNodes():
        chr_name = node._v_name
        print(chr_name)
        source = self.in_track._f_getChild(chr_name)
        normalized = self.int_normalize(source)
        self.out_track.createArray(group_path, chr_name, normalized)

        # Mirror the source node's attributes onto the freshly written array.
        node_path = "/".join(["", out_track_name, chr_name])
        attrs = source._v_attrs
        for attr_name in attrs._f_list():
            self.out_track.setNodeAttr(node_path, attr_name, attrs[attr_name])
def run(self):
    """Write a smoothed copy of every child node of the input track.

    The output group is named ``<input name>_smooth<self.smooth>``. Nothing
    is written when ``tutil.checkIfNodeExists`` returns a true value for
    that name. A child node named ``"unknown"`` is skipped. Every other
    node is smoothed with ``signal_utils.smooth`` using a ``"flat"`` window
    of size ``self.smooth``, stored under the same node name, and given a
    copy of every attribute of the source node.
    """
    out_track_name = "".join(
        [self.in_track._v_name, "_smooth", str(self.smooth)])
    print(out_track_name)
    if tutil.checkIfNodeExists(self.out_track, out_track_name, True, False):
        return

    group_path = "/" + out_track_name
    for node in self.in_track._f_iterNodes():
        chr_name = node._v_name
        print(chr_name)
        if chr_name != "unknown":
            source = self.in_track._f_getChild(chr_name)
            smoothed = signal_utils.smooth(source[:], self.smooth,
                                           window="flat")
            self.out_track.createArray(group_path, chr_name, smoothed)

            # Mirror the source node's attributes onto the new array.
            node_path = "/".join(["", out_track_name, chr_name])
            attrs = source._v_attrs
            for attr_name in attrs._f_list():
                self.out_track.setNodeAttr(node_path, attr_name,
                                           attrs[attr_name])
def run(self):
    """Write a dampened copy of every child node of the input track.

    The output group name is the input track name with
    ``dampen<self.dampen>`` inserted before its last ``_``-separated
    component (``a_b`` -> ``a_dampen3_b``; a name without ``_`` becomes
    ``dampen3_name``). Nothing is written when
    ``tutil.checkIfNodeExists`` returns a true value for that name.

    For each child node, ``cutoff = self.dampen * std(values)``. Every
    value whose absolute deviation from the mean is not below the cutoff
    is replaced by ``cutoff``. The result is stored under the same node
    name with a copy of every attribute of the source node.
    """
    # Keep the last name component last. Joining the flat component list
    # avoids a stray leading "_" when the name has a single component.
    name_parts = self.in_track._v_name.split("_")
    out_track_name = "_".join(
        name_parts[:-1] + ["dampen" + str(self.dampen), name_parts[-1]])
    print(out_track_name)
    test = tutil.checkIfNodeExists(self.out_track, out_track_name, True, False)
    if test:
        return

    for chrom in self.in_track._f_iterNodes():
        chr_name = chrom._v_name
        print(chr_name)
        track_chr = self.in_track._f_getChild(chr_name)

        # Slice the node once and reuse the array; every `[:]` is
        # presumably a separate full read of the node's data.
        values = track_chr[:]
        cutoff = self.dampen * np.std(values)
        # Written as ~(... < cutoff) on purpose: NaN entries compare False
        # and therefore land in the mask as well.
        mask = ~(np.abs(values - np.mean(values)) < cutoff)
        # NOTE(review): outliers on both sides of the mean are set to
        # +cutoff, not mean +/- cutoff -- confirm this is intended.
        values[mask] = cutoff
        self.out_track.createArray("/" + out_track_name, chr_name, values)

        # Mirror the source node's attributes onto the new array.
        node_path = "/" + "/".join([out_track_name, chr_name])
        for name in track_chr._v_attrs._f_list():
            self.out_track.setNodeAttr(node_path, name,
                                       track_chr._v_attrs[name])
def __init__(self, bamname, outname, track_name, window_size, extend, pe,
             pseudo, ends, smooth, norm_by_mean, no_norm, output_type):
    """Open the input BAM and the requested output, and record the settings.

    Args:
        bamname: Path of the BAM file, opened with ``pysam.Samfile``.
        outname: Path of the output file.
        track_name: Node name checked/created in the HDF5 output.
        window_size: Window size; converted with ``atoi``.
        extend: Extension length; converted with ``int``.
        pe: Truthy for paired-end data (affects ``self.nreads``, see below).
        pseudo, ends, smooth, norm_by_mean, no_norm: Stored unchanged on the
            instance.
        output_type: ``"wig"`` opens ``outname`` for text writing, ``"h5"``
            opens it through ``tb.openFile`` in append mode; any other value
            (e.g. ``"none"``) leaves ``self.outfile`` as ``""``.

    Side effects:
        Calls ``sys.exit()`` when ``output_type`` is ``"h5"`` and
        ``track_util.checkIfNodeExists`` returns a true value.
        Prints ``self.nreads``.
    """
    self.bamname = bamname
    # NOTE(review): newer pysam releases name this class AlignmentFile --
    # confirm the installed version before switching.
    self.bamfile = pysam.Samfile(bamname, 'rb')
    self.outname = outname
    self.output_type = output_type
    self.outfile = ""
    self.track_name = track_name
    if output_type == "wig":
        self.outfile = open(outname, 'w')
    elif output_type == "h5":
        self.outfile = tb.openFile(outname, "a")
        test = track_util.checkIfNodeExists(self.outfile, self.track_name,
                                            create=True,
                                            accept_existence=False)
        if test:
            sys.exit()
    # Any other output_type (including "none") keeps outfile as "".

    self.window_size = atoi(window_size)
    self.pe = pe
    self.extend = int(extend)
    # Read the mapped-read count once and reuse it below.
    mapped_reads = self.bamfile.mapped
    self.pseudo = pseudo
    self.ends = ends
    self.smooth = smooth
    self.norm_by_mean = norm_by_mean
    self.no_norm = no_norm

    # Optional per-key correction values from "<bamname>.chr_means_0":
    # first whitespace-separated field is the key, second is a float.
    # (Keys are presumably chromosome names -- confirm against the writer.)
    self.window_correct = {}
    self.norm_path = ".".join([bamname, "chr_means_0"])
    if os.path.exists(self.norm_path):
        # "with" guarantees the handle is closed even if parsing fails.
        with open(self.norm_path, 'r') as norm_file:
            for line in norm_file:
                fields = line.split()
                if not fields:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                self.window_correct[fields[0]] = float(fields[1])

    # Paired-end input: normalize to the number of fragments, unless the
    # BAM is strand separated ("plus"/"minus" in its name), in which case
    # the mapped count is used as is.
    if pe and not (re.search("plus", bamname) or re.search("minus", bamname)):
        self.nreads = mapped_reads / 2
    else:
        self.nreads = mapped_reads
    print(self.nreads)

    self.chr_lengths = self.bamfile.lengths
    # (reference name, reference length) pairs, in header order.
    self.chrs_queue = list(zip(self.bamfile.references, self.bamfile.lengths))