def window_core(obj, chrom):
    """Count read midpoints in fixed-size windows along one chromosome.

    Each read fetched for the chromosome is reduced to a single position by
    ``sam.read_mid_compute``; reads for which that helper returns a negative
    value are skipped.  Positions are binned into windows of
    ``obj.window_size`` bases.  The per-window counts are then optionally
    pseudocounted, scaled to reads per million, optionally smoothed and
    finally passed to ``write_values``.

    Args:
        obj: Settings object.  Attributes read here: ``bamname``,
            ``window_size``, ``pseudo``, ``nreads``, ``smooth`` and
            ``output_type``.
        chrom: Indexable pair ``(name, length)`` describing the chromosome.

    Returns:
        None.  Results are handed to ``write_values`` unless
        ``obj.output_type`` equals ``"none"``.
    """
    chrom_name, chr_length = chrom[0], chrom[1]
    # Parenthesised single-argument form behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print(chrom_name)

    window_size = obj.window_size
    # Explicit floor division: the number of windows must be an integer.
    # A trailing partial window is not represented.
    pos_vect = np.zeros((chr_length // window_size,))
    n_windows = len(pos_vect)

    bamfile = pysam.AlignmentFile(obj.bamname, 'rb')
    try:
        # Loop through BAM reads, adding midpoints to the position vector.
        for read in bamfile.fetch(chrom_name):
            read_mid = sam.read_mid_compute(obj, read)
            if read_mid < 0:
                continue
            read_mid_index = int(read_mid // window_size)
            # Midpoints that land past the last complete window are dropped.
            if read_mid_index < n_windows:
                pos_vect[read_mid_index] += 1
    finally:
        # Release the BAM handle even if fetching or binning fails.
        bamfile.close()

    # Pseudocount empty windows before scaling.
    if obj.pseudo:
        pos_vect[pos_vect == 0] = 1

    # Normalize to reads per million: counts * 1e6 / total reads.  The
    # previous factor multiplied by the library size instead of dividing.
    window_correct = 1e6 / float(obj.nreads)
    pos_vect = window_correct * pos_vect

    if obj.smooth > 0:
        pos_vect = signal_utils.smooth(pos_vect, obj.smooth, window="flat")

    if obj.output_type != "none":
        write_values(obj, pos_vect, chrom_name, chr_length)
def window_core(obj, chrom):
    """Count read midpoints in fixed-size windows along one chromosome.

    Each read fetched for the chromosome is reduced to a single position by
    ``sam.read_mid_compute``; reads for which that helper returns a negative
    value are skipped.  Positions are binned into windows of
    ``obj.window_size`` bases.  The per-window counts are then optionally
    pseudocounted, scaled by ``1e6 * 1e3 / (window_size * nreads)`` (reads
    per million and per kilobase), optionally smoothed and finally passed to
    ``write_values``.

    Args:
        obj: Settings object.  Attributes read here: ``bamname``,
            ``window_size``, ``pseudo``, ``nreads``, ``smooth`` and
            ``output_type``.
        chrom: Indexable pair ``(name, length)`` describing the chromosome.

    Returns:
        None.  Results are handed to ``write_values`` unless
        ``obj.output_type`` equals ``"none"``.
    """
    chrom_name, chr_length = chrom[0], chrom[1]
    # Parenthesised single-argument form behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print(chrom_name)

    window_size = obj.window_size
    # Explicit floor division: the number of windows must be an integer.
    # A trailing partial window is not represented.
    pos_vect = np.zeros((chr_length // window_size,))
    n_windows = len(pos_vect)

    bamfile = pysam.Samfile(obj.bamname, 'rb')
    try:
        # Loop through BAM reads, adding midpoints to the position vector.
        for read in bamfile.fetch(chrom_name):
            read_mid = sam.read_mid_compute(obj, read)
            if read_mid < 0:
                continue
            read_mid_index = int(read_mid // window_size)
            # Midpoints that land past the last complete window are dropped.
            if read_mid_index < n_windows:
                pos_vect[read_mid_index] += 1
    finally:
        # Release the BAM handle even if fetching or binning fails.
        bamfile.close()

    # Pseudocount empty windows before scaling.
    if obj.pseudo:
        pos_vect[pos_vect == 0] = 1

    # Normalize by reads per million and by reads per kilobase.
    window_correct = 1e6 * 1e3 / (float(window_size) * obj.nreads)
    pos_vect = window_correct * pos_vect

    if obj.smooth > 0:
        pos_vect = signal_utils.smooth(pos_vect, obj.smooth, window="flat")

    if obj.output_type != "none":
        write_values(obj, pos_vect, chrom_name, chr_length)
def window_core(obj, chrom):
    """Write a fixedStep WIG track of windowed read-midpoint counts.

    Each read fetched for the chromosome is reduced to a single position by
    ``sam.read_mid_compute``; reads for which that helper returns a negative
    value are skipped.  Positions are binned into windows of
    ``obj.window_size`` bases.  The counts are optionally smoothed,
    optionally pseudocounted, optionally scaled by
    ``1e6 * 1e3 / (window_size * nreads)`` and written to ``obj.wigfile``
    one value per line after a ``fixedStep`` header.

    Compared with the previous body, which applied the scaling factor twice
    when ``no_norm`` was false, still applied it once when ``no_norm`` was
    true, and wrote the values twice when ``no_norm`` was true, this version
    scales at most once and writes the track exactly once.  The header is now
    only written when values follow it.

    Args:
        obj: Settings object.  Attributes read here: ``bamname``,
            ``window_size``, ``nreads``, ``smooth``, ``pseudo``, ``no_norm``,
            ``no_output`` and ``wigfile`` (an object with a ``write`` method).
        chrom: Indexable pair ``(name, length)`` describing the chromosome.

    Returns:
        None.
    """
    chrom_name, chr_length = chrom[0], chrom[1]
    # Parenthesised single-argument form behaves the same as a Python 2
    # print statement and as the Python 3 print function.
    print(chrom_name)

    window_size = obj.window_size
    # Explicit floor division: the number of windows must be an integer.
    # A trailing partial window is not represented.
    pos_vect = np.zeros((chr_length // window_size,))
    n_windows = len(pos_vect)

    bamfile = pysam.Samfile(obj.bamname, 'rb')
    try:
        for read in bamfile.fetch(chrom_name):
            read_mid = sam.read_mid_compute(obj, read)
            if read_mid < 0:
                continue
            read_mid_index = int(read_mid // window_size)
            # Midpoints that land past the last complete window are dropped.
            if read_mid_index < n_windows:
                pos_vect[read_mid_index] += 1
    finally:
        # Release the BAM handle even if fetching or binning fails.
        bamfile.close()

    if obj.smooth > 0:
        pos_vect = signal_utils.smooth(pos_vect, obj.smooth, window="flat")

    if obj.no_output:
        return

    # Pseudocount empty windows.
    if obj.pseudo:
        pos_vect[pos_vect == 0] = 1

    # Normalize by reads per million and by reads per kilobase -- once, and
    # only when normalization has not been disabled.
    if not obj.no_norm:
        window_correct = 1e6 * 1e3 / (float(window_size) * obj.nreads)
        pos_vect = window_correct * pos_vect

    header = "fixedStep chrom={0} start={1} step={2} span={2}\n".format(
        chrom_name, "1", window_size)
    obj.wigfile.write(header)
    # Stream values one per line rather than building one large string.
    for val in pos_vect:
        obj.wigfile.write(str(val) + "\n")
def window_bed(obj):
    """Write one fixedStep WIG section per region listed in ``obj.bed_file``.

    For every region the signal is collected in one of two ways:

    * ``obj.full`` false: each read overlapping the region contributes one
      position -- its 5' end when ``obj.ends`` is set (``read.pos`` for
      forward reads, ``read.aend - 1`` for reverse reads), otherwise the
      value returned by ``sam.read_mid_compute`` (negative values are
      skipped).  Positions are made relative to the region start and binned
      into windows of ``obj.window_size`` bases.
    * ``obj.full`` true: per-base pileup depth (``column.n``) is stored at
      the position's offset from the region start.  This path indexes by
      base offset, so it is only correct for a window size of 1; with a
      larger window an out-of-range offset now raises ``IndexError``
      instead of dropping into the debugger.

    Values are multiplied by ``1e6 * 1e3 / (window_size * nreads)``, or, when
    ``obj.norm_by_mean`` is set, by the reciprocal of the chromosome's mean
    pileup depth, which is cached per chromosome in ``obj.window_correct``.

    Args:
        obj: Settings object.  Attributes read here: ``bamname``,
            ``window_size``, ``nreads``, ``bed_file`` (iterable of text
            lines), ``full``, ``ends``, ``norm_by_mean``, ``window_correct``
            (dict keyed by chromosome name) and ``wigfile`` (an object with
            a ``write`` method).

    Returns:
        None.
    """
    window_size = obj.window_size
    # Normalize by reads per million and by reads per kilobase.
    window_correct = 1e6 * 1e3 / (float(window_size) * obj.nreads)
    curr_chrom = ""

    bamfile = pysam.Samfile(obj.bamname, 'rb')
    try:
        # Loop through bed regions.
        for line in obj.bed_file:
            fields = line.split()
            # Blank lines and '#' comments carry no region.
            if not fields or fields[0].startswith("#"):
                continue

            chrom = fields[0]
            if chrom != curr_chrom:
                print(chrom)
                curr_chrom = chrom

            # NOTE(review): both coordinates are shifted down by one exactly
            # as before; confirm this matches the coordinate convention of
            # the BED files this is run on.
            bed_start = int(fields[1]) - 1
            bed_end = int(fields[2]) - 1

            # Set up the position vector (explicit floor division; a
            # trailing partial window is not represented).
            pos_vect = np.zeros(((bed_end - bed_start) // window_size,))
            n_windows = len(pos_vect)

            if not obj.full:
                # Loop through intersecting reads.
                for read in bamfile.fetch(reference=chrom, start=bed_start,
                                          end=bed_end):
                    if obj.ends:
                        # Extract the strand-aware read end.
                        if read.is_reverse:
                            read_mid = read.aend - 1 - bed_start
                        else:
                            read_mid = read.pos - bed_start
                    else:
                        read_mid = sam.read_mid_compute(obj, read)
                        if read_mid < 0:
                            continue
                        read_mid = read_mid - bed_start

                    # Record the position within the region-relative vector;
                    # reads whose position falls outside it are ignored.
                    read_mid_index = int(read_mid // window_size)
                    if 0 <= read_mid_index < n_windows:
                        pos_vect[read_mid_index] += 1
            else:
                # Currently written for only window_size of 1.
                for column in bamfile.pileup(reference=chrom, start=bed_start,
                                             end=bed_end):
                    # pileup may report columns outside the requested span.
                    if bed_start <= column.pos < bed_end:
                        pos_vect[column.pos - bed_start] = column.n

            # NOTE(review): the original nesting of this section could not be
            # recovered from the source layout; it is applied for every
            # region regardless of obj.full -- confirm that is intended.
            if obj.norm_by_mean:
                if chrom not in obj.window_correct:
                    total_n = 0
                    count_n = 0
                    print("Computing average values...")
                    for column in bamfile.pileup(reference=chrom):
                        total_n += column.n
                        count_n += 1
                    if total_n:
                        obj.window_correct[chrom] = 1 / (total_n / float(count_n))
                    else:
                        # No coverage on this chromosome: avoid dividing by
                        # zero and fall back to a neutral factor.
                        obj.window_correct[chrom] = 1.0
                    # The value shown is the stored correction factor, i.e.
                    # the reciprocal of the mean depth.
                    print("Average coverage = {0}".format(obj.window_correct[chrom]))
                window_correct = obj.window_correct[chrom]

            # Write WIG, header for each bed region.
            out = "fixedStep chrom={0} start={1} step={2} span={2}\n".format(
                chrom, str(bed_start), window_size)
            obj.wigfile.write(out)
            for val in pos_vect:
                obj.wigfile.write(str(window_correct * float(val)) + "\n")
    finally:
        # Release the BAM handle even if a region fails part-way through.
        bamfile.close()