is_next_unmapped = 0x8 #next fragment in the template unmapped is_revcomp = 0x10 #SEQ being reverse complemented is_next_reversed = 0x20 #SEQ of the next fragment in the template being reversed is_read1 = 0x40 #the first fragment in the template is_read2 = 0x80 #the last fragment in the template is_secondary = 0x100 #secondary alignment is_failqc = 0x200 #not passing quality controls is_dupe = 0x400 #PCR or optical duplicate for fname in sys.argv[1:]: data = defaultdict(lambda: [None, None]) infile = pysam.Samfile(fname) outfile = pysam.Samfile(fname[:-4] + '_fixed_unsorted.bam', 'wb', template=infile) maxval, start = get_bam_length(infile) pbar = ProgressBar( maxval=maxval - start, widgets=[fname, ': ', Percentage(), ' ', Bar(), ' ', ETA(), ' ']) pbar.start() for read in infile: pbar.update(infile.tell() - start) qname = read.qname data[qname][read.is_read2] = read if None not in data[qname]:
# NOTE(review): the next four statements are the tail of a function whose
# `def` line is outside this view -- presumably the `reheader` helper called
# below; confirm.  `in_sam` and `keepstr` are not defined in the visible code.
    # Work on a copy of the input header rather than the header itself.
    new_header = in_sam.header.copy()
    # Keep only the 'SQ' entries whose 'SN' value contains `keepstr`.
    new_SQ = [sub_dict for sub_dict in new_header['SQ'] if keepstr in sub_dict['SN']]
    new_header['SQ'] = new_SQ
    return new_header


# Operate on each file independently
for fname in sys.argv[1:]:
    # qname -> two-slot list of reads; the slot index is `read.is_read2`
    # (False -> 0, True -> 1) and stays None until that read is seen.
    data = defaultdict(lambda : [None, None])
    infile = pysam.Samfile(fname)
    # Output path: the input name minus its last four characters, plus a
    # fixed suffix.  The output header comes from `reheader(infile)`.
    outfile = pysam.Samfile(fname[:-4] + '_rescued_unsorted.bam', 'wb',
                            header=reheader(infile))

    # Reference lists of the input and output files; used below to
    # translate a read's reference index from one to the other.
    irefs = infile.references
    orefs = outfile.references

    # For progress bar goodness
    # (`get_bam_length` is defined elsewhere; its two return values bound
    # the bar.)
    maxval, start = get_bam_length(infile)
    pbar = ProgressBar(maxval=maxval - start,
                       widgets = [fname, ': ', Percentage(), ' ', Bar(), ' ', ETA(), ' '])
    pbar.start()

    # Load up all the read pairs that mapped
    for read in infile:
        # Progress is the current position in the input relative to `start`.
        pbar.update(infile.tell() - start)
        qname = read.qname
        # Re-point the read at the output file's reference list: look up the
        # input reference name, then find that name's position in `orefs`.
        # NOTE(review): `.index` is a linear search repeated for every read
        # and raises ValueError when the name is absent from `orefs`; a
        # negative `read.rname` would index `irefs` from the end.  Confirm
        # the input only holds reads on references kept in the new header.
        read.rname = orefs.index(irefs[read.rname])
        data[qname][read.is_read2] = read
        # When we have both ends of the read,
is_next_unmapped = 0x8 #next fragment in the template unmapped is_revcomp = 0x10 #SEQ being reverse complemented is_next_reversed = 0x20 #SEQ of the next fragment in the template being reversed is_read1 = 0x40 #the first fragment in the template is_read2 = 0x80 #the last fragment in the template is_secondary = 0x100 #secondary alignment is_failqc = 0x200 #not passing quality controls is_dupe = 0x400 #PCR or optical duplicate for fname in sys.argv[1:]: data = defaultdict(lambda : [None, None]) infile = pysam.Samfile(fname) outfile = pysam.Samfile(fname[:-4] + '_fixed_unsorted.bam', 'wb', template=infile) maxval, start = get_bam_length(infile) pbar = ProgressBar(maxval=maxval - start, widgets = [fname, ': ', Percentage(), ' ', Bar(), ' ', ETA(), ' ']) pbar.start() for read in infile: pbar.update(infile.tell() - start) qname = read.qname data[qname][read.is_read2] = read if None not in data[qname]: read1, read2 = data.pop(qname) is_same = read1.rname == read2.rname