# Single-bit masks for per-read flag properties, written as shifts so the
# bit position of each flag is explicit (1 << 3 == 0x8 ... 1 << 10 == 0x400).
is_next_unmapped = 1 << 3  # next fragment in the template is unmapped
is_revcomp = 1 << 4  # SEQ is reverse complemented
is_next_reversed = 1 << 5  # SEQ of the next fragment in the template is reversed
is_read1 = 1 << 6  # first fragment in the template
is_read2 = 1 << 7  # last fragment in the template
is_secondary = 1 << 8  # secondary alignment
is_failqc = 1 << 9  # did not pass quality controls
is_dupe = 1 << 10  # PCR or optical duplicate

# Process every file named on the command line, one at a time.
for fname in sys.argv[1:]:
    # Maps a read's query name to a two-slot list; the slot is chosen by
    # read.is_read2 below (False -> index 0, True -> index 1).
    data = defaultdict(lambda: [None, None])
    infile = pysam.Samfile(fname)
    # Output name: drop the last four characters of the input name
    # (presumably a ".bam"/".sam" extension -- TODO confirm) and append
    # '_fixed_unsorted.bam'. Opened for BAM writing ('wb') using the input
    # file as the template.
    outfile = pysam.Samfile(fname[:-4] + '_fixed_unsorted.bam',
                            'wb',
                            template=infile)
    # get_bam_length is defined outside this view; its two return values
    # are only used here to size the progress bar.
    maxval, start = get_bam_length(infile)

    pbar = ProgressBar(
        maxval=maxval - start,
        widgets=[fname, ': ',
                 Percentage(), ' ',
                 Bar(), ' ',
                 ETA(), ' '])
    pbar.start()

    for read in infile:
        # Progress is reported as the current file position relative to
        # `start`.
        pbar.update(infile.tell() - start)
        qname = read.qname
        # Store the read in the slot selected by its is_read2 attribute.
        data[qname][read.is_read2] = read

        if None not in data[qname]:
    new_header = in_sam.header.copy()
    new_SQ = [sub_dict for sub_dict in new_header['SQ']
              if keepstr in sub_dict['SN']]
    new_header['SQ'] = new_SQ
    return new_header


# Operate on each file independently
for fname in sys.argv[1:]:
    # Maps a read's query name to a two-slot list; the slot is chosen by
    # read.is_read2 below (False -> index 0, True -> index 1).
    data = defaultdict(lambda : [None, None])
    infile = pysam.Samfile(fname)
    # Output name: drop the last four characters of the input name
    # (presumably a ".bam"/".sam" extension -- TODO confirm) and append
    # '_rescued_unsorted.bam'. The output header comes from reheader(infile),
    # whose definition is not fully visible here.
    outfile = pysam.Samfile(fname[:-4] + '_rescued_unsorted.bam', 'wb',
                            header=reheader(infile))
    # Reference-name sequences of the input and output files; used below to
    # translate a read's reference index from input to output numbering.
    irefs = infile.references
    orefs = outfile.references
    maxval, start = get_bam_length(infile) # For progress bar goodness


    pbar = ProgressBar(maxval=maxval - start,
                       widgets = [fname, ': ', Percentage(), ' ', Bar(), ' ',
                                  ETA(), ' '])
    pbar.start()

    # Load up all the read pairs that mapped
    for read in infile:
        # Progress is reported as the current file position relative to
        # `start`.
        pbar.update(infile.tell() - start)
        qname = read.qname
        # Remap the reference index: look up the reference name in the input
        # file, then find that name's position in the output file.
        # NOTE(review): if read.rname is negative, irefs[read.rname] indexes
        # from the end of the sequence rather than failing, and a name that is
        # absent from orefs makes .index() raise ValueError -- confirm the
        # input never contains such reads.
        read.rname = orefs.index(irefs[read.rname])
        # Store the read in the slot selected by its is_read2 attribute.
        data[qname][read.is_read2] = read

        # When we have both ends of the read,
Example #3
0
# Single-bit flag masks occupying consecutive bit positions 3 through 10
# (0x8 ... 0x400), assigned in ascending order:
#   is_next_unmapped  0x8    next fragment in the template is unmapped
#   is_revcomp        0x10   SEQ is reverse complemented
#   is_next_reversed  0x20   SEQ of the next fragment in the template is reversed
#   is_read1          0x40   first fragment in the template
#   is_read2          0x80   last fragment in the template
#   is_secondary      0x100  secondary alignment
#   is_failqc         0x200  did not pass quality controls
#   is_dupe           0x400  PCR or optical duplicate
(
    is_next_unmapped,
    is_revcomp,
    is_next_reversed,
    is_read1,
    is_read2,
    is_secondary,
    is_failqc,
    is_dupe,
) = (1 << bit for bit in range(3, 11))


for fname in sys.argv[1:]:
    data = defaultdict(lambda : [None, None])
    infile = pysam.Samfile(fname)
    outfile = pysam.Samfile(fname[:-4] + '_fixed_unsorted.bam', 'wb',
                            template=infile)
    maxval, start = get_bam_length(infile)


    pbar = ProgressBar(maxval=maxval - start,
                       widgets = [fname, ': ', Percentage(), ' ', Bar(), ' ',
                                  ETA(), ' '])
    pbar.start()

    for read in infile:
        pbar.update(infile.tell() - start)
        qname = read.qname
        data[qname][read.is_read2] = read

        if None not in data[qname]:
            read1, read2 = data.pop(qname)
            is_same = read1.rname == read2.rname