def go(args):
    """Write each alignment's amplicon-sized slice of the read as FASTA.

    For every record in the BAM file ``args.alignment`` the primer scheme
    loaded from ``args.bedfile`` is searched for the '+' primer matching the
    alignment start and the '-' primer matching the alignment end.  The
    reference ``'start'`` coordinates of those two primers are mapped onto
    the read with ``find_query_pos`` and the read sequence between them,
    widened by up to 40 bases on each side and clamped to the read, is
    written to stdout as a FASTA record.  One tab-separated diagnostic line
    per read (primer start, primer end, their difference, read length) goes
    to stderr.

    Args:
        args: object exposing ``bedfile`` (primer scheme path) and
            ``alignment`` (BAM path).
    """
    bed = read_bed_file(args.bedfile)
    infile = pysam.AlignmentFile(args.alignment, "rb")
    try:
        for s in infile:
            # find_primer results are indexed with [2] to reach the primer
            # record itself.
            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            primer_start = p1[2]['start']  # start is the 5'
            primer_end = p2[2]['start']

            query_align_start = find_query_pos(s, primer_start)
            query_align_end = find_query_pos(s, primer_end)

            # Explicit stream writes instead of Python 2 print statements:
            # same bytes on the wire, valid syntax whether or not the module
            # uses the print function.
            sys.stderr.write("%s\t%s\t%s\t%s\n" % (
                primer_start, primer_end, primer_end - primer_start,
                s.query_length))

            # Keep 40 bases of flank on each side without running off the
            # ends of the read.
            startpos = max(0, query_align_start - 40)
            endpos = min(query_align_end + 40, s.query_length)

            sys.stdout.write(">%s\n%s\n" % (
                s.query_name, s.query_sequence[startpos:endpos]))
    finally:
        # Release the BAM handle even if a read cannot be processed.
        infile.close()
def go(args):
    """Trim alignments streamed on stdin to their primer boundaries.

    Reads BAM from "-" and writes SAM with header to "-".  For each mapped,
    non-supplementary alignment the '+' primer matched to the alignment start
    and the '-' primer matched to the alignment end are looked up in the
    scheme from ``args.bedfile``.  If the alignment extends past the chosen
    primer coordinate on either side, ``trim`` is called for that side.
    Reads for which ``check_still_matching_bases`` returns false are dropped.

    Args:
        args: object exposing
            ``bedfile``   - primer scheme path;
            ``report``    - optional path; when truthy one tab-separated line
                            per processed alignment is written there;
            ``verbose``   - when truthy, diagnostics are echoed to stderr;
            ``start``     - when truthy trim to each primer's ``'start'``
                            coordinate, otherwise to its ``'end'``;
            ``normalise`` - when truthy, keep at most this many alignments
                            per (left primer, right primer, strand) key.

    A failure while trimming one alignment is reported on stderr and that
    alignment still continues through the remaining checks (best effort).
    """
    # Bind the handle unconditionally so the cleanup below is safe when no
    # report was requested (previously close() ran on an undefined name).
    reportfh = open(args.report, "w") if args.report else None
    try:
        bed = read_bed_file(args.bedfile)
        counter = defaultdict(int)

        # Loop-invariant: which primer coordinate acts as the trim boundary.
        boundary_key = 'start' if args.start else 'end'

        infile = pysam.AlignmentFile("-", "rb")
        outfile = pysam.AlignmentFile("-", "wh", template=infile)
        for s in infile:
            if s.is_unmapped:
                sys.stderr.write("%s skipped as unmapped\n" % (s.query_name,))
                continue

            if s.is_supplementary:
                sys.stderr.write("%s skipped as supplementary\n" % (s.query_name,))
                continue

            # Work on a copy; trim() receives both the copy and the read.
            cigar = copy(s.cigartuples)

            # Element [1] of a find_primer result is reported via abs() as a
            # distance; element [2] is the primer record.
            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                s.query_name, s.reference_start, s.reference_end,
                p1[2]['Primer_ID'], p2[2]['Primer_ID'],
                p1[2]['Primer_ID'], abs(p1[1]),
                p2[2]['Primer_ID'], abs(p2[1]),
                s.is_secondary, s.is_supplementary,
                p1[2]['start'], p2[2]['end'])
            if reportfh is not None:
                reportfh.write(report + "\n")

            if args.verbose:
                sys.stderr.write(report + "\n")

            try:
                # Left side: the alignment begins before the boundary.
                primer_position = p1[2][boundary_key]
                if s.reference_start < primer_position:
                    trim(cigar, s, primer_position, 0)
                elif args.verbose:
                    sys.stderr.write("ref start %s >= primer_position %s\n" % (
                        s.reference_start, primer_position))

                # Right side: the alignment runs on past the boundary.
                primer_position = p2[2][boundary_key]
                if s.reference_end > primer_position:
                    trim(cigar, s, primer_position, 1)
                elif args.verbose:
                    sys.stderr.write("ref end %s <= primer_position %s\n" % (
                        s.reference_end, primer_position))
            except Exception as e:
                # Deliberate best effort: note the problem and carry on with
                # this read rather than aborting the whole stream.
                sys.stderr.write("problem %s\n" % (e,))

            if args.normalise:
                pair = "%s-%s-%d" % (
                    p1[2]['Primer_ID'], p2[2]['Primer_ID'], s.is_reverse)
                counter[pair] += 1

                if counter[pair] > args.normalise:
                    continue

            if not check_still_matching_bases(s):
                continue

            outfile.write(s)
    finally:
        if reportfh is not None:
            reportfh.close()
# NOTE(review): the line below is a whitespace-collapsed fragment. It is left
# byte-identical because it begins inside a function whose header is not
# visible here and ends on a loop header whose body is not visible either.
# As written on one physical line, everything after the first '#' is a
# comment to the interpreter; the original line breaks must be restored
# before this code can run. Read in order, the statements are:
#   1. Tail of a CIGAR-trimming routine: when `end` is false the read's
#      `pos` is set to `pos - extra`; the tuple (4, eaten) is appended to
#      `cigar` when `end` is true and inserted at index 0 otherwise; the old
#      cigar string is saved and `cigar` is assigned to `s.cigartuples`.
#      (Op code 4 is presumably the SAM soft-clip operation - confirm.)
#   2. Module-level `bed = read_bed_file('all')`.
#   3. `find_primer(pos, direction)`: over the entries of the global `bed`
#      whose 'direction' equals `direction`, returns the tuple
#      (abs(start - pos), start - pos, entry) with the smallest absolute
#      distance. min() raises ValueError when no entry matches.
#   4. Script setup: opens "-" with mode "rb" for input and "-" with mode
#      "wh" (templated on the input) for output, then starts iterating the
#      input records.
if not end: s.pos = pos - extra #print >>sys.stderr, "New pos: %s" % (s.pos) if end: cigar.append((4, eaten)) else: cigar.insert(0, (4, eaten)) oldcigarstring = s.cigarstring s.cigartuples = cigar #print >>sys.stderr, s.query_name, oldcigarstring[0:50], s.cigarstring[0:50] bed = read_bed_file('all') def find_primer(pos, direction): # {'Amplicon_size': '1874', 'end': 7651, '#Region': 'region_4', 'start': 7633, 'Coords': '7633', "Sequence_(5-3')": 'GCTGGCCCGAAATATGGT', 'Primer_ID': '16_R'} from operator import itemgetter closest = min([(abs(p['start'] - pos), p['start'] - pos, p) for p in bed if p['direction'] == direction], key=itemgetter(0)) return closest infile = pysam.AlignmentFile("-", "rb") outfile = pysam.AlignmentFile("-", "wh", template=infile) for s in infile:
# NOTE(review): the line below is a whitespace-collapsed fragment. It is left
# byte-identical because it begins inside a function whose header is not
# visible here and ends on an `if` whose body is not visible either. As
# written on one physical line, everything after the first '#' is a comment
# to the interpreter; the original line breaks must be restored before this
# code can run. Read in order, the statements are:
#   1. Tail of a CIGAR-trimming routine: when `end` is false the read's
#      `pos` is set to `pos - extra`; the tuple (4, eaten) is appended to
#      `cigar` when `end` is true and inserted at index 0 otherwise; the old
#      cigar string is saved and `cigar` is assigned to `s.cigartuples`.
#      (Op code 4 is presumably the SAM soft-clip operation - confirm.)
#   2. Module-level `bed = read_bed_file('all')`.
#   3. `find_primer(pos, direction)`: over the entries of the global `bed`
#      whose 'direction' equals `direction`, returns the tuple
#      (abs(start - pos), start - pos, entry) with the smallest absolute
#      distance. min() raises ValueError when no entry matches.
#   4. Script setup: opens "-" with mode "rb" for input and "-" with mode
#      "wh" (templated on the input) for output.
#   5. Start of the per-read loop: copies the read's cigartuples and, when a
#      command-line argument is present, tests whether the read name starts
#      with sys.argv[1] (the action taken is cut off here).
if not end: s.pos = pos - extra #print >>sys.stderr, "New pos: %s" % (s.pos) if end: cigar.append((4, eaten)) else: cigar.insert(0, (4, eaten)) oldcigarstring = s.cigarstring s.cigartuples = cigar #print >>sys.stderr, s.query_name, oldcigarstring[0:50], s.cigarstring[0:50] bed = read_bed_file('all') def find_primer(pos, direction): # {'Amplicon_size': '1874', 'end': 7651, '#Region': 'region_4', 'start': 7633, 'Coords': '7633', "Sequence_(5-3')": 'GCTGGCCCGAAATATGGT', 'Primer_ID': '16_R'} from operator import itemgetter closest = min([(abs(p['start'] - pos), p['start'] - pos, p) for p in bed if p['direction'] == direction], key=itemgetter(0)) return closest infile = pysam.AlignmentFile("-", "rb") outfile = pysam.AlignmentFile("-", "wh", template=infile) for s in infile: cigar = copy(s.cigartuples) if len(sys.argv) > 1: if not s.query_name.startswith(sys.argv[1]):
# NOTE(review): the line below is a whitespace-collapsed script fragment. It
# is left byte-identical because `collect_depths` runs past the end of the
# visible source. Since the line starts with '#', the interpreter treats all
# of it as a comment until the original line breaks are restored. The
# `xrange` call and `print >>` statement are Python 2 syntax. Read in order:
#   1. Commented-out alternatives for MASKED_POSITIONS, then the active
#      value `MASKED_POSITIONS = [2282]`.
#   2. Positional command-line arguments: sys.argv[1] reference,
#      sys.argv[2] vcffile, sys.argv[3] bamfile, sys.argv[4] primerset
#      (a fifth, comma-separated list of extra positions is commented out).
#   3. `DEPTH_THRESHOLD = 25`.
#   4. The primer scheme is read with read_bed_file(primerset) and every
#      position in xrange(primer['start'], primer['end']) - end exclusive -
#      is added to MASKED_POSITIONS for each primer.
#   5. `collect_depths(bamfile)`: exits via SystemExit if the path does not
#      exist, echoes the path to stderr, runs `samtools depth <bamfile>`
#      capturing stdout, and begins parsing the output line by line into a
#      defaultdict(dict); the parsing body is cut off here.
#MASKED_POSITIONS.extend([n for n in xrange(17135, 17169)]) #MASKED_POSITIONS.extend([n for n in xrange(5742, 5758)]) #MASKED_POSITIONS = [2282, 14011, 5312, 5313] #MASKED_POSITIONS = [2282, 11973] MASKED_POSITIONS = [2282] reference = sys.argv[1] vcffile = sys.argv[2] bamfile = sys.argv[3] primerset = sys.argv[4] #MASKED_POSITIONS.extend([int(n) for n in sys.argv[5].split(",")]) DEPTH_THRESHOLD = 25 bed = read_bed_file(primerset) for primer in bed: MASKED_POSITIONS.extend([n for n in xrange(primer['start'], primer['end'])]) def collect_depths(bamfile): if not os.path.exists(bamfile): raise SystemExit("bamfile %s doesn't exist" % (bamfile,)) print >>sys.stderr, bamfile p = subprocess.Popen(['samtools', 'depth', bamfile], stdout=subprocess.PIPE) out, err = p.communicate() depths = defaultdict(dict) for ln in out.split("\n"): if ln: