def go(args):
    """Write each mapped read, cut down to its amplicon plus flank, as FASTA.

    For every alignment in ``args.alignment`` the nearest '+' primer to the
    alignment start and the nearest '-' primer to the alignment end are
    looked up with find_primer(). The 'start' coordinate of each primer is
    converted to a position in the read with find_query_pos(), the window is
    widened by 40 bases on both sides (clamped to the read), and that slice
    of the read sequence is written to stdout as a FASTA record.

    One tab-separated line per read is written to stderr:
    primer_start, primer_end, their difference, and the read's query_length.

    Args:
        args: parsed options; ``args.bedfile`` is handed to read_bed_file()
            and ``args.alignment`` is opened with pysam in "rb" mode.
    """
    bed = read_bed_file(args.bedfile)

    infile = pysam.AlignmentFile(args.alignment, "rb")
    try:
        for s in infile:
            # An unmapped read has no reference span (pysam reports
            # reference_end as None), so no primer can be located for it.
            if s.is_unmapped:
                sys.stderr.write("%s skipped as unmapped\n" % (s.query_name))
                continue

            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            # Element 2 of find_primer's result is the primer record itself;
            # 'start' is the primer's 5' end for both directions.
            primer_start = p1[2]['start']
            primer_end = p2[2]['start']

            # NOTE(review): find_query_pos presumably maps a reference
            # coordinate to an offset in the read -- confirm against its
            # definition.
            query_align_start = find_query_pos(s, primer_start)
            query_align_end = find_query_pos(s, primer_end)

            sys.stderr.write("%s\t%s\t%s\t%s\n" % (primer_start, primer_end,
                                                   primer_end - primer_start,
                                                   s.query_length))

            # Keep 40 extra bases either side, without running off the read.
            startpos = max(0, query_align_start - 40)
            endpos = min(query_align_end + 40, s.query_length)

            sys.stdout.write(">%s\n%s\n" % (s.query_name,
                                            s.query_sequence[startpos:endpos]))
    finally:
        infile.close()
Esempio n. 2
0
def go(args):
    """Trim primer-overlapping ends from alignments streamed stdin -> stdout.

    For every mapped, non-supplementary read the nearest '+' primer to the
    alignment start and the nearest '-' primer to the alignment end are
    looked up with find_primer(). Where the read extends past the chosen
    primer boundary, trim() is called for that end. Reads that still pass
    check_still_matching_bases() are written to the output.

    Args:
        args: parsed options. Attributes read here:
            report    -- path for a tab-separated per-read report, or falsy
                         for no report file
            bedfile   -- primer scheme handed to read_bed_file()
            verbose   -- when true, echo the report line and the reason an
                         end was left untrimmed to stderr
            start     -- when true, trim to each primer's 'start' coordinate
                         instead of its 'end'
            normalise -- when truthy, write at most this many reads per
                         (left primer, right primer, strand) combination
    """
    # The report is optional; keep the handle as None when it is not wanted
    # so the cleanup below never touches an unbound name.
    reportfh = open(args.report, "w") if args.report else None
    try:
        bed = read_bed_file(args.bedfile)

        counter = defaultdict(int)

        infile = pysam.AlignmentFile("-", "rb")
        outfile = pysam.AlignmentFile("-", "wh", template=infile)
        for s in infile:
            if s.is_unmapped:
                sys.stderr.write("%s skipped as unmapped\n" % (s.query_name))
                continue

            if s.is_supplementary:
                sys.stderr.write("%s skipped as supplementary\n" %
                                 (s.query_name))
                continue

            # Work on a copy; trim() receives both the copy and the read.
            cigar = copy(s.cigartuples)

            p1 = find_primer(bed, s.reference_start, '+')
            p2 = find_primer(bed, s.reference_end, '-')

            report = "%s\t%s\t%s\t%s_%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                s.query_name, s.reference_start, s.reference_end,
                p1[2]['Primer_ID'], p2[2]['Primer_ID'], p1[2]['Primer_ID'],
                abs(p1[1]), p2[2]['Primer_ID'], abs(p2[1]), s.is_secondary,
                s.is_supplementary, p1[2]['start'], p2[2]['end'])
            if reportfh is not None:
                reportfh.write(report + "\n")

            if args.verbose:
                sys.stderr.write(report + "\n")

            # Which edge of the primer record the read is trimmed to.
            edge = 'start' if args.start else 'end'

            try:
                # Left end: trim when the alignment starts before the primer
                # boundary.
                primer_position = p1[2][edge]
                if s.reference_start < primer_position:
                    trim(cigar, s, primer_position, 0)
                elif args.verbose:
                    sys.stderr.write("ref start %s >= primer_position %s\n" %
                                     (s.reference_start, primer_position))

                # Right end: trim when the alignment runs past the primer
                # boundary.
                primer_position = p2[2][edge]
                if s.reference_end > primer_position:
                    trim(cigar, s, primer_position, 1)
                elif args.verbose:
                    sys.stderr.write("ref end %s <= primer_position %s\n" %
                                     (s.reference_end, primer_position))
            except Exception as e:
                # Best effort: a read that cannot be trimmed is reported and
                # still goes through the checks below.
                sys.stderr.write("problem %s\n" % (e, ))

            if args.normalise:
                pair = "%s-%s-%d" % (p1[2]['Primer_ID'], p2[2]['Primer_ID'],
                                     s.is_reverse)
                counter[pair] += 1

                # Cap the number of reads kept per primer pair and strand.
                if counter[pair] > args.normalise:
                    continue

            if not check_still_matching_bases(s):
                continue

            outfile.write(s)
    finally:
        if reportfh is not None:
            reportfh.close()
Esempio n. 3
0
    if not end:
        s.pos = pos - extra

    #print >>sys.stderr,  "New pos: %s" % (s.pos)

    if end:
        cigar.append((4, eaten))
    else:
        cigar.insert(0, (4, eaten))
    oldcigarstring = s.cigarstring
    s.cigartuples = cigar

    #print >>sys.stderr,  s.query_name, oldcigarstring[0:50], s.cigarstring[0:50]


# Primer records, loaded once when the module is executed. find_primer()
# below iterates this module-level collection.
# NOTE(review): 'all' is presumably a scheme name or path that
# read_bed_file understands -- confirm against its definition.
bed = read_bed_file('all')


def find_primer(pos, direction):
    """Return the primer of the given direction whose start is nearest to pos.

    Only records in the module-level ``bed`` whose 'direction' equals
    ``direction`` are considered. The result is a 3-tuple
    ``(distance, signed_offset, primer)`` where ``signed_offset`` is
    ``primer['start'] - pos`` and ``distance`` is its absolute value. On a
    tie the earliest matching record in ``bed`` wins. Raises ValueError when
    no record has the requested direction.
    """
    # Example record:
    # {'Amplicon_size': '1874', 'end': 7651, '#Region': 'region_4',
    #  'start': 7633, 'Coords': '7633',
    #  "Sequence_(5-3')": 'GCTGGCCCGAAATATGGT', 'Primer_ID': '16_R'}
    candidates = []
    for primer in bed:
        if primer['direction'] != direction:
            continue
        offset = primer['start'] - pos
        candidates.append((abs(offset), offset, primer))

    # Compare on distance alone so ties never fall through to the dicts.
    return min(candidates, key=lambda entry: entry[0])


# Both streams are opened on "-"; the output is created with the input as
# its template.
# NOTE(review): in pysam "-" normally means stdin/stdout, "rb" reads BAM and
# "wh" writes SAM text including the header -- confirm for the pysam version
# in use.
infile = pysam.AlignmentFile("-", "rb")
outfile = pysam.AlignmentFile("-", "wh", template=infile)
for s in infile:
Esempio n. 4
0
	if not end:
			s.pos = pos - extra

	#print >>sys.stderr,  "New pos: %s" % (s.pos)

	if end:
		cigar.append((4, eaten))
	else:
		cigar.insert(0, (4, eaten))
	oldcigarstring = s.cigarstring
	s.cigartuples = cigar

	#print >>sys.stderr,  s.query_name, oldcigarstring[0:50], s.cigarstring[0:50]

# Primer records, loaded once when the module is executed. find_primer()
# below iterates this module-level collection.
# NOTE(review): 'all' is presumably a scheme name or path that
# read_bed_file understands -- confirm against its definition.
bed = read_bed_file('all')

def find_primer(pos, direction):
	"""Pick the primer with the requested direction that starts closest to pos.

	Searches the module-level ``bed`` records. Returns the tuple
	``(abs(offset), offset, primer)`` with ``offset = primer['start'] - pos``
	for the record with the smallest absolute offset; the first such record
	wins a tie. Raises ValueError if no record has that direction.
	"""
	# Example record:
	# {'Amplicon_size': '1874', 'end': 7651, '#Region': 'region_4',
	#  'start': 7633, 'Coords': '7633',
	#  "Sequence_(5-3')": 'GCTGGCCCGAAATATGGT', 'Primer_ID': '16_R'}
	scored = []
	for record in bed:
		if record['direction'] == direction:
			delta = record['start'] - pos
			scored.append((abs(delta), delta, record))

	def distance(item):
		return item[0]

	return min(scored, key=distance)

# Both streams are opened on "-"; the output is created with the input as
# its template.
# NOTE(review): in pysam "-" normally means stdin/stdout, "rb" reads BAM and
# "wh" writes SAM text including the header -- confirm for the pysam version
# in use.
infile = pysam.AlignmentFile("-", "rb")
outfile = pysam.AlignmentFile("-", "wh", template=infile)
for s in infile:
	cigar = copy(s.cigartuples)

	if len(sys.argv) > 1:
		if not s.query_name.startswith(sys.argv[1]):
Esempio n. 5
0
# Earlier, now disabled, choices of masked positions are kept below for
# reference.
#MASKED_POSITIONS.extend([n for n in xrange(17135, 17169)])
#MASKED_POSITIONS.extend([n for n in xrange(5742, 5758)])

#MASKED_POSITIONS = [2282, 14011, 5312, 5313]
#MASKED_POSITIONS = [2282, 11973]
# Positions to mask: one hard-coded position here, extended further down
# with every position covered by a primer.
MASKED_POSITIONS = [2282]

# Positional command-line arguments, in order: reference, VCF file, BAM file
# and primer set. The script fails with IndexError if fewer than four are
# supplied.
reference = sys.argv[1]
vcffile = sys.argv[2]
bamfile = sys.argv[3]
primerset = sys.argv[4]
#MASKED_POSITIONS.extend([int(n) for n in sys.argv[5].split(",")])

# NOTE(review): presumably the minimum read depth for a position to be
# trusted; its use is not visible in this part of the file -- confirm.
DEPTH_THRESHOLD = 25

# Add every position in [start, end) of each primer record to the mask
# (xrange excludes the end coordinate).
bed = read_bed_file(primerset)
for primer in bed:
	MASKED_POSITIONS.extend([n for n in xrange(primer['start'], primer['end'])])

def collect_depths(bamfile):
	if not os.path.exists(bamfile):
		raise SystemExit("bamfile %s doesn't exist" % (bamfile,))

	print >>sys.stderr, bamfile

	p = subprocess.Popen(['samtools', 'depth', bamfile],
                             stdout=subprocess.PIPE)
	out, err = p.communicate()
	depths = defaultdict(dict)
	for ln in out.split("\n"):
        	if ln: