Ejemplo n.º 1
0
MIN_INTRON_SIZE = 1000

samfile = pysam.AlignmentFile(sys.argv[1], 'rb')
ss = get_jxns(sys.argv[2])
seq = Seq(load_genome(open(sys.argv[3], 'r')))

for read in samfile.fetch():
	blocks = merge_blocks(read.get_blocks())
	strand = (read.is_read1 == read.is_reverse)
	if len(blocks) > 1:
		for i in xrange(len(blocks) - 1):
			if strand:
				five, three = blocks[i][1], blocks[i+1][0]
			else:
				three, five = blocks[i][1], blocks[i+1][0]

			# Some insertion events align to 5'ss and cause false positives
			# ... easily fixed by asserting a minimum recursive intron length.
			if abs(five - three) < MIN_RECURSIVE_INTRON_SIZE: continue

			chrom = samfile.getrname(read.reference_id)
			if chrom[:3] == 'chr': chrom = chrom[3:]
			strand_str = '+' if strand else '-'

			if (chrom, strand_str, five) in ss:
				intron_three = ss[(chrom, strand_str, five)]
				if strand and three < intron_three and seq.query(chrom, strand, three):
					print '\t'.join(map(str, [chrom, five, three, read.query_name, blocks, strand_str]))
				elif not strand and three > intron_three and seq.query(chrom, strand, three):
					print '\t'.join(map(str,[chrom, three, five, read.query_name, blocks, strand_str]))
Ejemplo n.º 2
0
    # Values used for the junction lookup below: the strand as '+'/'-' and
    # the reference name with any leading 'chr' removed.
    strand_str = '+' if strand else '-'
    chrom = samfile.getrname(read.reference_id)
    if chrom[:3] == 'chr': chrom = chrom[3:]

    # The following two blocks are identical and handle + and - strands.
    # This one runs only when `strand` is true and read.pnext lies more than
    # MAX_ALLOWED_INSERT past read.pos.
    if strand and read.pnext - read.pos > MAX_ALLOWED_INSERT:
        inner_left = read.get_blocks()[-1][1]  # End   of mate #1
        inner_right = read.pnext  # Start of mate #2

        # [(five, three), ...] for all splices between read pair
        splices = jxns.search(chrom, strand_str, inner_left, inner_right)

        # check if consistent with any anno splice
        consistent = False
        for five, three in splices:
            # When ALLOW_ANNO_AGGT is set, splices for which seq.query() is
            # truthy are skipped and so cannot mark the pair as consistent.
            # NOTE(review): seq.query() is defined elsewhere; presumably it
            # tests the genome sequence at `three` -- confirm.
            if ALLOW_ANNO_AGGT and seq.query(chrom, strand, three): continue
            # The splice must sit between the two inner ends, allowing
            # OVERHANG bases of slack on either side.
            cassette = five + OVERHANG >= inner_left and three - OVERHANG <= inner_right
            # What is left of the inner distance once the spliced-out span is
            # removed must be below MAX_ALLOWED_INSERT.
            if cassette and five - inner_left + inner_right - three < MAX_ALLOWED_INSERT:
                consistent = True
        # A pair explained by one of the splices is not reported; move on to
        # the next iteration of the enclosing loop.
        if consistent: continue

        # Collapse the splices to one entry per 5' coordinate, keeping the
        # largest 3' coordinate seen for it.
        fives = {}
        for five, three in splices:
            if five not in fives: fives[five] = three
            fives[five] = max(three, fives[five])

        # Only print if downstream end lies within an annotated intron
        # and 5'ss close to inner_left.
        # NOTE(review): the condition that follows measures the 5'ss distance
        # from read.pos, not from inner_left -- confirm which is intended.
        for five, three in fives.items():
            if five - read.pos < MAX_FIVE_INSERT and inner_right + OVERHANG < three:
                print '\t'.join(
Ejemplo n.º 3
0
	strand_str = '+' if strand else '-'
	chrom = samfile.getrname(read.reference_id)
	if chrom[:3] == 'chr': chrom = chrom[3:]

	# The following two blocks are identical and handle + and - strands.
	if strand and read.pnext - read.pos > MAX_ALLOWED_INSERT:
		inner_left = read.get_blocks()[-1][1] # End   of mate #1
		inner_right = read.pnext              # Start of mate #2
		
		# [(five, three), ...] for all splices between read pair
		splices = jxns.search(chrom, strand_str, inner_left, inner_right)

		# check if consistent with any anno splice
		consistent = False
		for five, three in splices:
			if ALLOW_ANNO_AGGT and seq.query(chrom, strand, three): continue
			cassette = five + OVERHANG >= inner_left and three - OVERHANG <= inner_right
			if cassette and five - inner_left + inner_right - three < MAX_ALLOWED_INSERT:
				consistent = True
		if consistent: continue

		fives = {}
		for five, three in splices:
			if five not in fives: fives[five] = three
			fives[five] = max(three, fives[five])

		# Only print if downstream end lies within an annotated intron
		# and 5'ss close to inner_left.
		for five, three in fives.items():
			if five - read.pos < MAX_FIVE_INSERT and inner_right + OVERHANG < three:
				print '\t'.join(map(str, [chrom, inner_left, inner_right, five, sample, strand_str]))
Ejemplo n.º 4
0
        for i in xrange(len(blocks) - 1):
            if strand:
                five, three = blocks[i][1], blocks[i + 1][0]
            else:
                three, five = blocks[i][1], blocks[i + 1][0]

            # Some insertion events align to 5'ss and cause false positives
            # ... easily fixed by asserting a minimum recursive intron length.
            if abs(five - three) < MIN_RECURSIVE_INTRON_SIZE: continue

            chrom = samfile.getrname(read.reference_id)
            if chrom[:3] == 'chr': chrom = chrom[3:]
            strand_str = '+' if strand else '-'

            if (chrom, strand_str, five) in ss:
                intron_three = ss[(chrom, strand_str, five)]
                if strand and three < intron_three and seq.query(
                        chrom, strand, three):
                    print '\t'.join(
                        map(str, [
                            chrom, five, three, read.query_name, blocks,
                            strand_str
                        ]))
                elif not strand and three > intron_three and seq.query(
                        chrom, strand, three):
                    print '\t'.join(
                        map(str, [
                            chrom, three, five, read.query_name, blocks,
                            strand_str
                        ]))