Esempio n. 1
0
    tog2 = open(gtffn + ".downstream_introns", 'w')
else:
    fivesfn = gtffn + ".fives.%s_%s.fasta" % (offset[0], offset[1])
    threesfn = gtffn + ".threes.%s_%s.fasta" % (offset[2], offset[3])
    f5 = open(fivesfn, 'w')
    f3 = open(threesfn, 'w')

# For each entry of `junctions`, fetch the referenced sequences from the FASTA
# handle and, in intron mode, write one tab-separated record to tog1.
for pieces in junctions:
    if get_introns:
        # Intron mode: collect one fetched sequence per region string in `pieces`.
        seqs = list()
        for p in pieces:
            # The query drops the last two characters of p and a leading "chr"
            # (presumably a ":<strand>" suffix, given the p[-1] test below -- confirm).
            if p[:3] == "chr": q = p[3:-2]
            else: q = p[:-2]
            seq = fastafile.fetch(region=q)
            if p[-1] == '-':
                # Regions ending in '-' are reversed and passed through complementDNA.
                seq = complementDNA(seq[::-1])
            seqs += [seq]
    else:
        # Otherwise `pieces` must unpack into exactly two region strings.
        # NOTE(review): unlike the intron branch, these strings reach fetch()
        # untrimmed, with the character tested as five[-1]/three[-1] still
        # attached -- confirm that the two modes use different region formats.
        five, three = pieces
        A = fastafile.fetch(region=five)
        B = fastafile.fetch(region=three)
        if five[-1] == '-':
            A = complementDNA(A[::-1])  # reverse the string
        if three[-1] == '-':
            B = complementDNA(B[::-1])

    if get_introns:
        # Only u5/u3 and seqs[0]/seqs[1] are used by the statement below.
        u5, u3, d5, d3 = pieces
        # Columns: u5, u3, seqs[0] lower-cased from offset[0] onward,
        # seqs[1] lower-cased before offset[2].
        print >> tog1, "\t".join(
            [u5, u3] + [seqs[0][:offset[0]] + seqs[0][offset[0]:].lower()] +
            [seqs[1][:offset[2]].lower() + seqs[1][offset[2]:]])
except IndexError:
    print >> sys.stderr, "Usage: script.py <exon_fn> <gene_fn> <tabix_fn>"
    sys.exit(0)

fastafile = pysam.Fastafile(fasta_fn)

# Print a FASTA-style record for every row of gene_fn whose first two columns
# match a key listed in exon_fn.  The try/finally and the with-blocks make sure
# the FASTA handle and both text files are closed even when a row is malformed
# or a fetch raises.
try:
    # Each exon_fn line is split on ':' and stored as a tuple key.  The list
    # values are never filled here; they are kept so `keys` stays a dict.
    with open(exon_fn) as f:
        keys = dict((tuple(row.strip().split(':')), []) for row in f)

    # NOTE(review): no row cap is applied.  The former `if c > 5: break` guard
    # never fired because its counter was advanced with `c += 0`; add a real
    # counter only if truncated output is actually wanted.
    with open(gene_fn) as f:
        for row in f:
            l = row.strip().split('\t')
            # Blank lines carry no columns to match on.
            if l == ['']:
                continue
            if (l[0], l[1]) not in keys:
                continue
            sd = l[5]
            coord = l[2] + ":" + l[3] + "-" + l[4]
            results = fastafile.fetch(region=coord)
            # Single-argument print(...) emits the same text under both the
            # print statement and the print function.
            print(">%s:%s" % (coord, sd))
            if sd == '+':
                print(results)
            elif sd == '-':
                print(complementDNA(results)[::-1])
            # Any other value in column 5 leaves the header without a sequence.
finally:
    fastafile.close()
Esempio n. 3
0
#!/home/paulk/software/bin/python
from __future__ import division
from sys import argv,exit,stderr
import pysam
from key_functions import complementDNA

fastafile = pysam.Fastafile("resources/refs/hg19/Homo_sapiens.GRCh37.66.dna.fa")

# For every "U12-U2" row of the tab-separated details table, print column 4
# followed by the sequences fetched for the two coordinate windows built from
# columns 11 and 12, joined by "-".
# The try/finally and the with-block guarantee that both handles are released
# even if a row fails to parse or a fetch raises.
try:
	with open("u12_introns_all_norm_ps_details.txt") as f:
		for row in f:
			# Skip blank lines and lines starting with 'i'
			# (presumably the header row -- confirm against the input file).
			if not row.strip() or row[0] == 'i': continue
			l = row.strip().split('\t')
			# Filter first so coordinates are parsed only for rows that are printed.
			if l[-6] != "U12-U2": continue
			# The first three characters of column 8 are dropped
			# (presumably a "chr" prefix missing from the FASTA names -- confirm).
			chrom = l[8][3:]
			start = int(l[11])
			end = int(l[12])
			reg1 = "%s:%d-%d" % (chrom,start,start+1)
			reg2 = "%s:%d-%d" % (chrom,end-1,end)
			if l[14] == '-':
				# Column 14 == '-': emit the windows in swapped order, each
				# passed through complementDNA and reversed.
				pair = complementDNA(fastafile.fetch(region=reg2))[::-1]+"-"+complementDNA(fastafile.fetch(region=reg1))[::-1]
			else:
				pair = fastafile.fetch(region=reg1)+"-"+fastafile.fetch(region=reg2)
			# Single-argument print(...) yields the same "name pair" line under
			# both the print statement and the print function.
			print("%s %s" % (l[4],pair))
finally:
	fastafile.close()
Esempio n. 4
0
	tog2 = open(gtffn+".downstream_introns",'w')
else:
	fivesfn = gtffn+".fives.%s_%s.fasta"%(offset[0],offset[1])
	threesfn = gtffn+".threes.%s_%s.fasta"%(offset[2],offset[3])
	f5 = open(fivesfn,'w')
	f3 = open(threesfn,'w')

# For each entry of `junctions`, fetch the referenced sequences from the FASTA
# handle and, in intron mode, write one tab-separated record to each of tog1
# and tog2.
for pieces in junctions:
	if get_introns:
		# Intron mode: collect one fetched sequence per region string in `pieces`.
		seqs = list()
		for p in pieces:
			# The query drops the last two characters of p and a leading "chr"
			# (presumably a ":<strand>" suffix, given the p[-1] test below -- confirm).
			if p[:3] == "chr": q = p[3:-2]
			else: q = p[:-2]
			seq = fastafile.fetch(region=q)
			if p[-1] == '-':
				# Regions ending in '-' are reversed and passed through complementDNA.
				seq = complementDNA(seq[::-1])
			seqs += [seq]
	else:
		# Otherwise `pieces` must unpack into exactly two region strings.
		# NOTE(review): unlike the intron branch, these strings reach fetch()
		# untrimmed, with the character tested as five[-1]/three[-1] still
		# attached -- confirm that the two modes use different region formats.
		five,three = pieces
		A = fastafile.fetch(region=five)
		B = fastafile.fetch(region=three)
		if five[-1] == '-':
			A = complementDNA(A[::-1])	# reverse the string
		if three[-1] == '-':
			B = complementDNA(B[::-1])
	
	if get_introns:
		u5,u3,d5,d3 = pieces
		# tog1 record: u5, u3, seqs[0] lower-cased from offset[0] onward,
		# seqs[1] lower-cased before offset[2].
		print >> tog1,"\t".join([u5,u3]+[seqs[0][:offset[0]]+seqs[0][offset[0]:].lower()]+[seqs[1][:offset[2]].lower()+seqs[1][offset[2]:]])
		# tog2 record: same layout and the same offsets, applied to d5, d3,
		# seqs[2] and seqs[3].
		print >> tog2,"\t".join([d5,d3]+[seqs[2][:offset[0]]+seqs[2][offset[0]:].lower()]+[seqs[3][:offset[2]].lower()+seqs[3][offset[2]:]])
	else:
Esempio n. 5
0
#!/home/paulk/software/bin/python
from __future__ import division
from sys import argv, exit, stderr
import pysam
from key_functions import complementDNA

fastafile = pysam.Fastafile(
    "resources/refs/hg19/Homo_sapiens.GRCh37.66.dna.fa")

# For every "U12-U2" row of the tab-separated details table, print column 4
# followed by the sequences fetched for the two coordinate windows built from
# columns 11 and 12, joined by "-".
# The try/finally and the with-block guarantee that both handles are released
# even if a row fails to parse or a fetch raises.
try:
    with open("u12_introns_all_norm_ps_details.txt") as f:
        for row in f:
            # Skip blank lines and lines starting with 'i'
            # (presumably the header row -- confirm against the input file).
            if not row.strip() or row[0] == 'i':
                continue
            l = row.strip().split('\t')
            # Filter first so coordinates are parsed only for printed rows.
            if l[-6] != "U12-U2":
                continue
            # The first three characters of column 8 are dropped
            # (presumably a "chr" prefix missing from the FASTA names -- confirm).
            chrom = l[8][3:]
            start = int(l[11])
            end = int(l[12])
            reg1 = "%s:%d-%d" % (chrom, start, start + 1)
            reg2 = "%s:%d-%d" % (chrom, end - 1, end)
            if l[14] == '-':
                # Column 14 == '-': emit the windows in swapped order, each
                # passed through complementDNA and reversed.
                pair = (complementDNA(fastafile.fetch(region=reg2))[::-1] +
                        "-" +
                        complementDNA(fastafile.fetch(region=reg1))[::-1])
            else:
                pair = (fastafile.fetch(region=reg1) + "-" +
                        fastafile.fetch(region=reg2))
            # Single-argument print(...) yields the same "name pair" line
            # under both the print statement and the print function.
            print("%s %s" % (l[4], pair))
finally:
    fastafile.close()
except IndexError:
	print >> sys.stderr,"Usage: script.py <exon_fn> <gene_fn> <tabix_fn>"
	sys.exit(0)

fastafile = pysam.Fastafile(fasta_fn)

# Print a FASTA-style record for every row of gene_fn whose first two columns
# match a key listed in exon_fn.  The try/finally and the with-blocks make sure
# the FASTA handle and both text files are closed even when a row is malformed
# or a fetch raises.
try:
	# Each exon_fn line is split on ':' and stored as a tuple key.  The list
	# values are never filled here; they are kept so `keys` stays a dict.
	with open(exon_fn) as f:
		keys = dict((tuple(row.strip().split(':')),[]) for row in f)

	# NOTE(review): no row cap is applied.  The former `if c > 5: break` guard
	# never fired because its counter was advanced with `c += 0`; add a real
	# counter only if truncated output is actually wanted.
	with open(gene_fn) as f:
		for row in f:
			l = row.strip().split('\t')
			# Blank lines carry no columns to match on.
			if l == ['']: continue
			if (l[0],l[1]) not in keys: continue
			sd = l[5]
			coord = l[2]+":"+l[3]+"-"+l[4]
			results = fastafile.fetch(region=coord)
			# Single-argument print(...) emits the same text under both the
			# print statement and the print function.
			print(">%s:%s" % (coord,sd))
			if sd == '+':
				print(results)
			elif sd == '-':
				print(complementDNA(results)[::-1])
			# Any other value in column 5 leaves the header without a sequence.
finally:
	fastafile.close()