def __init__(self, file):
    """Wrap *file* in a GTFReader and keep the reader as a private attribute.

    *file* is handed to GTFReader unchanged.
    """
    reader = GTFReader(file)
    self.__gtfreader = reader
Ejemplo n.º 2
0
from gtfIO import GTFReader, GTFWriter
from optparse import OptionParser

# Filter a GTF annotation: keep every entry that is at least --minlen long
# and whose score is strictly below --maxdiv, and write it to --output.
parser = OptionParser()
parser.add_option("--input",
                  dest="input",
                  help="A gtf file containing the RepeatMasked gtf annotation")
parser.add_option("--minlen", dest="minleng", help="minimum length")
parser.add_option("--output", dest="output", help="A gtf output file")
parser.add_option("--maxdiv",
                  dest="maxdiv",
                  help="maximum divergence; only entries with a score below "
                  "this value are kept",
                  default=99999999999999999)
(options, args) = parser.parse_args()

# --minlen has no default: report a usage error instead of letting
# int(None) fail with an opaque TypeError.
if options.minleng is None:
    parser.error("--minlen is required")
try:
    minleng = int(options.minleng)
    # Convert the threshold once instead of once per entry.
    maxdiv = float(options.maxdiv)
except ValueError:
    parser.error("--minlen must be an integer and --maxdiv must be a number")

w = GTFWriter(options.output)
try:
    for e in GTFReader(options.input):
        # Both end points count towards the length, hence the +1.
        leng = (e.end - e.start) + 1
        if leng >= minleng and e.score < maxdiv:
            w.write(e)
finally:
    # Close the writer even when reading or writing fails part-way.
    w.close()
# Command line: a reference GTF annotation and the annotation to evaluate.
parser = OptionParser()
parser.add_option(
    "--reference",
    dest="reference",
    help="A gtf file containing the reference annotation",
)
parser.add_option(
    "--totest",
    dest="totest",
    help="A gtf file containing the annotation that should be tested",
)
options, args = parser.parse_args()

# Region table passed to filterregion() for both annotations.
# NOTE(review): values look like [start, end] bounds per chromosome - confirm
# against filterregion().
# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4
regionfilter = {
    "X": [0, 22422827],
    "2L": [0, 22420241],
    "2R": [387345, 21146708],
    "3L": [0, 23825333],
    "3R": [0, 27905053],
    "4": [0, 1350078],
}

# Load both annotations, restrict them with the region table, then index
# them through convert_chrhash().
rawref = filterregion(GTFReader.readall(options.reference), regionfilter)
rawtotest = filterregion(GTFReader.readall(options.totest), regionfilter)
chref = convert_chrhash(rawref)
chtotest = convert_chrhash(rawtotest)

# ONLY TEST the chromosomes in the given list
# tcb = total count bases, tce = total count elements; all start at zero.
tcboverlap = tcbref = tcbtotest = 0
tceoverlap = tceoverlap2 = tceref = tcetotest = 0
countRef = countTotest = 0
countBRef = countBTotest = 0
for chr in regionfilter.keys():
    # Per-chromosome entries of the reference and of the tested annotation.
    cref, ctotest = chref[chr], chtotest[chr]
    # (disabled) intoverlapref=identifyOverlap(cref)
Ejemplo n.º 4
0
    "--totest",
    dest="totest",
    help="A gtf file containing the annotation that should be tested")
# Read the command line.
options, args = parser.parse_args()

# Region table handed to filterregion() for both annotations.
# NOTE(review): each value looks like a [start, end] bound - confirm.
# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4
regionfilter = {
    "X":  [0, 22422827],
    "2L": [0, 22420241],
    "2R": [387345, 21146708],
    "3L": [0, 23825333],
    "3R": [0, 27905053],
    "4":  [0, 1350078],
}

# Read and region-filter both annotations before indexing them.
rawref = filterregion(
    GTFReader.readall(options.reference),
    regionfilter,
)
rawtotest = filterregion(
    GTFReader.readall(options.totest),
    regionfilter,
)
chref = convert_chrhash(rawref)
chtotest = convert_chrhash(rawtotest)

# ONLY TEST the chromosomes in the given list
# Base-level totals (tcb = total count bases).
tcboverlap = 0
tcbref = 0
tcbtotest = 0
# Element-level totals (tce = total count elements).
tceoverlap = 0
tceoverlap2 = 0
tceref = 0
tcetotest = 0
# Remaining counters, all starting at zero.
countRef = 0
countTotest = 0
countBRef = 0
countBTotest = 0
for chr in regionfilter.keys():
    # Look up this chromosome in the reference first, then in the test set.
    cref, ctotest = chref[chr], chtotest[chr]
                  help="A gtf file containing the reference annotation")
# Register the remaining options, then read the command line.
parser.add_option("--totest", dest="totest",
                  help="A gtf file containing the annotation that should be tested")
parser.add_option("--output", dest="output",
                  help="The output file containing novel TE insertions not in the reference genome")
options, args = parser.parse_args()

# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4

# Load both annotations completely before indexing either of them.
rawref = GTFReader.readall(options.reference)
rawtotest = GTFReader.readall(options.totest)
chref = convert_chrhash(rawref)
chtotest = convert_chrhash(rawtotest)

# Write whatever novelentries() returns for each listed chromosome.
ofh = GTFWriter(options.output)
for chrom in ("X", "2L", "2R", "3L", "3R", "4"):
    for entry in novelentries(chref[chrom], chtotest[chrom]):
        ofh.write(entry)
ofh.close()
			e.target=target
			return e
		raise StopIteration


# Compare a tested GTF annotation against a reference annotation and write
# the entries returned by novelentries() for each chromosome to --output.
parser = OptionParser()
parser.add_option("--reference",dest="reference",help="A gtf file containing the reference annotation")
parser.add_option("--totest",dest="totest",help="A gtf file containing the annotation that should be tested")
parser.add_option("--output",dest="output",help="The output file containing novel TE insertions not in the reference genome")
(options, args) = parser.parse_args()

# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4


rawref=GTFReader.readall(options.reference)
rawtotest=GTFReader.readall(options.totest)
chref=convert_chrhash(rawref)
chtotest=convert_chrhash(rawtotest)

ofh=GTFWriter(options.output)
try:
    # "chrom" rather than "chr" so the builtin chr() is not shadowed.
    for chrom in ["X","2L","2R","3L","3R","4"]:
        cref=chref[chrom]
        ctotest=chtotest[chrom]
        for n in novelentries(cref,ctotest):
            ofh.write(n)
finally:
    # Close the writer even if a lookup or novelentries() fails, so the
    # output written so far is not left in an unclosed handle.
    ofh.close()