# Filter a RepeatMasker GTF annotation.
#
# Copies every entry from --input to --output whose length is at least
# --minlen and whose score is strictly below --maxdiv.
from gtfIO import GTFReader, GTFWriter
from optparse import OptionParser

parser = OptionParser()
parser.add_option("--input", dest="input", help="A gtf file containing the RepeatMasked gtf annotation")
parser.add_option("--minlen", dest="minleng", help="minimum length")
parser.add_option("--output", dest="output", help="A gtf output file")
# NOTE(review): the score column presumably holds the RepeatMasker
# divergence (hence "maxdiv") - confirm against the producer of the input.
parser.add_option("--maxdiv", dest="maxdiv", help="maximum score (exclusive); entries with a score at or above this value are dropped", default=99999999999999999)
(options, args) = parser.parse_args()

# Report missing mandatory options as a usage error instead of letting
# int(None) or the reader/writer fail with an opaque traceback.
if options.input is None or options.minleng is None or options.output is None:
    parser.error("--input, --minlen and --output are required")

# Convert the thresholds once, before touching any file.
minleng = int(options.minleng)
maxdiv = float(options.maxdiv)

w = GTFWriter(options.output)
try:
    for e in GTFReader(options.input):
        # start and end are both counted, hence the +1
        leng = (e.end - e.start) + 1
        if leng >= minleng and e.score < maxdiv:
            w.write(e)
finally:
    # close the output even if reading or writing fails part-way
    w.close()
help="A gtf file containing the reference annotation") parser.add_option( "--totest", dest="totest", help="A gtf file containing the annotation that should be tested") parser.add_option( "--output", dest="output", help= "The output file containing novel TE insertions not in the reference genome" ) (options, args) = parser.parse_args() # chromosomes with incomplete reference annotation: 2L, 2R, 3L # fine: 3R, X, 4 rawref = GTFReader.readall(options.reference) rawtotest = GTFReader.readall(options.totest) chref = convert_chrhash(rawref) chtotest = convert_chrhash(rawtotest) ofh = GTFWriter(options.output) for chr in ["X", "2L", "2R", "3L", "3R", "4"]: cref = chref[chr] ctotest = chtotest[chr] ne = novelentries(cref, ctotest) for n in ne: ofh.write(n) ofh.close()
# Filter a RepeatMasker GTF annotation by length.
#
# Copies every entry from --input to --output whose length is at least
# --min-leng.
from gtfIO import GTFReader, GTFWriter
import sys
import random
from optparse import OptionParser, OptionGroup
import collections

parser = OptionParser()
parser.add_option("--input", dest="input", help="A gtf file containing the RepeatMasked gtf annotation")
parser.add_option("--min-leng", dest="minleng", help="minimum length")
parser.add_option("--output", dest="output", help="A gtf output file")
(options, args) = parser.parse_args()

# Report missing mandatory options as a usage error instead of letting
# int(None) or the reader/writer fail with an opaque traceback.
if options.input is None or options.minleng is None or options.output is None:
    parser.error("--input, --min-leng and --output are required")

minleng = int(options.minleng)

w = GTFWriter(options.output)
try:
    for e in GTFReader(options.input):
        # start and end are both counted, hence the +1
        leng = (e.end - e.start) + 1
        if leng >= minleng:
            w.write(e)
finally:
    # close the output even if reading or writing fails part-way
    w.close()
# chromosomes with incomplete reference annotation: 2L, 2R, 3L
# fine: 3R, X, 4

# Load both annotations completely, then index each with convert_chrhash so
# they can be looked up by chromosome name below.
rawref = GTFReader.readall(options.reference)
rawtotest = GTFReader.readall(options.totest)
chref = convert_chrhash(rawref)
chtotest = convert_chrhash(rawtotest)

ofh = GTFWriter(options.output)
try:
    # "chrom" rather than "chr" so the builtin chr() is not shadowed at
    # module level.
    for chrom in ["X", "2L", "2R", "3L", "3R", "4"]:
        cref = chref[chrom]
        ctotest = chtotest[chrom]
        # write whatever novelentries returns for this chromosome
        for n in novelentries(cref, ctotest):
            ofh.write(n)
finally:
    # close the output even if a lookup, comparison or write fails
    ofh.close()