end=te.end
			seq=maskSeq(seq,start,end)		# mask the TE with Ns
		novelrefseq[chr]=seq
	return novelrefseq



def printSequences(seq,outfasta):
	"""Write every named sequence in seq to the fasta output outfasta.

	seq      -- mapping of sequence name to sequence; iterated with .items()
	outfasta -- output destination handed unchanged to FastaWriter
	"""
	# NOTE(review): 60 is presumably the fasta line width -- confirm against FastaWriter
	fw=FastaWriter(outfasta,60)
	try:
		for n,s in seq.items():
			fw.write(n,s)
	finally:
		# close the writer even when a write fails, so it is never left open
		fw.close()
	
	

# Command line interface: TE annotation (gtf), reference sequence (fasta)
# and the path of the fasta output.
parser = OptionParser()
parser.add_option("--gtf",dest="gtfte",help="A gtf file containing the TE annotation")
parser.add_option("--input",dest="fastaref",help="A fasta file containing the reference sequence")
# no trailing comma here: it would turn the statement into a discarded tuple
parser.add_option("--output",dest="outfasta",help="The output of the fasta sequences")


(options, args) = parser.parse_args()

# Pipeline: load the reference, load the TE annotation, mask, write the result.
print("Loading refseqs..")
refseqs = FastaReader.readFastaHash(options.fastaref)
print("Loading gtf..")
noveltegtf= GTFTEReader.readall(options.gtfte)
print("Masking reference sequence..")
novelrefseq=maskTEsinSeq(noveltegtf,refseqs)
print("Printing masked reference sequence..")
printSequences(novelrefseq,options.outfasta)
# Example #2
0
(options, args) = parser.parse_args()

# Fixed reporting order: one output line is printed per family, in this order.
teorder = [
    "1360", "17.6", "1731", "297", "3S18", "412", "accord", "accord2",
    "aurora-element", "baggins", "Bari1", "Bari2", "blood", "BS", "BS3", "BS4",
    "Burdock", "Circe", "copia", "Cr1a", "diver", "diver2", "Dm88", "Doc",
    "Doc2-element", "Doc3-element", "Doc4-element", "F-element", "FB", "flea",
    "frogger", "Fw2", "Fw3", "G-element", "G2", "G3", "G4", "G5", "G5A", "G6",
    "G7", "GATE", "gtwin", "gypsy", "gypsy10", "gypsy11", "gypsy12", "gypsy2",
    "gypsy3", "gypsy4", "gypsy5", "gypsy6", "gypsy7", "gypsy8", "gypsy9", "HB",
    "Helena", "HeT-A", "HMS-Beagle", "HMS-Beagle2", "hobo", "hopper",
    "hopper2", "I-element", "Idefix", "INE-1", "invader1", "invader2",
    "invader3", "invader4", "invader5", "invader6", "Ivk", "jockey", "jockey2",
    "Juan", "looper1", "Mariner", "mariner2", "Max-element", "McClintock",
    "mdg1", "mdg3", "micropia", "NOF", "opus", "Osvaldo", "P-element", "pogo",
    "Porto1", "Q-element", "Quasimodo", "R1-2", "R1A1-element", "R2-element",
    "roo", "rooA", "rover", "Rt1a", "Rt1b", "Rt1c", "S-element", "S2",
    "springer", "Stalker", "Stalker2", "Stalker3", "Stalker4", "Tabor",
    "TAHRE", "Tc1", "Tc1-2", "Tc3", "Tirant", "Tom1", "transib1", "transib2",
    "transib3", "transib4", "Transpac", "X-element", "ZAM"
]

print("Loading refseqs..")
refseqs = FastaReader.readFastaHash(options.teseqs)
# family name -> sequence entry name, read from the hierarchy file
f2e = read_famtoentry(options.hier)
for fam in teorder:
    # a family missing from the hierarchy or the fasta raises KeyError here
    entry = f2e[fam]
    seq = refseqs[entry]
    l = len(seq)
    # single-argument call form: prints the same text on Python 2 and Python 3
    print("{0}\t{1}".format(fam, l))
    return novelrefseq


def printSequences(seq, outfasta):
    """Write every named sequence in seq to the fasta output outfasta.

    seq      -- mapping of sequence name to sequence; iterated with .items()
    outfasta -- output destination handed unchanged to FastaWriter
    """
    # NOTE(review): 60 is presumably the fasta line width -- confirm against FastaWriter
    fw = FastaWriter(outfasta, 60)
    try:
        for n, s in seq.items():
            fw.write(n, s)
    finally:
        # close the writer even when a write fails, so it is never left open
        fw.close()


# Command line interface: TE annotation (gtf), reference sequence (fasta)
# and the path of the fasta output.
parser = OptionParser()
parser.add_option("--gtf",
                  dest="gtfte",
                  help="A gtf file containing the TE annotation")
parser.add_option("--input",
                  dest="fastaref",
                  help="A fasta file containing the reference sequence")
# no trailing comma after this call: it would turn the statement into a
# discarded one-element tuple
parser.add_option("--output",
                  dest="outfasta",
                  help="The output of the fasta sequences")

(options, args) = parser.parse_args()

# Pipeline: load the reference, load the TE annotation, mask, write the result.
print("Loading refseqs..")
refseqs = FastaReader.readFastaHash(options.fastaref)
print("Loading gtf..")
noveltegtf = GTFTEReader.readall(options.gtfte)
print("Masking reference sequence..")
novelrefseq = maskTEsinSeq(noveltegtf, refseqs)
print("Printing masked reference sequence..")
printSequences(novelrefseq, options.outfasta)
                a=l.split("\t")
                entry=a[0]
                fam=a[2]
                ord=a[4]
                fto[fam]=entry
        return fto


# Command line interface: the TE sequences (fasta) and the TE hierarchy file.
parser = OptionParser()
parser.add_option("--input",dest="teseqs",help="The TE seqs")
parser.add_option("--hier",dest="hier",help="the te hierarchy")
(options, args) = parser.parse_args()

# Fixed reporting order: one output line is printed per family, in this order.
teorder=[
        "1360","17.6","1731","297","3S18","412","accord","accord2",
        "aurora-element","baggins","Bari1","Bari2","blood","BS","BS3","BS4",
        "Burdock","Circe","copia","Cr1a","diver","diver2","Dm88","Doc",
        "Doc2-element","Doc3-element","Doc4-element","F-element","FB","flea",
        "frogger","Fw2","Fw3","G-element","G2","G3","G4","G5","G5A","G6",
        "G7","GATE","gtwin","gypsy","gypsy10","gypsy11","gypsy12","gypsy2",
        "gypsy3","gypsy4","gypsy5","gypsy6","gypsy7","gypsy8","gypsy9","HB",
        "Helena","HeT-A","HMS-Beagle","HMS-Beagle2","hobo","hopper",
        "hopper2","I-element","Idefix","INE-1","invader1","invader2",
        "invader3","invader4","invader5","invader6","Ivk","jockey","jockey2",
        "Juan","looper1","Mariner","mariner2","Max-element","McClintock",
        "mdg1","mdg3","micropia","NOF","opus","Osvaldo","P-element","pogo",
        "Porto1","Q-element","Quasimodo","R1-2","R1A1-element","R2-element",
        "roo","rooA","rover","Rt1a","Rt1b","Rt1c","S-element","S2",
        "springer","Stalker","Stalker2","Stalker3","Stalker4","Tabor",
        "TAHRE","Tc1","Tc1-2","Tc3","Tirant","Tom1","transib1","transib2",
        "transib3","transib4","Transpac","X-element","ZAM",
]



print("Loading refseqs..")
refseqs = FastaReader.readFastaHash(options.teseqs)
# family name -> sequence entry name, read from the hierarchy file
f2e=read_famtoentry(options.hier)
for fam in teorder:
        # a family missing from the hierarchy or the fasta raises KeyError here
        entry=f2e[fam]
        seq=refseqs[entry]
        l=len(seq)
        # single-argument call form: prints the same text on Python 2 and Python 3
        print("{0}\t{1}".format(fam,l))