# Script body: for every sequence in <in.fasta> whose ID has transcripts
# annotated in <in.gff>, write each transcript's sequence to <out.fasta>
# under the defline ">" + transcript ID.
rex = Rex()

# Process command line
if len(sys.argv) != 4:
    sys.exit(sys.argv[0] + " <in.fasta> <in.gff> <out.fasta>")
(fastaFile, gffFile, outFile) = sys.argv[1:]

# Read GFF; the result is looked up by substrate (sequence) ID below
gffReader = GffTranscriptReader()
transcriptsBySubstrate = gffReader.hashBySubstrate(gffFile)

writer = FastaWriter()

# Open the output file first (as before), then stream the FASTA file.
# The with/finally pair guarantees both handles are released even when
# a defline cannot be parsed or a library call raises.
with open(outFile, "wt") as OUT:
    fastaReader = FastaReader(fastaFile)
    try:
        # Process each substrate in the FASTA file
        while True:
            [defline, seq] = fastaReader.nextSequence()
            if not defline:
                break  # a false-y defline marks the end of input

            # Raw string: \s and \S must reach the regex engine untouched
            if not rex.find(r"^\s*>\s*(\S+)", defline):
                sys.exit("Can't parse defline: " + defline)
            substrateID = rex[1]  # presumably capture group 1 of the match

            transcripts = transcriptsBySubstrate.get(substrateID, None)
            if not transcripts:
                continue  # nothing annotated on this substrate

            for transcript in transcripts:
                transSeq = transcript.loadTranscriptSeq(seq)
                writer.addToFasta(">" + transcript.getID(), transSeq, OUT)
    finally:
        fastaReader.close()
# The above imports should allow this program to run in both Python 2 and
# Python 3.  You might need to update your version of module "future".
import sys
from FastaReader import FastaReader
from FastaWriter import FastaWriter
from GffTranscriptReader import GffTranscriptReader

# Script body: copy to <out.fasta> only those sequences of <in.fasta> whose
# ID is the substrate of at least one transcript in <in.gff> with an ID
# beginning with "ALT".  Deflines and sequences are written back as read.

# Process command line
if len(sys.argv) != 4:
    sys.exit(sys.argv[0] + " <in.fasta> <in.gff> <out.fasta>")
(fastaFile, gffFile, outFile) = sys.argv[1:]

# Collect the substrates that carry at least one "ALT*" transcript
gffReader = GffTranscriptReader()
keep = set()
for transcript in gffReader.loadGFF(gffFile):
    if transcript.getID().startswith("ALT"):
        keep.add(transcript.getSubstrate())

# Stream the FASTA file, emitting only the substrates selected above.
# The reader is opened before the output file (same order as before);
# try/finally and with release both handles even if an error occurs.
fastaReader = FastaReader(fastaFile)
try:
    writer = FastaWriter()
    with open(outFile, "wt") as fh:
        while True:
            (defline, seq) = fastaReader.nextSequence()
            if not defline:
                break  # a false-y defline marks the end of input
            # Only the ID is needed; the second element is ignored
            (substrateID, _attributes) = FastaReader.parseDefline(defline)
            if substrateID not in keep:
                continue
            writer.addToFasta(defline, seq, fh)
finally:
    fastaReader.close()

print("[done]", file=sys.stderr)