def main(fastafile, predfile, actualfile):
    """Compare predicted features against an actual annotation.

    Loads both feature sets, accumulates nucleotide-level statistics over
    every sequence in the FASTA file that has an actual annotation, then
    accumulates exon- and gene-level statistics over every reference in the
    actual annotation.

    Parameters:
        fastafile  -- passed to fasta.loadMfa(); iterated as (head, seq)
                      pairs.  Each head is split on ':' and its second field
                      is read as the integer start coordinate of seq.
        predfile   -- passed to Features(predfile, 'exon').
        actualfile -- passed to Features(actualfile).

    Returns:
        (nucstats, exonstats, genestats, exonclassified): three accumulators
        created with zeros(4) and the dict of 'ME'/'WE'/'PE'/'CE' counters
        handed to classifyExonStats().

    Raises:
        SystemExit(2) if fasta.loadMfa() raises a StandardError.
    """
    # Load each gff
    nucstats, exonstats, genestats = zeros(4), zeros(4), zeros(4)
    exonclassified = {'ME': 0, 'WE': 0, 'PE': 0, 'CE': 0}
    # Handed to computeSpliceStats() below; not part of the return value.
    splicestats = {}
    predicted = Features(predfile, 'exon')
    actual = Features(actualfile)
    syncReferences(actual, predicted)

    # Nucleotide stats
    try:
        fastaseqs = fasta.loadMfa(fastafile)
    except StandardError:
        # Single-argument print() emits the same text as the print statement.
        print("Failed to load fasta sequence.")
        sys.exit(2)

    for (head, seq) in fastaseqs:
        header = head.split(':')

        # Prefer the full header as the reference name and fall back to the
        # part before the first ':'.  The name is chosen on every iteration
        # so that an unannotated sequence can never reuse the name left over
        # from the previous record (or hit an unbound variable on the first).
        if head in actual:
            title = head
        else:
            title = header[0]

        if title not in actual:
            # Spacing matches the original comma-separated print statement.
            print('Sequence from  %s  has no actual gene annotation. Skipping.'
                  % title)
            continue

        # The sequence covers startref..endref inclusive.
        startref = int(header[1])
        endref = startref + len(seq) - 1

        actualnuc = exonString(actual[title], startref, endref)
        predictednuc = exonString(predicted[title], startref, endref)
        nucstats += computeNucStats(actualnuc, predictednuc)

    # Exon, gene stats
    for ref in actual:
        computeSpliceStats(actual, predicted, ref, splicestats)
        classifyExonStats(actual, predicted, ref, exonclassified)
        exonstats += computeExonStats(actual, predicted, ref)
        genestats += computeGeneStats(actual, predicted, ref)

    return (nucstats, exonstats, genestats, exonclassified)
sys.exit(0) if o in("-f", "--fasta"): if len(a): fastafile = a readfasta = True #Load each gff nucstats, exonstats, genestats = zeros(4), zeros(4), zeros(4) exonclassified = {'ME':0, 'WE':0, 'PE':0, 'CE':0} splicestats = {} predicted = Features(args[0], 'exon') actual = Features(args[1]) syncReferences(actual, predicted) #Nucleotide stats if readfasta == True: try: fastaseqs = fasta.loadMfa(fastafile) except StandardError: print "Failed to load fasta sequence." sys.exit(2) for (head,seq) in fastaseqs: header = head.split(':') #print header, head, head in actual if header[0] in actual: title = header[0] if head in actual: title = head startref = string.atoi(header[1]) endref = startref + len(seq) - 1 #verifyFasta(head,seq,predlist) if title in actual: actualnuc = exonString(actual[title], startref, endref) predictednuc = exonString(predicted[title], startref, endref) stats = computeNucStats(actualnuc, predictednuc)
#!/usr/bin/env python """viewallgff.py <fasta_file> <gff_file> Script to output sequences, combined into one gene, from a list of gff features """ import fasta, sys from common.feature import * from common.sequence import * if len(sys.argv) != 3: print __doc__ sys.exit(0) genes = Features(sys.argv[2]) fastaseqs = fasta.loadMfa(sys.argv[1]) flank = 0 for (head,seq) in fastaseqs: header = head.split(':') startref = int(header[1]) if head in genes: ref = head; print ref else: ref = header[0] if ref in genes: for name in genes[ref]: gene = genes[ref][name] print '>' + name codingsequence = '' coords= gene.coords[:] if gene.strand == '-': coords.reverse()