Beispiel #1
0
def main(fastafile, predfile, actualfile):
	"""Accumulate nucleotide, exon and gene statistics for a prediction.

	Loads the predicted features (restricted to 'exon') from predfile and
	the reference features from actualfile, synchronises their references,
	and then sums the per-reference statistics returned by the compute*
	helpers.

	fastafile  -- multi-fasta file passed to fasta.loadMfa; every header
	              must look like "<name>:<start>" because the text after
	              the first ':' is parsed as the integer start coordinate
	predfile   -- annotation file holding the predicted features
	actualfile -- annotation file holding the reference features

	Returns the tuple (nucstats, exonstats, genestats, exonclassified).
	The three *stats values start as zeros(4) (presumably numpy arrays --
	confirm against the file's imports) and exonclassified is a dict of
	counters keyed 'ME', 'WE', 'PE' and 'CE'.  Splice statistics are
	collected into a local dict but are not part of the return value.

	Exits the process with status 2 if the fasta file cannot be loaded.
	"""
	readfasta = True
	#Load each gff
	nucstats, exonstats, genestats = zeros(4), zeros(4), zeros(4)
	exonclassified = {'ME':0, 'WE':0, 'PE':0, 'CE':0}
	splicestats = {}
	predicted = Features(predfile, 'exon')
	actual = Features(actualfile)
	syncReferences(actual, predicted)
	#Nucleotide stats
	if readfasta:
		try:
			fastaseqs = fasta.loadMfa(fastafile)
		except StandardError:
			print("Failed to load fasta sequence.")
			sys.exit(2)
		for (head, seq) in fastaseqs:
			header = head.split(':')
			# Resolve the reference name afresh for every record: prefer the
			# full header, otherwise fall back to the part before the first
			# ':'.  Without a per-record default, an unmatched record either
			# raised a NameError (first record) or silently reused the
			# previous record's reference.
			if head in actual:
				title = head
			else:
				title = header[0]
			startref = int(header[1])
			# Inclusive end coordinate of this sequence on the reference.
			endref = startref + len(seq) - 1
			#verifyFasta(head,seq,predlist)
			if title in actual:
				actualnuc = exonString(actual[title], startref, endref)
				predictednuc = exonString(predicted[title], startref, endref)
				nucstats += computeNucStats(actualnuc, predictednuc)
			else:
				# The doubled spaces reproduce the output of the former
				# comma-separated print statement.
				print('Sequence from  %s  has no actual gene annotation. Skipping.' % title)

	#Exon, gene stats
	for ref in actual:
		computeSpliceStats(actual, predicted, ref, splicestats)
		classifyExonStats(actual, predicted, ref, exonclassified)
		exonstats += computeExonStats(actual, predicted, ref)
		genestats += computeGeneStats(actual, predicted, ref)
	return (nucstats, exonstats, genestats, exonclassified)
Beispiel #2
0
			sys.exit(0)
		if o in("-f", "--fasta"):
			if len(a):
				fastafile = a
				readfasta = True
	#Load each gff
	nucstats, exonstats, genestats = zeros(4), zeros(4), zeros(4)
	exonclassified = {'ME':0, 'WE':0, 'PE':0, 'CE':0}
	splicestats = {}
	predicted = Features(args[0], 'exon')
	actual = Features(args[1])
	syncReferences(actual, predicted)
	#Nucleotide stats
	if readfasta == True:
		try:
			fastaseqs = fasta.loadMfa(fastafile)
		except StandardError:
			print "Failed to load fasta sequence."
			sys.exit(2)
		for (head,seq) in fastaseqs:
			header = head.split(':')
			#print header, head, head in actual
			if header[0] in actual: title = header[0]
			if head in actual: title = head
			startref = string.atoi(header[1])
			endref = startref + len(seq) - 1
			#verifyFasta(head,seq,predlist)
			if title in actual:
				actualnuc = exonString(actual[title], startref, endref)
				predictednuc = exonString(predicted[title], startref, endref)
				stats = computeNucStats(actualnuc, predictednuc)
Beispiel #3
0
#!/usr/bin/env python
"""viewallgff.py <fasta_file> <gff_file>

Script to output sequences, combined into one gene, from a list of gff features
"""

import fasta, sys
from common.feature import *
from common.sequence import *

# Usage guard: the script needs exactly two arguments (the fasta file and
# the gff file); any other count prints the module docstring and exits
# with status 0.
if not len(sys.argv) == 3:
	print(__doc__)
	sys.exit(0)

# The gff features (second argument) are parsed before the fasta records
# (first argument) are read.
genes = Features(sys.argv[2])
fastaseqs = fasta.loadMfa(sys.argv[1])
# flank starts at 0; it is not used anywhere in the visible part of the script.
flank = 0

for (head,seq) in fastaseqs:
	header = head.split(':')
	startref = int(header[1])
	if head in genes: ref = head; print ref
	else: ref = header[0]
	if ref in genes:
		for name in genes[ref]:
			gene = genes[ref][name]
			print '>' + name
			codingsequence = ''
			coords= gene.coords[:]
			if gene.strand == '-':
				coords.reverse()