def runGenomescan(features, debug=False):
    """Run genomescan supplying extracted features from genome as homologous proteins.

    For each feature three paths are derived from ``feature.domain``:

    * ``gsOutput/<domain>.html``     -- raw GenomeScan output
    * ``gsAnnotations/<domain>.txt`` -- annotation returned by the parser
    * ``gsPeptides/<domain>.fa``     -- predicted peptides as multi-FASTA

    If the HTML file does not exist yet, the output is obtained from
    ``genomescanFromFeature`` (which is handed the HTML path as
    ``oFileHandle``); otherwise the existing file is read back from disk.
    The output is then parsed with ``parseGenomeScanOutput`` and the peptide
    records are rewritten with headers of the form ``<domain>.<n> <block>``,
    where ``n`` counts from 1 and ``block`` is the text before the first
    ``|`` of the original header.

    Arguments:
        features -- iterable of objects exposing a ``domain`` attribute.
        debug    -- accepted for interface compatibility; not used here.

    Relies on the module-level name ``blastDb`` being defined by the caller's
    module. The three output directories are assumed to exist already.
    """
    for i, feature in enumerate(features):
        # Progress line: 1-based counter followed by the feature itself.
        print('%s %s' % (i + 1, feature))

        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature, blastDb, oFileHandle=oFilename)
        else:
            # try/finally rather than ``with`` so the handle is released
            # deterministically on every interpreter this script may run on.
            htmlFile = open(oFilename)
            try:
                html = htmlFile.read()
            finally:
                htmlFile.close()
        html = html.split('\n')
        annotation, peptides = parseGenomeScanOutput(html)

        # Same bytes as ``print >> file, annotation``: str(annotation) + newline,
        # but the handle is closed explicitly instead of being left to the GC.
        annotFile = open(annotFilename, 'w')
        try:
            annotFile.write('%s\n' % (annotation,))
        finally:
            annotFile.close()

        # The peptides come back as one FASTA-formatted string; wrap it in a
        # file-like object so the regular FASTA iterator can read it.
        fakeFaFile = StringIO.StringIO(peptides)
        faIter = fasta.load_iter(fakeFaFile)
        writer = fasta.MfaWriter(pepFilename)
        try:
            for j, (h, s) in enumerate(faIter):
                block = h.split('|')[0]
                if j == 1:
                    # Echo the feature again once a second peptide shows up.
                    print(feature)
                h = '%s.%i %s' % (feature.domain, j + 1, block)
                writer.write(h, s + '\n')
        finally:
            # Close the writer even if a malformed record aborts the loop.
            writer.close()
def runGenomescan(features, debug=False):
    """Run genomescan supplying extracted features from genome as homologous proteins.

    For each feature three paths are derived from ``feature.domain``:

    * ``gsOutput/<domain>.html``     -- raw GenomeScan output
    * ``gsAnnotations/<domain>.txt`` -- annotation returned by the parser
    * ``gsPeptides/<domain>.fa``     -- predicted peptides as multi-FASTA

    If the HTML file does not exist yet, the output is obtained from
    ``genomescanFromFeature`` (which is handed the HTML path as
    ``oFileHandle``); otherwise the existing file is read back from disk.
    The output is then parsed with ``parseGenomeScanOutput`` and the peptide
    records are rewritten with headers of the form ``<domain>.<n> <block>``,
    where ``n`` counts from 1 and ``block`` is the text before the first
    ``|`` of the original header.

    Arguments:
        features -- iterable of objects exposing a ``domain`` attribute.
        debug    -- accepted for interface compatibility; not used here.

    Relies on the module-level name ``blastDb`` being defined by the caller's
    module. The three output directories are assumed to exist already.
    """
    for i, feature in enumerate(features):
        # Progress line: 1-based counter followed by the feature itself.
        print('%s %s' % (i + 1, feature))

        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature, blastDb, oFileHandle=oFilename)
        else:
            # try/finally rather than ``with`` so the handle is released
            # deterministically on every interpreter this script may run on.
            htmlFile = open(oFilename)
            try:
                html = htmlFile.read()
            finally:
                htmlFile.close()
        html = html.split('\n')
        annotation, peptides = parseGenomeScanOutput(html)

        # Same bytes as ``print >> file, annotation``: str(annotation) + newline,
        # but the handle is closed explicitly instead of being left to the GC.
        annotFile = open(annotFilename, 'w')
        try:
            annotFile.write('%s\n' % (annotation,))
        finally:
            annotFile.close()

        # The peptides come back as one FASTA-formatted string; wrap it in a
        # file-like object so the regular FASTA iterator can read it.
        fakeFaFile = StringIO.StringIO(peptides)
        faIter = fasta.load_iter(fakeFaFile)
        writer = fasta.MfaWriter(pepFilename)
        try:
            for j, (h, s) in enumerate(faIter):
                block = h.split('|')[0]
                if j == 1:
                    # Echo the feature again once a second peptide shows up.
                    print(feature)
                h = '%s.%i %s' % (feature.domain, j + 1, block)
                writer.write(h, s + '\n')
        finally:
            # Close the writer even if a malformed record aborts the loop.
            writer.close()
#!/usr/bin/env python """ orfTest.py Author: Tony Papenfuss Date: Tue Aug 22 20:14:57 EST 2006 """ import os, sys import fasta, sequence header,seq = fasta.load('NKC.fa') orfIterator = fasta.load_iter('ORFs.fa') writer = fasta.MfaWriter('ORFs2.fa') for h,orf in orfIterator: chrom,block,orfId,limits = h.split()[0].split('.') start,end = limits.split('-') start = int(start) end = int(end) if start>end: strand = '-' start,end = end,start s = sequence.translate(sequence.reverseComplement(seq[start-1:end])) else: strand = '+' s = sequence.translate(seq[start-1:end])
Author: Tony Papenfuss Date: Wed Aug 23 08:52:58 EST 2006 """ import os, sys import re, copy import fasta, sequence, hmmer3 from hmmer3 import hmmer2frame pattern = re.compile('[\*|X{200,}]') minLen = 20 i = 0 writer = fasta.MfaWriter('ORFs.fa') faFile = fasta.load_iter('6frames.fa') for header, seq in faFile: header = header.strip() print >> sys.stderr, header block, hmmerFrame = header.split(':') frame = hmmer2frame[int(hmmerFrame)] matchIter = pattern.finditer(seq) try: match = matchIter.next() except StopIteration: print match print seq sys.exit() start = match.start()
def getSizes(filenames):
    """Print one ``name<TAB>length`` line per sequence in the given FASTA files.

    ``name`` is the first whitespace-delimited token of the record header and
    ``length`` is ``len()`` of the record's sequence. Files are read in the
    order given, via ``fasta.load_iter``.

    Arguments:
        filenames -- iterable of FASTA file names
    """
    # Lazily chain the records of all files; each file is only opened once
    # iteration actually reaches it, exactly as with nested loops.
    records = (record for path in filenames for record in fasta.load_iter(path))
    for recordHeader, residues in records:
        seqId = recordHeader.split(None, 1)[0]
        print('%s\t%s' % (seqId, len(residues)))
writer.write('%s:%i' % (header,frame),p) writer.close() sys.exit() # Initialize() header,seq = fasta.load('MHC_hg18.fa') L = len(seq) hstart = header.split()[0] pattern = re.compile('\*|X{200,}') minLen = 20 # sixFrameIter = sequence.sixFrameTranslationIter(seq) sixFrameIter = fasta.load_iter('6frames.fa') writer = fasta.MfaWriter('ORFs.fa') i = 0 for h,p in sixFrameIter: hmmerFrame = int(h.split(':')[-1]) frame = hmmer.hmmer2frame[hmmerFrame] print >> sys.stderr, 'Frame:', frame if frame>0: strand = '+' else: strand = '-' matchIter = pattern.finditer(p) match = matchIter.next() start = match.start()
writer.write('%s:%i' % (header, frame), p) writer.close() sys.exit() # Initialize() header, seq = fasta.load('MHC_hg18.fa') L = len(seq) hstart = header.split()[0] pattern = re.compile('\*|X{200,}') minLen = 20 # sixFrameIter = sequence.sixFrameTranslationIter(seq) sixFrameIter = fasta.load_iter('6frames.fa') writer = fasta.MfaWriter('ORFs.fa') i = 0 for h, p in sixFrameIter: hmmerFrame = int(h.split(':')[-1]) frame = hmmer.hmmer2frame[hmmerFrame] print >> sys.stderr, 'Frame:', frame if frame > 0: strand = '+' else: strand = '-' matchIter = pattern.finditer(p) match = matchIter.next() start = match.start()
Date: Wed Aug 23 08:52:58 EST 2006 """ import os, sys import re, copy import fasta, sequence, hmmer3 from hmmer3 import hmmer2frame pattern = re.compile('[\*|X{200,}]') minLen = 20 i = 0 writer = fasta.MfaWriter('ORFs.fa') faFile = fasta.load_iter('6frames.fa') for header,seq in faFile: header = header.strip() print >> sys.stderr, header block,hmmerFrame = header.split(':') frame = hmmer2frame[int(hmmerFrame)] matchIter = pattern.finditer(seq) try: match = matchIter.next() except StopIteration: print match print seq sys.exit() start = match.start()
def getSizes(filenames):
    """Print one ``name<TAB>length`` line per sequence in the given FASTA files.

    ``name`` is the first whitespace-delimited token of the record header and
    ``length`` is ``len()`` of the record's sequence. Files are read in the
    order given, via ``fasta.load_iter``.

    Arguments:
        filenames -- iterable of FASTA file names
    """
    # Lazily chain the records of all files; each file is only opened once
    # iteration actually reaches it, exactly as with nested loops.
    records = (record for path in filenames for record in fasta.load_iter(path))
    for recordHeader, residues in records:
        seqId = recordHeader.split(None, 1)[0]
        print('%s\t%s' % (seqId, len(residues)))