def runGenomescan(features, debug=False):
    """Run genomescan supplying extracted features from genome as homologous proteins.

    For every feature the GenomeScan report is read from
    'gsOutput/<domain>.html' when that file already exists; otherwise
    genomescanFromFeature() is called (it receives that path as oFileHandle).
    The report lines are handed to parseGenomeScanOutput(); the returned
    annotation is written to 'gsAnnotations/<domain>.txt' and the returned
    peptides are re-headed as '<domain>.<n> <block>' and written to
    'gsPeptides/<domain>.fa'.

    Arguments:
        features -- iterable of feature objects exposing a ``domain`` attribute.
        debug    -- kept for backward compatibility; not used by this function.

    Relies on the module-level name ``blastDb``.  Every file opened here is
    closed explicitly, also when an exception interrupts the processing of a
    feature, instead of being left to the garbage collector.
    """
    for i, feature in enumerate(features):
        # Progress line: 1-based counter followed by the feature itself.
        print('%s %s' % (i + 1, feature))
        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature, blastDb, oFileHandle=oFilename)
        else:
            # Re-use the report saved by an earlier run.
            htmlFile = open(oFilename)
            try:
                html = htmlFile.read()
            finally:
                htmlFile.close()
        # NOTE(review): the split is applied to both branches, which assumes
        # genomescanFromFeature returns the report as one string -- confirm.
        html = html.split('\n')

        annotation, peptides = parseGenomeScanOutput(html)

        annotFile = open(annotFilename, 'w')
        try:
            annotFile.write('%s\n' % (annotation,))
        finally:
            annotFile.close()

        # The peptides come back as FASTA text; wrap them in a file-like
        # object so the FASTA reader can iterate over the records.
        fakeFaFile = StringIO.StringIO(peptides)
        faIter = fasta.load_iter(fakeFaFile)
        writer = fasta.MfaWriter(pepFilename)
        try:
            for j, (h, s) in enumerate(faIter):
                # Keep only the part of the header before the first '|'.
                block = h.split('|')[0]
                if j == 1:
                    # A second peptide exists for this feature: echo the feature.
                    print(feature)
                h = '%s.%i %s' % (feature.domain, j + 1, block)
                writer.write(h, s + '\n')
        finally:
            writer.close()
def Initialize():
    """Write the six-frame records of 'MHC_hg18.fa' to '6frames.fa', then exit.

    The sequence loaded from 'MHC_hg18.fa' is passed through
    sequence.sixFrameTranslationIter(); every (frame, translation) pair it
    yields is stored under the header '<original header>:<frame>'.

    This function does not return: it ends by calling sys.exit().
    """
    name, dna = fasta.load('MHC_hg18.fa')
    translations = sequence.sixFrameTranslationIter(dna)
    out = fasta.MfaWriter('6frames.fa')
    for frameNo, protein in translations:
        # Progress message on stdout, one line per frame.
        print(' '.join(('Frame:', str(frameNo))))
        recordHeader = '%s:%i' % (name, frameNo)
        out.write(recordHeader, protein)
    out.close()
    sys.exit()
# Script: load one sequence, iterate over its six-frame translations and walk
# the stretches lying between consecutive '*' characters of every frame.
import os, sys
import re, copy
import fasta, sequence, hmmer3

# Path of the sequence file: first command-line argument.
seqFilename = sys.argv[1]
header, seq = fasta.load(seqFilename)
# Keep only the first whitespace-delimited word of the header.
header = header.split()[0]
L = len(seq)

# Matches a literal '*'.
# presumably '*' marks a stop codon in the translated frames -- TODO confirm
# against sequence.sixFrameTranslationIter.
pattern = re.compile('\*')
# Only stretches of at least this many characters are processed further.
minLen = 10

sixFrameIter = sequence.sixFrameTranslationIter(seq)
writer = fasta.MfaWriter(sys.stdout)

i = 0
for frame, p in sixFrameIter:
    # Progress goes to stderr; stdout is handed to the FASTA writer above.
    print >> sys.stderr, 'Frame:', frame
    matchIter = pattern.finditer(p)
    # NOTE(review): .next() raises StopIteration when p contains no '*' at
    # all; nothing visible here catches it.
    match = matchIter.next()
    start = match.start()
    for match in matchIter:
        end = match.start()
        # Text strictly between the '*' at `start` and the '*' at `end`.
        orf = p[start + 1:end]
        length = len(orf)
        if length >= minLen:
            # NOTE(review): `start` is not advanced anywhere in the visible
            # part of this loop; the rest of the loop body is outside this view.
            gStart, gEnd, strand = hmmer3.convertSixFrameToGenomic(
                start + 1, end + 1, frame, L)
Date: Tue Aug 15 10:18:46 EST 2006 """
import os, sys
import hmmer, fasta, sequence

# BLAST database path, built relative to the user's home directory.
homeDir = os.environ['HOME']
blastdb = os.path.join(homeDir, 'databases/opossum/assembly/blastdb/assembly')

# Work inside the directory given as the first command-line argument; every
# relative file name below resolves against it.
ioDir = sys.argv[1]
os.chdir(ioDir)

# Output files, all created (or truncated) in the working directory.
genomicFile = open('DEFB_genomic.txt', 'w')
summaryFile = open('DEFB_summary.txt', 'w')
dnaWriter = fasta.MfaWriter('DEFB_extracted.fa')
pepWriter = fasta.MfaWriter('DEFB_extracted_pep.fa')

domains = hmmer.loadDomains('DEFB.txt', seqType='BlockSixFrame')
# Tab-separated header row: the field names of the first domain followed by
# the four extra column names.
# NOTE(review): domains[0] presumably fails when no domain was loaded --
# confirm what hmmer.loadDomains returns for an empty input.
print >> genomicFile, '\t'.join(
    domains[0].fields + ['strand', 'lowScoring', 'pseudogene', 'nCysteines'])

for i, domain in enumerate(domains):
    # Handle at most the first 100 domains.
    if i > 99: break
    # Name: 'DEFB_' plus the 1-based index, printed with at least two digits.
    domain.domain = 'DEFB_%0.2i' % (i + 1)
    domain.toGenomic(relative=True)
    # Initial values of the extra fields.
    domain.addField('lowScoring', 'N')
    domain.addField('pseudogene', 'N')
    domain.addField('nCysteines', 0)
    summary = []
Author: Tony Papenfuss Date: Wed Aug 23 08:52:58 EST 2006 """
import os, sys
import re, copy
import fasta, sequence, hmmer3
from hmmer3 import hmmer2frame

# NOTE(review): the square brackets make this a character class, so the
# pattern matches any ONE of the characters  * | X { 2 0 , }  -- "X{200,}" is
# not a repeat count here.  If the intent was "a '*' or a run of 200 or more
# X", the pattern would be '\*|X{200,}'.  Confirm before changing: code
# outside this view may rely on every match being one character long.
pattern = re.compile('[\*|X{200,}]')
minLen = 20

i = 0
writer = fasta.MfaWriter('ORFs.fa')
faFile = fasta.load_iter('6frames.fa')
for header, seq in faFile:
    header = header.strip()
    # Progress goes to stderr.
    print >> sys.stderr, header
    # The header must contain exactly one ':' for this unpacking to succeed;
    # the part after it is converted to int and looked up in hmmer2frame.
    block, hmmerFrame = header.split(':')
    frame = hmmer2frame[int(hmmerFrame)]
    matchIter = pattern.finditer(seq)
    try:
        match = matchIter.next()
    except StopIteration:
        # Reached when the pattern does not match anywhere in seq.
        # NOTE(review): `match` was not assigned by the failed .next() call;
        # for the first record this print raises NameError, later it shows
        # whatever `match` was last bound to.
        print match
        print seq
        sys.exit()
# Require exactly three entries in args; otherwise exit, handing the module
# docstring to sys.exit() as the message.
if len(args)!=3:
    sys.exit(__doc__)
gffFilename = args[1]
faFilename = args[2]

data = gff.load(gffFilename)
header,seq = fasta.load(faFilename)

# Write to the file named by options.oFilename when it is set, else to stdout.
if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout
writer = fasta.MfaWriter(oFile)

for name in data:
    s = []        # sequence pieces collected for this name
    extrema = []  # start and end of every selected feature
    for f in data[name]:
        # Only features whose type is listed in options.features are used.
        if f.type in options.features:
            # Both branches take the same slice of seq; any strand other than
            # '+' is additionally reverse-complemented.
            # NOTE(review): start-1 suggests 1-based inclusive coordinates --
            # confirm against gff.load.
            if f.strand=='+':
                start,end = f.start,f.end
                _seq = seq[start-1:end]
            else:
                start,end = f.start,f.end
                _seq = seq[start-1:end]
                _seq = sequence.reverse_complement(_seq)
            s.append(_seq)
            extrema.append(f.start)
            extrema.append(f.end)
return isCoding, warnings, errors # ---------------------------------------------------------------------- ioDir = sys.argv[1] os.chdir(ioDir) oFile = open('checkResults.txt', 'w') sys.stdout = oFile hmmerModel = { 'round1': '../../HMMs/Defensin_beta.hmm', 'round2': '../../HMMs/Defensin_beta_new.hmm' }[ioDir] writer = fasta.MfaWriter('peptides.fa') annotFile = open('annot.txt', 'w') annotFilenames = glob.glob('gsAnnotations/*.txt') for annotFilename in annotFilenames: name = extractRootName(annotFilename) print '>>>', name + '\n' annotFilename = 'gsAnnotations/%s.txt' % name annotation = open(annotFilename).readlines() predictions = parseGenscan(annotation) pepFilename = 'gsPeptides/%s.fa' % name peptides = fasta.load_mfa(pepFilename) peptides = [(h.split()[0], s) for h, s in peptides] peptides = dict(peptides)