def loadHmmerData(filename, eValueCutoff=0.1, basename='hmm'):
    """Load six-frame HMMER domain hits and return them as sorted Features.

    Every domain read from *filename* whose ``eValue`` is at most
    *eValueCutoff* is converted in place with ``toGenomic()``, wrapped in a
    ``Feature`` and echoed to stdout.

    Arguments:
        filename: path handed to ``hmmer.loadDomains`` (loaded with
            ``seqType='BlockSixFrame'``).
        eValueCutoff: largest e-value a domain may have and still be kept.
        basename: prefix of each feature name.  The suffix is the domain's
            zero-based position in the *unfiltered* domain list, zero-padded
            to two digits, so names skip numbers when hits are rejected.

    Returns:
        The result of ``DSU`` applied to the kept features with the key
        ``(chrom, start)``.
        NOTE(review): ``DSU`` is defined outside this block -- presumably a
        decorate-sort-undecorate helper; confirm its return type.
    """
    def _byPosition(feature):
        # Sort key handed to DSU.
        return (feature.chrom, feature.start)

    hits = hmmer.loadDomains(filename, seqType='BlockSixFrame')
    kept = []
    for index, hit in enumerate(hits):
        # Written as a negated "<=" so the comparison performed is exactly
        # the one the cutoff is defined by.
        if not (hit.eValue <= eValueCutoff):
            continue
        label = '%s%0.2i' % (basename, index)
        # Coordinates are read only after the conversion has run.
        hit.toGenomic()
        feature = Feature(
            label,
            hit.accession,
            hit.sStart,
            hit.sEnd,
            hit.strand,
            hit.eValue,
        )
        # Single-argument parenthesised form: same output as "print feature".
        print(feature)
        kept.append(feature)
    return DSU(kept, _byPosition)
if len(warnings2)>0: h = '%s %s' % (name, '; '.join(warnings2)) else: h = name writer.write(h, seq+'\n') for exon in prediction: exon.gene_exon = name print >> annotFile, exon print >> annotFile else: print 'Annotation warnings:', warnings print 'Annotation errors: ', errors print hmmerFilename = 'gsPepHmmer/%s.txt' % name os.system('hmmsearch %s %s > %s' % (hmmerModel, pepFilename, hmmerFilename)) domains = hmmer.loadDomains(hmmerFilename) if domains: print 'Defensin motifs:' for d in domains: print d else: print 'No defensin motifs' print print '='*80 + '\n' writer.close() annotFile.close() oFile.close()
import os, sys import hmmer, fasta, sequence homeDir = os.environ['HOME'] blastdb = os.path.join(homeDir, 'databases/opossum/assembly/blastdb/assembly') ioDir = sys.argv[1] os.chdir(ioDir) genomicFile = open('DEFB_genomic.txt', 'w') summaryFile = open('DEFB_summary.txt', 'w') dnaWriter = fasta.MfaWriter('DEFB_extracted.fa') pepWriter = fasta.MfaWriter('DEFB_extracted_pep.fa') domains = hmmer.loadDomains('DEFB.txt', seqType='BlockSixFrame') print >> genomicFile, '\t'.join( domains[0].fields + ['strand', 'lowScoring', 'pseudogene', 'nCysteines']) for i, domain in enumerate(domains): if i > 99: break domain.domain = 'DEFB_%0.2i' % (i + 1) domain.toGenomic(relative=True) domain.addField('lowScoring', 'N') domain.addField('pseudogene', 'N') domain.addField('nCysteines', 0) summary = [] h, s = fasta.getSequence(blastdb, domain.accession, start=domain.sStart,
# Script section: read the HMMER hits in 'DEFB.txt', open the DEFB_* output
# files in the directory given as the first command-line argument, and begin
# processing the hits one by one.
# NOTE(review): `os` and `sys` are used below but are not imported in this
# span -- they must be imported earlier in the file.
import hmmer, fasta, sequence

homeDir = os.environ['HOME']
# Path passed to fasta.getSequence below as the sequence source.
blastdb = os.path.join(homeDir, 'databases/opossum/assembly/blastdb/assembly')

# All relative file names below are resolved inside the directory given as
# the first command-line argument.
ioDir = sys.argv[1]
os.chdir(ioDir)

# Output files/writers; none of them is closed within the visible span.
genomicFile = open('DEFB_genomic.txt', 'w')
summaryFile = open('DEFB_summary.txt', 'w')
dnaWriter = fasta.MfaWriter('DEFB_extracted.fa')
pepWriter = fasta.MfaWriter('DEFB_extracted_pep.fa')

domains = hmmer.loadDomains('DEFB.txt', seqType='BlockSixFrame')

# Tab-separated header row: the first domain's field names followed by four
# extra column names.
# NOTE(review): `domains[0]` presumably raises IndexError when no domains
# were loaded -- confirm whether an empty result needs handling.
print >> genomicFile, '\t'.join(domains[0].fields + ['strand', 'lowScoring', 'pseudogene', 'nCysteines'])

for i,domain in enumerate(domains):
    # Only the first 100 domains (indices 0-99) are processed.
    if i>99: break
    # One-based label, zero-padded to at least two digits.
    domain.domain = 'DEFB_%0.2i' % (i+1)
    # NOTE(review): presumably converts the six-frame hit coordinates to
    # genomic ones -- confirm what relative=True selects.
    domain.toGenomic(relative=True)
    # Initial values for three of the extra columns named in the header.
    domain.addField('lowScoring', 'N')
    domain.addField('pseudogene', 'N')
    domain.addField('nCysteines', 0)
    # Not used within the visible span.
    summary = []
    # Request the hit's region (accession, start, end, strand) from blastdb.
    h,s = fasta.getSequence(blastdb, domain.accession, start=domain.sStart, end=domain.sEnd, strand=domain.strand)
    pep = sequence.translate(s)
h = '%s %s' % (name, '; '.join(warnings2)) else: h = name writer.write(h, seq + '\n') for exon in prediction: exon.gene_exon = name print >> annotFile, exon print >> annotFile else: print 'Annotation warnings:', warnings print 'Annotation errors: ', errors print hmmerFilename = 'gsPepHmmer/%s.txt' % name os.system('hmmsearch %s %s > %s' % (hmmerModel, pepFilename, hmmerFilename)) domains = hmmer.loadDomains(hmmerFilename) if domains: print 'Defensin motifs:' for d in domains: print d else: print 'No defensin motifs' print print '=' * 80 + '\n' writer.close() annotFile.close() oFile.close()