Example #1
0
def runGenomescan(features, debug=False):
    """Run genomescan supplying extracted features from genome as 
    homologous proteins."""

    for i, feature in enumerate(features):
        print i + 1, feature
        oFilename = 'gsOutput/%s.html' % feature.domain
        annotFilename = 'gsAnnotations/%s.txt' % feature.domain
        pepFilename = 'gsPeptides/%s.fa' % feature.domain

        if not os.path.exists(oFilename):
            html = genomescanFromFeature(feature,
                                         blastDb,
                                         oFileHandle=oFilename)
        else:
            html = open(oFilename).read()

        html = html.split('\n')
        annotation, peptides = parseGenomeScanOutput(html)

        print >> open(annotFilename, 'w'), annotation

        fakeFaFile = StringIO.StringIO(peptides)
        faIter = fasta.load_iter(fakeFaFile)
        writer = fasta.MfaWriter(pepFilename)
        for j, (h, s) in enumerate(faIter):
            block = h.split('|')[0]
            if j == 1: print feature
            h = '%s.%i %s' % (feature.domain, j + 1, block)
            writer.write(h, s + '\n')
        writer.close()
Example #2
0
def Initialize():
    header, seq = fasta.load('MHC_hg18.fa')
    sixFrameIter = sequence.sixFrameTranslationIter(seq)
    writer = fasta.MfaWriter('6frames.fa')
    for frame, p in sixFrameIter:
        print 'Frame:', frame
        writer.write('%s:%i' % (header, frame), p)
    writer.close()
    sys.exit()
Example #3
0
import os, sys
import re, copy
import fasta, sequence, hmmer3

# Path of the FASTA file to scan, taken from the first command-line argument.
seqFilename = sys.argv[1]

header, seq = fasta.load(seqFilename)
# Keep only the first whitespace-delimited token of the FASTA header.
header = header.split()[0]
# Length of the loaded sequence; passed to the coordinate conversion below.
L = len(seq)

# Matches a literal '*' character (presumably the stop-codon symbol in the
# translated frames -- confirm against sequence.sixFrameTranslationIter).
pattern = re.compile('\*')
# Candidate ORFs shorter than this many characters are ignored.
minLen = 10

sixFrameIter = sequence.sixFrameTranslationIter(seq)

# FASTA output is directed to stdout; progress messages go to stderr.
writer = fasta.MfaWriter(sys.stdout)
i = 0
for frame, p in sixFrameIter:
    print >> sys.stderr, 'Frame:', frame

    matchIter = pattern.finditer(p)
    # NOTE(review): .next() raises StopIteration when the frame contains no
    # '*' at all -- confirm this cannot happen for the expected input.
    match = matchIter.next()
    start = match.start()
    for match in matchIter:
        end = match.start()
        # Candidate ORF: the text strictly between the two '*' positions.
        orf = p[start + 1:end]
        length = len(orf)
        if length >= minLen:
            # Positions are shifted by one before conversion (presumably to
            # 1-based coordinates -- confirm against hmmer3).
            gStart, gEnd, strand = hmmer3.convertSixFrameToGenomic(
                start + 1, end + 1, frame, L)
Date: Tue Aug 15 10:18:46 EST 2006

"""

import os, sys
import hmmer, fasta, sequence

# Path of the BLAST database, located under the user's home directory.
# Raises KeyError if the HOME environment variable is not set.
homeDir = os.environ['HOME']
blastdb = os.path.join(homeDir, 'databases/opossum/assembly/blastdb/assembly')

# Work inside the directory given as the first command-line argument; every
# relative file name below is resolved against it.
ioDir = sys.argv[1]
os.chdir(ioDir)

# Output files: two plain-text reports and two FASTA files.
genomicFile = open('DEFB_genomic.txt', 'w')
summaryFile = open('DEFB_summary.txt', 'w')
dnaWriter = fasta.MfaWriter('DEFB_extracted.fa')
pepWriter = fasta.MfaWriter('DEFB_extracted_pep.fa')

domains = hmmer.loadDomains('DEFB.txt', seqType='BlockSixFrame')
# Header row of the genomic report: the field names of the first domain
# followed by four extra column names.  Raises IndexError if no domains
# were loaded.
print >> genomicFile, '\t'.join(
    domains[0].fields + ['strand', 'lowScoring', 'pseudogene', 'nCysteines'])

for i, domain in enumerate(domains):
    # Only the first 100 domains are processed.
    if i > 99: break
    # Sequential identifier, e.g. 'DEFB_01' for the first domain.
    domain.domain = 'DEFB_%0.2i' % (i + 1)
    # NOTE(review): presumably converts the hit coordinates to genomic
    # coordinates -- confirm against hmmer's toGenomic().
    domain.toGenomic(relative=True)
    # Initial values for the three extra fields added to each domain.
    domain.addField('lowScoring', 'N')
    domain.addField('pseudogene', 'N')
    domain.addField('nCysteines', 0)
    summary = []
Example #5
0
Author: Tony Papenfuss
Date: Wed Aug 23 08:52:58 EST 2006

"""

import os, sys
import re, copy
import fasta, sequence, hmmer3
from hmmer3 import hmmer2frame

# NOTE(review): the square brackets make this a character class, so it
# matches any ONE of the characters * | X { 2 0 , } -- not "'*' or a run of
# at least 200 X".  If the latter was intended the pattern would be
# '\*|X{200,}'.  Confirm before changing: code past this excerpt may depend
# on the current match positions.
pattern = re.compile('[\*|X{200,}]')
# Minimum length threshold (not used within the visible part of the script).
minLen = 20

i = 0
# ORFs are written to ORFs.fa; input records are read from 6frames.fa.
writer = fasta.MfaWriter('ORFs.fa')
faFile = fasta.load_iter('6frames.fa')

for header, seq in faFile:
    header = header.strip()
    print >> sys.stderr, header
    # The header must contain exactly one ':'; otherwise the unpacking
    # raises ValueError.
    block, hmmerFrame = header.split(':')
    # Translate the integer frame number from the header through the
    # hmmer2frame lookup imported from hmmer3.
    frame = hmmer2frame[int(hmmerFrame)]

    matchIter = pattern.finditer(seq)
    try:
        match = matchIter.next()
    except StopIteration:
        # No match in this sequence: dump diagnostics and stop the script.
        # NOTE(review): 'match' was not assigned by the failed .next() call,
        # so as far as this excerpt shows the print below either raises
        # NameError (first sequence) or shows a match left over from an
        # earlier sequence.
        print match
        print seq
        sys.exit()
Example #6
0
# Exactly three entries are required in args; otherwise exit, passing the
# module docstring to sys.exit() as the message.
if len(args)!=3:
    sys.exit(__doc__)

# args[1] is the GFF file, args[2] the FASTA file.
gffFilename = args[1]
faFilename = args[2]

data = gff.load(gffFilename)
header,seq = fasta.load(faFilename)

# Write to the file named by options.oFilename when given, else to stdout.
if options.oFilename:
    oFile = open(options.oFilename, 'w')
else:
    oFile = sys.stdout

writer = fasta.MfaWriter(oFile)
for name in data:
    # s collects the extracted sub-sequences for this name; extrema collects
    # the start and end coordinate of every selected feature.
    s = []
    extrema = []
    for f in data[name]:
        # Only features whose type is listed in options.features are used.
        if f.type in options.features:
            # Both branches take the same slice seq[start-1:end] (presumably
            # 1-based inclusive GFF coordinates -- confirm); any strand other
            # than '+' is additionally reverse-complemented.
            if f.strand=='+':
                start,end = f.start,f.end
                _seq = seq[start-1:end]
            else:    
                start,end = f.start,f.end
                _seq = seq[start-1:end]
                _seq = sequence.reverse_complement(_seq)
            s.append(_seq)
            extrema.append(f.start)
            extrema.append(f.end)
Example #7
0
    return isCoding, warnings, errors


# ----------------------------------------------------------------------

# Work inside the directory given as the first command-line argument.
ioDir = sys.argv[1]
os.chdir(ioDir)
# From here on everything printed to stdout goes to checkResults.txt.
oFile = open('checkResults.txt', 'w')
sys.stdout = oFile

# HMM file selected by the exact command-line value of ioDir; any value
# other than 'round1' or 'round2' raises KeyError.
hmmerModel = {
    'round1': '../../HMMs/Defensin_beta.hmm',
    'round2': '../../HMMs/Defensin_beta_new.hmm'
}[ioDir]

writer = fasta.MfaWriter('peptides.fa')
annotFile = open('annot.txt', 'w')

# Process every annotation file found in gsAnnotations/.
annotFilenames = glob.glob('gsAnnotations/*.txt')
for annotFilename in annotFilenames:
    name = extractRootName(annotFilename)
    print '>>>', name + '\n'

    # The annotation path is rebuilt from the extracted name rather than
    # reusing the value returned by glob.
    annotFilename = 'gsAnnotations/%s.txt' % name
    annotation = open(annotFilename).readlines()
    predictions = parseGenscan(annotation)

    # Load the matching peptide file and index the sequences by the first
    # whitespace-delimited token of each FASTA header.
    pepFilename = 'gsPeptides/%s.fa' % name
    peptides = fasta.load_mfa(pepFilename)
    peptides = [(h.split()[0], s) for h, s in peptides]
    peptides = dict(peptides)