Python parseFasta 예제들, fasta.parseFasta Python 예제들

예제 #1

0

파일 보기

파일: getorf.py 프로젝트: dongqing7/ABFGP

def parseGetorfOutput(content):
    """
    Parse EMBOSS getorf fasta output, returns dictionary with orfs

    Every fasta header is taken apart as follows:
      - the reference name is the text before the FIRST underscore
      - the coordinates are read from the text after the FIRST space:
        its first and last character are dropped (presumably the
        enclosing brackets of `[start - stop]` -- confirm against real
        getorf output) and the remainder is split on " - "

    @type  content: whatever parseFasta accepts (see @attention notes)
    @param content: content of a EMBOSS getorf output fasta file

    @rtype:  dictionary
    @return: dictionary with keys (fasta reference, orf-start, orf-stop)
             with start/stop as integers, and sequence as values

    @attention: content can be: sys.stdin.readlines()
    @attention: content can be: fh.readlines()
    @attention: raises ValueError when a header has no underscore, does
                not yield exactly two " - " separated coordinates, or has
                non-integer coordinates
    """
    orfs = {}
    # .items() (not .iteritems()) keeps this loop valid on Python 2 and 3
    for header, sequence in parseFasta(content).items():
        # NOTE(review): splitting on the first underscore truncates
        # reference names that themselves contain an underscore.
        fref, _ = header.split("_", 1)
        # retrieve the start/stop coordinates from the getorf header
        start, stop = header.split(" ", 1)[-1][1:-1].split(" - ")
        orfs[(fref, int(start), int(stop))] = sequence
    # return fasta dictionary with openreadingframe info
    return orfs

예제 #2

0

파일 보기

def parse_cexpander(cexpanderdata, fname_fasta):
    """
    Parse cexpander_dr output text into a CexpanderOutput class object

    @type  cexpanderdata: string
    @param cexpanderdata: text of the cexpander_dr output; chunks are
                          separated by empty lines, the first chunk holds
                          the headers and the last chunk is discarded

    @type  fname_fasta: string
    @param fname_fasta: (absolute) path to fasta input file

    @rtype:  CexpanderOutput object
    @return: CexpanderOutput object holding the fasta sequences and one
             CexpanderTransferBlock per transfer block in cexpanderdata

    @attention: a length mismatch between a sequence and its parsed
                values is only reported on STDOUT; the block is added
                to the result regardless
    """
    # initialize empty CexpanderOutput object and load the fasta file;
    # the `with` block makes sure the file handle is closed again
    cxpOut = CexpanderOutput()
    with open(fname_fasta) as fh_fasta:
        cxpOut.sequences = parseFasta(fh_fasta.readlines())
    data = cexpanderdata.split("\n\n")[0:-1]

    # generate header list; omit first 5 lines (cexpander STDOUT messages)
    headers = data.pop(0)
    headers = [
        line.split("\t")[2].replace(">", "")
        for line in headers.split("\n")[5:]
    ]

    # loop over the `transfer blocks` in the file
    for pos, block in enumerate(data):
        cxpTrfblck = CexpanderTransferBlock()

        # skip the block's first character and its first line; each
        # remaining row is tab separated with a leading label column
        rows = block[1:].split("\n")[1:]
        # the mode is decided on the first value column of every row
        first_cells = set(row.split("\t")[1] for row in rows)
        cells = [
            cell for row in rows for cell in row.strip().split("\t")[1:]
        ]

        if not first_cells.difference(['0', '1']):
            # cexpander in binary mode
            # single-line string of zeros (0) and ones (1)
            mode = "binary"
            cxpTrfblck.binarystring = "".join(cells)
        else:
            # cexpander in float mode: list of floats
            mode = "float"
            cxpTrfblck.binarystring = [float(cell) for cell in cells]

        cxpTrfblck.header = headers[pos]
        cxpTrfblck.sequence = cxpOut.sequences[cxpTrfblck.header]
        cxpTrfblck.positions = len(cxpTrfblck.sequence)
        cxpTrfblck.uniform = cxpTrfblck.get_uniform_positions()
        cxpTrfblck.score = len(cxpTrfblck.uniform)
        cxpTrfblck.ratio = cxpTrfblck._binarystring2matchratio(
            cxpTrfblck.binarystring)
        cxpTrfblck.mode = mode

        if cxpTrfblck.positions != len(cxpTrfblck.binarystring):
            # single-argument print(...) calls give the same output on
            # Python 2 (print statement) and Python 3 (print function)
            print("CEXPANDER-PARSE-ERROR: %s != %s" % (
                cxpTrfblck.positions, len(cxpTrfblck.binarystring)))
            print("CEXPANDER-PARSE-ERROR: %s %s" % (cxpTrfblck.header, mode))
            print("CEXPANDER-PARSE-ERROR: %s" % (cxpTrfblck.sequence,))
            print("CEXPANDER-PARSE-ERROR: %s" % (cxpTrfblck.binarystring,))
            print("#" * 40)
            print("".join(block))
            print("#" * 40)

        # add CexpanderTransferBlock to CexpanderOutput object
        cxpOut.add_transferblock(cxpTrfblck)

    # return the created object
    return cxpOut

예제 #3

0

파일 보기

파일: example3.py 프로젝트: Wyss/MOODS

import unittest
import sys
from os.path import join, abspath, dirname

# Directory containing this script; its parent directory is appended to
# sys.path BEFORE the imports below (presumably so that MOODS and/or fasta
# resolve from there -- confirm against the project layout).
LOCAL_DIR = abspath(dirname(__file__))
sys.path.append(abspath(join(LOCAL_DIR, '..')))

import MOODS

import fasta

# Two directory levels above this script's directory; printed for reference.
DIST_DIR = abspath(dirname(dirname(LOCAL_DIR)))
print(DIST_DIR)
fasta_filepath = join(DIST_DIR, "examples/data/sequence/dnaACGT.txt")
records = fasta.parseFasta(fasta_filepath)

# Second element of the first record; presumably the sequence string of a
# (header, sequence) pair -- confirm against fasta.parseFasta.
seq = records[0][1]

# Two 4-row matrices handed to MOODS.search; presumably one row per
# nucleotide (A, C, G, T) and one column per motif position -- TODO confirm.
matrix1 = [     [0,1,0,0,0,0,0,1,1,0],
                [1,0,0,0,0,0,0,0,0,0],
                [0,0,0,0,0,0,0,0,0,0],
                [0,0,1,1,1,1,1,0,0,1]
            ]
# NOTE(review): the first row of matrix2 has 6 entries while the other three
# rows have 7 -- verify whether this ragged matrix is intentional.
matrix2 = [     [10,0,10,3,5,5],
                [0,5,0,3,5,0,5],
                [0,1,0,3,0,5,0],
                [0,4,0,1,0,0,5]
            ]

# Search seq with both matrices; 0.011 is presumably the threshold/p-value
# argument of MOODS.search -- confirm against the MOODS documentation.
results = MOODS.search(seq, [matrix1, matrix2], 0.011)

예제 #4

0

파일 보기

import unittest
import sys
from os.path import join, abspath, dirname

# Directory containing this script; its parent directory is appended to
# sys.path BEFORE the imports below (presumably so that MOODS and/or fasta
# resolve from there -- confirm against the project layout).
LOCAL_DIR = abspath(dirname(__file__))
sys.path.append(abspath(join(LOCAL_DIR, '..')))

import MOODS

import fasta

# Two directory levels above this script's directory; printed for reference.
DIST_DIR = abspath(dirname(dirname(LOCAL_DIR)))
print(DIST_DIR)
fasta_filepath = join(DIST_DIR, "examples/data/sequence/dnaACGT.txt")
records = fasta.parseFasta(fasta_filepath)

# Second element of the first record; presumably the sequence string of a
# (header, sequence) pair -- confirm against fasta.parseFasta.
seq = records[0][1]

# Two 4-row matrices handed to MOODS.search; presumably one row per
# nucleotide (A, C, G, T) and one column per motif position -- TODO confirm.
matrix1 = [[0, 1, 0, 0, 0, 0, 0, 1, 1, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 0, 0, 1]]
# NOTE(review): the first row of matrix2 has 6 entries while the other three
# rows have 7 -- verify whether this ragged matrix is intentional.
matrix2 = [[10, 0, 10, 3, 5, 5], [0, 5, 0, 3, 5, 0, 5], [0, 1, 0, 3, 0, 5, 0],
           [0, 4, 0, 1, 0, 0, 5]]

# One result collection per matrix is indexed below (results[0], results[1]).
results = MOODS.search(seq, [matrix1, matrix2], 0.011)

# Report how many results were returned for each matrix.
print("Matrix 1 results: " + str(len(results[0])))
print("Matrix 2 results: " + str(len(results[1])))

# Inputs gathered for follow-up calls that are not visible in this excerpt:
# the matrices, one threshold per matrix, and a background computed from the
# sequence (the meaning of the 0.1 argument is not shown here -- confirm
# against MOODS.bg_from_sequence).
matrices = [matrix1, matrix2]
thresholds = [0.011, 0.011]
bg = MOODS.bg_from_sequence(seq, 0.1)

예제 #5

0

파일 보기

파일: cexpander.py 프로젝트: IanReid/ABFGP

def parse_cexpander(cexpanderdata, fname_fasta):
    """
    Parse cexpander_dr output text into a CexpanderOutput class object

    @type  cexpanderdata: string
    @param cexpanderdata: text of the cexpander_dr output; it is split on
                          empty lines into a header chunk followed by the
                          transfer blocks (the final chunk is dropped)

    @type  fname_fasta: string
    @param fname_fasta: (absolute) path to fasta input file

    @rtype:  CexpanderOutput object
    @return: CexpanderOutput object

    @attention: when the number of parsed values differs from the
                sequence length, a CEXPANDER-PARSE-ERROR report is
                printed but the transfer block is still added
    """

    # initialize empty CexpanderOutput object; read the fasta file inside
    # a context manager so the file handle does not stay open
    cxpOut = CexpanderOutput()
    with open(fname_fasta) as fh:
        cxpOut.sequences = parseFasta(fh.readlines())
    data = cexpanderdata.split("\n\n")[0:-1]

    # generate header list; omit first 5 lines (cexpander STDOUT messages)
    headers = data.pop(0)
    headers = [line.split("\t")[2].replace(">", "")
               for line in headers.split("\n")[5:]]

    # loop over the `transfer blocks` in the file
    for pos in range(len(data)):
        cxpTrfblck = CexpanderTransferBlock()

        # rows of this block: first character and first line are skipped
        lines = data[pos][1:].split("\n")[1:]
        # binary mode when the first value of every row is '0' or '1'
        is_binary = set(
            line.split("\t")[1] for line in lines
        ).issubset(('0', '1'))

        # all values of the block, without the leading column of each row
        cells = []
        for line in lines:
            cells.extend(line.strip().split("\t")[1:])

        if is_binary:
            # cexpander in binary mode
            # single-line string of zeros (0) and ones (1)
            mode = "binary"
            cxpTrfblck.binarystring = "".join(cells)
        else:
            # cexpander in float mode
            mode = "float"
            cxpTrfblck.binarystring = [float(cell) for cell in cells]

        cxpTrfblck.header = headers[pos]
        cxpTrfblck.sequence = cxpOut.sequences[cxpTrfblck.header]
        cxpTrfblck.positions = len(cxpTrfblck.sequence)
        cxpTrfblck.uniform = cxpTrfblck.get_uniform_positions()
        cxpTrfblck.score = len(cxpTrfblck.uniform)
        cxpTrfblck.ratio = cxpTrfblck._binarystring2matchratio(
            cxpTrfblck.binarystring)
        cxpTrfblck.mode = mode

        if cxpTrfblck.positions != len(cxpTrfblck.binarystring):
            # each report line is pre-formatted into ONE string so that
            # print(...) behaves identically on Python 2 and Python 3
            prefix = "CEXPANDER-PARSE-ERROR:"
            print("%s %s != %s" % (
                prefix, cxpTrfblck.positions, len(cxpTrfblck.binarystring)))
            print("%s %s %s" % (prefix, cxpTrfblck.header, mode))
            print("%s %s" % (prefix, cxpTrfblck.sequence))
            print("%s %s" % (prefix, cxpTrfblck.binarystring))
            print("#" * 40)
            print("".join(data[pos]))
            print("#" * 40)

        # add CexpanderTransferBlock to CexpanderOutput object
        cxpOut.add_transferblock(cxpTrfblck)

    # return the created object
    return cxpOut