Esempio n. 1
0
def import_fasta(fastaFile):
    """Collect lengths, descriptions and sequences for a FASTA file.

    The file is handed to two helpers from ``fol``: ``blast_dict`` supplies
    the mapping whose values are measured and which is returned as-is, and
    ``one_line_d`` supplies the header strings that get split into an
    identifier and a description.
    (Presumably both key their results by FASTA header -- confirm against
    ``Fasta_one_line``.)

    Args:
        fastaFile: Value passed unchanged to both ``fol`` helpers.

    Returns:
        A ``(lenDict, descDict, seqDict)`` tuple:
          * ``lenDict``  -- each key of ``seqDict`` mapped to ``len()`` of
            its value.
          * ``descDict`` -- for each ``one_line_d`` key, the first
            space-delimited token (trailing newlines and leading ``>``
            removed) mapped to the remaining tokens re-joined with single
            spaces.
          * ``seqDict``  -- the unmodified result of ``fol.blast_dict``.
    """
    seqDict = fol.blast_dict(fastaFile)
    full_headers = fol.one_line_d(fastaFile).keys()

    lenDict = {name: len(sequence) for name, sequence in seqDict.items()}

    descDict = {}
    for raw_header in full_headers:
        cleaned = raw_header.rstrip("\n").lstrip(">")
        # split(" ") always yields at least one token, so the identifier
        # is always present; the description may be empty.
        seq_id, *desc_words = cleaned.split(" ")
        descDict[seq_id] = " ".join(desc_words)

    return lenDict, descDict, seqDict
Esempio n. 2
0
import Fasta_one_line as fol

# Write one "<cluster>.faa" file per line of transposase_clusters.txt.
# Each line is tab-separated: the cluster name first, then the cluster's
# member nodes.  Sequences are looked up in `fasta` by a "ref|...|" key.
# (Presumably `fasta` maps FASTA headers to sequences -- confirm against
# Fasta_one_line.blast_dict.)
fasta = fol.blast_dict(
    "/work/mpesesky/Plasmids/NCBI_Plasmids/taxonAnalysis/transposases.faa")

# Cluster names already processed.  Recording them is what lets the
# duplicate check below fire before an existing output file is truncated.
used_families = set()

with open("transposase_clusters.txt", 'r') as infile:
    for line in infile:
        if not line.strip():
            # A blank line would otherwise produce a file named ".faa".
            continue

        nodes = line.rstrip().split("\t")
        clusterName = nodes[0]
        if clusterName in used_families:
            # Report the repeated cluster name and stop.
            # NOTE(review): this still exits with status 0, as before;
            # consider a non-zero status if callers should see a failure.
            print(clusterName)
            raise SystemExit
        used_families.add(clusterName)

        outfileName = clusterName + ".faa"
        # The context manager closes (and flushes) the file even when the
        # script stops early on a missing sequence.
        with open(outfileName, 'w') as outfile:
            for node in nodes[1:]:
                # Entries without a pipe are skipped.
                if "|" not in node:
                    continue
                if node.startswith("ref"):
                    nodeName = node
                else:
                    # Rebuild the key as "ref|<second pipe field>|".
                    nodeName = "ref|{}|".format(node.split("|")[1])

                try:
                    seq = fasta[nodeName]
                except KeyError:
                    # Report the key that has no sequence and stop.
                    print(nodeName)
                    raise SystemExit
                outfile.write(">{}\n{}\n".format(nodeName, seq))