コード例 #1
0
def _closestStartCodon(sequence, motifStart, motifLength):
    """ Find the "ATG" start codon closest to a motif occurrence
    Args:
        sequence (string): DNA sequence that contains the motif
        motifStart (int): Index in sequence where the motif begins
        motifLength (int): Number of positions covered by the motif
    Returns:
        int or string: Index (in sequence) of the closest "ATG", or the string "None" when the
            sequence has no "ATG" on either side of the motif
    """
    # Last "ATG" lying entirely inside sequence[0 : motifStart + 1]
    upstream = sequence.rfind("ATG", 0, motifStart + 1)
    # First "ATG" starting at or after the end of the motif (absolute index, not an offset)
    downstream = sequence.find("ATG", motifStart + motifLength)

    if upstream == -1 and downstream == -1:
        return "None"
    if downstream == -1:
        return upstream
    if upstream == -1:
        return downstream
    # Both candidates exist: compare their distances to the motif start, both measured in the
    # same coordinate system. A tie is resolved in favour of the upstream codon.
    if motifStart - upstream <= downstream - motifStart:
        return upstream
    return downstream


def gatherFinalResults(profile, sequences, worstScoringMotif, strand):
    """ Find all locations in the Agrobacterium tumefaciens C58 genome where our
        profile achieves a score as high as the worst scoring motif (returned by gibbsSampling())
    Args:
        profile (list): Profile of the motifs returned by gibbsSampling(); its length is used as the
            motif length (presumably one entry per motif position - confirm against constructProfile())
        sequences (list): List of DNA sequences (strings)
        worstScoringMotif (dictionary): Dictionary containing important info about the worst scoring motif
            (from the best scoring ones got from gibbsSampling()); only its "Score" entry is read here
        strand (int): 1 for the sequence from the file / 2 from the reverse complement
    Returns:
        list: A list of dictionaries, one per position whose score is at least worstScoringMotif["Score"].
            "Position" and "Closest-Protein-Coding-Gene" are 0-based indexes into the sequence,
            "DNASequence#" is 1-based and "Closest-Protein-Coding-Gene" is the string "None" when the
            sequence contains no "ATG" around the motif
    """
    motifLength = len(profile)
    threshold = worstScoringMotif["Score"]
    dnaScores = []
    for sequenceNumber, sequence in enumerate(sequences):
        # Applies a profile for each kmer in the current sequence
        # (mf.applyProfile is expected to yield one score per kmer start position)
        applyProfileScores = mf.applyProfile(profile, sequence)
        for i, score in enumerate(applyProfileScores):
            # Keep only motifs that score as high as the worst scoring motif
            if score >= threshold:
                # Adds the found motif and its info to the list of dictionaries returned by the function
                dnaScores.append(
                    {
                        # Up to 50 bases starting at the motif (shorter near the end of the sequence)
                        "50mer-Sequence": sequence[i : i + 50],
                        "Position": i,
                        "Score": score,
                        "Closest-Protein-Coding-Gene": _closestStartCodon(sequence, i, motifLength),
                        "DNASequence#": sequenceNumber + 1,
                        "Strand #": strand,
                    }
                )

    return dnaScores
コード例 #2
0
ファイル: tests.py プロジェクト: bressan3/Bioinformatics-2
""" File containing the functions test for motifFinder.py """
import motifFinder


# Tests ------------------------------------------------------------------

# Sample inputs reused by several calls below. They are kept as immutable tuples
# and turned into a fresh list for every call, so each function receives its own
# argument object exactly as it would from an inline literal.
_SAMPLE_SEQUENCES = ('ACACGTAC', 'CCACGTCACA', 'TTCGTCGTACG')
_SAMPLE_MOTIFS = ('CGTA', 'TCAC', 'CGTC')

# readInput
print(motifFinder.readInput('TraR.txt'))

# randomStart / getMotif on the sample sequences with a width of 4
print(motifFinder.randomStart(list(_SAMPLE_SEQUENCES), 4))
print(motifFinder.getMotif(list(_SAMPLE_SEQUENCES), [3, 5, 2], 4))

# constructProfile, then getSingleScore / applyProfile on a freshly built profile
print(motifFinder.constructProfile(list(_SAMPLE_MOTIFS)))
singleScoreProfile = motifFinder.constructProfile(list(_SAMPLE_MOTIFS))
print(motifFinder.getSingleScore(singleScoreProfile, 'CACA'))
applyProfileProfile = motifFinder.constructProfile(list(_SAMPLE_MOTIFS))
print(motifFinder.applyProfile(applyProfileProfile, 'CCACGTCACA'))

# randomlySelect
print(motifFinder.randomlySelect([0.014994, 0.001249, 0.000833, 0.033736, 0.000833, 0.009996, 0.002499]))

# nucleotideFrequencies
print(motifFinder.nucleotideFrequencies(list(_SAMPLE_SEQUENCES)))

# scoreProfile: first argument is a list of per-position dictionaries,
# second argument is a single dictionary keyed by nucleotide
scoreProfileFirstArg = [
    {'A': 0.142857, 'C': 0.428571, 'G': 0.142857, 'T': 0.285714},
    {'A': 0.142857, 'C': 0.285714, 'G': 0.428571, 'T': 0.142857},
    {'A': 0.285714, 'C': 0.142857, 'G': 0.142857, 'T': 0.428571},
    {'A': 0.285714, 'C': 0.428571, 'G': 0.142857, 'T': 0.142857},
]
scoreProfileSecondArg = {'A': 0.241379, 'C': 0.379310, 'G': 0.172413, 'T': 0.206897}
print(motifFinder.scoreProfile(scoreProfileFirstArg, scoreProfileSecondArg))