def _closestStartCodon(sequence, motifStart, motifLength):
    """
    Locate the "ATG" start codon nearest to a motif occurrence.

    Args:
        sequence (str): DNA sequence that contains the motif
        motifStart (int): Index in `sequence` where the motif begins
        motifLength (int): Number of positions covered by the motif

    Returns:
        int or str: Index in `sequence` of the nearest "ATG", or the string
        "None" when no "ATG" is found on either side or when the upstream
        and downstream codons are exactly equidistant
    """
    notFound = float("inf")
    motifEnd = motifStart + motifLength

    # Upstream window is sequence[0 : motifStart + 1]; downstream window is
    # everything from the end of the motif. Bounded find/rfind search the
    # same windows as slicing would, without copying the sequence.
    upstreamIndex = sequence.rfind("ATG", 0, motifStart + 1)
    downstreamIndex = sequence.find("ATG", motifEnd)

    # Compare real distances from the motif. An absolute index must not be
    # compared with a relative offset, and a missing codon must never win
    # against a codon that was actually found.
    if upstreamIndex == -1:
        upstreamDistance = notFound
    else:
        upstreamDistance = motifStart - upstreamIndex
    if downstreamIndex == -1:
        downstreamDistance = notFound
    else:
        downstreamDistance = downstreamIndex - motifEnd

    if upstreamDistance < downstreamDistance:
        return upstreamIndex
    if downstreamDistance < upstreamDistance:
        return downstreamIndex
    return "None"


def gatherFinalResults(profile, sequences, worstScoringMotif, strand):
    """
    Find all locations in the Agrobacterium tumefaciens C58 genome where our
    profile achieves a score as high as the worst scoring motif (returned by
    gibbsSampling())

    Args:
        profile: Profile of the motifs returned by gibbsSampling(); its
            length is used as the motif length
        sequences (list of str): DNA sequences to scan
        worstScoringMotif (dictionary): Info about the worst scoring motif
            (among the best scoring ones from gibbsSampling()); only its
            "Score" entry is read here
        strand (int): 1 for the sequence from the file / 2 for the reverse
            complement; copied unchanged into every result

    Returns:
        list of dictionaries: One entry per position whose score is at least
        worstScoringMotif["Score"], with the keys "50mer-Sequence" (up to 50
        characters starting at the hit), "Position", "Score",
        "Closest-Protein-Coding-Gene" (index of the nearest "ATG", or the
        string "None"), "DNASequence#" (1-based) and "Strand #"
    """
    motifLength = len(profile)
    dnaScores = []

    for sequenceIndex, sequence in enumerate(sequences):
        # The index of each score returned by applyProfile is used as the
        # start position of the scored subsequence
        applyProfileScores = mf.applyProfile(profile, sequence)

        for position, score in enumerate(applyProfileScores):
            # Keep only motifs that score as high as the worst scoring motif
            if score >= worstScoringMotif["Score"]:
                dnaScores.append(
                    {
                        "50mer-Sequence": sequence[position : position + 50],
                        "Position": position,
                        "Score": score,
                        "Closest-Protein-Coding-Gene": _closestStartCodon(
                            sequence, position, motifLength
                        ),
                        "DNASequence#": sequenceIndex + 1,
                        "Strand #": strand,
                    }
                )

    return dnaScores
""" File containing the functions test for motifFinder.py """ import motifFinder # Tests ------------------------------------------------------------------ print(motifFinder.readInput('TraR.txt')) print(motifFinder.randomStart(['ACACGTAC', 'CCACGTCACA', 'TTCGTCGTACG'], 4)) print(motifFinder.getMotif(['ACACGTAC', 'CCACGTCACA', 'TTCGTCGTACG'], [3, 5, 2], 4)) print(motifFinder.constructProfile(['CGTA', 'TCAC', 'CGTC'])) print(motifFinder.getSingleScore(motifFinder.constructProfile(['CGTA', 'TCAC', 'CGTC']), 'CACA')) print(motifFinder.applyProfile(motifFinder.constructProfile(['CGTA', 'TCAC', 'CGTC']), 'CCACGTCACA')) print(motifFinder.randomlySelect([0.014994, 0.001249, 0.000833, 0.033736, 0.000833, 0.009996, 0.002499])) print(motifFinder.nucleotideFrequencies(['ACACGTAC', 'CCACGTCACA', 'TTCGTCGTACG'])) print(motifFinder.scoreProfile([{'A': 0.142857, 'C': 0.428571, 'G': 0.142857, 'T': 0.285714}, {'A': 0.142857, 'C': 0.285714, 'G': 0.428571, 'T': 0.142857}, {'A': 0.285714, 'C': 0.142857, 'G': 0.142857, 'T': 0.428571}, {'A': 0.285714, 'C': 0.428571, 'G': 0.142857, 'T': 0.142857}], {'A': 0.241379, 'C': 0.379310, 'G': 0.172413, 'T': 0.206897}))