コード例 #1
0
ファイル: Textbook_06D.py プロジェクト: ArnaudKOPP/challenge
def shared_kmers(k, dna1, dna2):
    '''Return the set of (i, j) start-index pairs of k-mers shared by dna1 and dna2.

    A pair (i, j) is included when the k-mer starting at index j of dna2, or
    its reverse complement as computed by rev_comp, equals the k-mer starting
    at index i of dna1.  The result is a set, so each pair appears once.
    '''
    # Index dna1: map every k-mer to the list of positions where it starts.
    positions = defaultdict(list)
    for i in range(len(dna1) - k + 1):
        positions[dna1[i:i + k]].append(i)

    # dna1 contributed no k-mers (it is shorter than k), so nothing can match.
    if not positions:
        return set()

    shared = set()
    for j in range(len(dna2) - k + 1):
        kmer = dna2[j:j + k]
        # Look up with .get(): indexing a defaultdict would insert an empty
        # list for every miss and bloat the index while dna2 is scanned.
        for candidate in (kmer, rev_comp(kmer)):
            for i in positions.get(candidate, ()):
                shared.add((i, j))
    return shared
def shared_kmers(dna1, dna2, k):
    '''Return the set of (i, j) start-index pairs of k-mers shared by dna1 and dna2.

    A pair (i, j) is included when the k-mer starting at index j of dna2
    matches the k-mer starting at index i of dna1, either directly or via
    the reverse complement computed by rev_comp.
    '''
    from scripts import ReverseComplementDNA as rev_comp

    # Index dna1: each k-mer and its reverse complement map to the list of
    # start positions they were derived from.
    dna_dict = {}
    for i in range(len(dna1) - k + 1):
        kmer = dna1[i:i + k]
        dna_dict.setdefault(kmer, []).append(i)

        rc_kmer = rev_comp(kmer)
        # A k-mer that equals its own reverse complement is already recorded
        # above; appending i a second time would only add a duplicate.
        if rc_kmer != kmer:
            dna_dict.setdefault(rc_kmer, []).append(i)

    # Collect matches in a set so each (i, j) pair is reported once.
    common_kmers = set()
    for j in range(len(dna2) - k + 1):
        kmer = dna2[j:j + k]
        # Check the jth k-mer and its reverse complement against the index.
        for key in (kmer, rev_comp(kmer)):
            for i in dna_dict.get(key, ()):
                common_kmers.add((i, j))

    return common_kmers
コード例 #3
0
def shared_kmers(dna1, dna2, k):
    '''Return the set of (i, j) start-index pairs of k-mers shared by dna1 and dna2.

    (i, j) is in the result when the k-mer at index i of dna1 and the k-mer
    at index j of dna2 are equal, or equal after applying rev_comp (the
    reverse complement) to one of them.
    '''
    from scripts import ReverseComplementDNA as rev_comp

    # Build the lookup table from dna1: both orientations of every k-mer are
    # keyed to the start positions that produced them.
    dna_dict = {}
    for i in range(len(dna1) - k + 1):
        forward = dna1[i:i + k]
        reverse = rev_comp(forward)
        dna_dict.setdefault(forward, []).append(i)
        # Skip the second insert when both orientations are the same string,
        # otherwise i would be stored twice under one key.
        if reverse != forward:
            dna_dict.setdefault(reverse, []).append(i)

    # A set removes pairs that are found through both orientations.
    common_kmers = set()
    for j in range(len(dna2) - k + 1):
        forward = dna2[j:j + k]
        for key in (forward, rev_comp(forward)):
            common_kmers.update((i, j) for i in dna_dict.get(key, ()))

    return common_kmers
コード例 #4
0
def shared_kmers(k, dna1, dna2):
    '''Return the set of (i, j) start-index pairs of k-mers shared by dna1 and dna2.

    A pair (i, j) is included when the k-mer starting at index j of dna2, or
    its reverse complement as computed by rev_comp, equals the k-mer starting
    at index i of dna1.
    '''
    # Store the starting index of every k-mer of dna1, keyed by the k-mer.
    index = defaultdict(list)
    for start in range(len(dna1) - k + 1):
        index[dna1[start:start + k]].append(start)

    # Without any k-mer from dna1 there is nothing to match against.
    if not index:
        return set()

    shared_kmer_indices = set()
    for j in range(len(dna2) - k + 1):
        kmer = dna2[j:j + k]
        for key in (kmer, rev_comp(kmer)):
            # .get() avoids the defaultdict side effect of inserting an empty
            # list for every k-mer of dna2 that is absent from dna1.
            shared_kmer_indices.update((i, j) for i in index.get(key, ()))

    return shared_kmer_indices
コード例 #5
0
ファイル: Textbook_01H.py プロジェクト: wederw/Rosalind
def freq_words_with_mm_and_rev_comp(seq, k, d):
    """Return the sorted list of most frequent k-mers in seq, allowing mismatches.

    Every k-mer of seq and its reverse complement (via rev_comp) is counted;
    each count is then credited to every k-mer produced by
    kmer_mismatches(kmer, d), which is expected to yield the k-mers within
    d mismatches of kmer.  The k-mers with the highest total are returned in
    sorted order.  Returns an empty list when seq contains no k-mer at all
    (for example when it is shorter than k).
    """
    # Frequency analysis first, so mismatches are generated once per unique
    # k-mer rather than once per occurrence.
    kmer_freq = defaultdict(int)
    for i in range(len(seq) - k + 1):
        kmer = seq[i:i + k]
        kmer_freq[kmer] += 1
        kmer_freq[rev_comp(kmer)] += 1

    # Credit each unique k-mer's frequency to all of its mismatch neighbours.
    mismatch_count = defaultdict(int)
    for kmer, freq in kmer_freq.items():
        for mismatch in kmer_mismatches(kmer, d):
            mismatch_count[mismatch] += freq

    # max() raises ValueError on an empty sequence; no candidates means no
    # frequent words.
    if not mismatch_count:
        return []

    # Compute the maximum once instead of inside the filter below.
    max_count = max(mismatch_count.values())
    return sorted(kmer for kmer, count in mismatch_count.items() if count == max_count)
コード例 #6
0
ファイル: Textbook_01H.py プロジェクト: ArnaudKOPP/challenge
def freq_words_with_mm_and_rev_comp(seq, k, d):
    """Return the sorted list of most frequent k-mers in seq, allowing mismatches.

    Occurrences of each k-mer of seq and of its reverse complement (via
    rev_comp) are tallied, then spread over the k-mers produced by
    kmer_mismatches(kmer, d), which is expected to yield the k-mers within
    d mismatches of kmer.  The k-mers sharing the highest tally are returned
    sorted.  If seq yields no k-mers (e.g. len(seq) < k) the result is [].
    """
    # Tally unique k-mers up front so the mismatch expansion below runs once
    # per distinct k-mer instead of once per position.
    occurrences = defaultdict(int)
    for pos in range(len(seq) - k + 1):
        window = seq[pos:pos + k]
        occurrences[window] += 1
        occurrences[rev_comp(window)] += 1

    # Each neighbour inherits the number of times its source k-mer appeared.
    totals = defaultdict(int)
    for window, times in occurrences.items():
        for neighbour in kmer_mismatches(window, d):
            totals[neighbour] += times

    # Guard the max() call: it raises ValueError when there is nothing to
    # compare.
    if not totals:
        return []

    # The maximum is computed once and reused for the whole filter.
    best = max(totals.values())
    return sorted(
        candidate for candidate, total in totals.items()
        if total == best
    )