コード例 #1
0
ファイル: FindLocations.py プロジェクト: offbynull/learn
def find_kmer_locations(sequence: str, kmer: str,
                        options: Options = Options()) -> List[int]:
    """Find where ``kmer`` (or an accepted variant of it) occurs in ``sequence``.

    A window of ``sequence`` counts as a match when it equals ``kmer`` or is
    one of the kmers returned by
    ``find_all_dna_kmers_within_hamming_distance`` for ``kmer`` with
    ``options.hamming_distance``. When ``options.reverse_complement`` is
    truthy, the kmers returned for the reverse complement of ``kmer`` are
    accepted as well.

    Args:
        sequence: Text to scan.
        kmer: Pattern to search for; its length is the window size.
        options: Supplies ``hamming_distance`` and ``reverse_complement``.
            NOTE: the default ``Options()`` instance is created once, when
            this function is defined, and is shared by every call that omits
            the argument.

    Returns:
        The position values that ``slide_window`` pairs with each matching
        window, in iteration order.
    """
    # Build the set of acceptable kmers. The exact kmer is always included,
    # regardless of what the neighbourhood helper returns.
    test_kmers = {kmer}
    test_kmers.update(
        find_all_dna_kmers_within_hamming_distance(
            kmer, options.hamming_distance))
    if options.reverse_complement:
        rc_kmer = reverse_complement(kmer)
        test_kmers.update(
            find_all_dna_kmers_within_hamming_distance(
                rc_kmer, options.hamming_distance))

    # Slide over the sequence's kmers and keep the positions of the matches.
    k = len(kmer)
    return [
        i for seq_kmer, i in slide_window(sequence, k)
        if seq_kmer in test_kmers
    ]
コード例 #2
0
 def neighborhood(kmer: str) -> Set[str]:
     """Return the kmers treated as equivalent to ``kmer``.

     The result holds everything
     ``find_all_dna_kmers_within_hamming_distance`` returns for ``kmer`` with
     ``options.hamming_distance``. When ``options.reverse_complement`` is
     truthy, it also holds everything that helper returns for the reverse
     complement of ``kmer``.

     ``options`` is not a parameter; it is read from an outer scope.

     Args:
         kmer: The kmer whose neighbourhood is wanted.

     Returns:
         A new set with the forward neighbourhood and, if enabled, the
         reverse-complement neighbourhood.
     """
     # Copy into a fresh set so the helper's return value is never mutated.
     neighbourhood = set(find_all_dna_kmers_within_hamming_distance(
         kmer, options.hamming_distance))
     if options.reverse_complement:
         kmer_rc = reverse_complement(kmer)
         # Add to the forward neighbourhood instead of replacing it, so that
         # matches on both strands are kept.
         neighbourhood.update(find_all_dna_kmers_within_hamming_distance(
             kmer_rc, options.hamming_distance))
     return neighbourhood
コード例 #3
0
def kmer_frequency_with_mismatches_and_reverse_complements(
        data: str, k: int, min_hamming_dist: int) -> Counter[str]:
    """Tally neighbouring kmers for every length-``k`` window of ``data``.

    For each window, every kmer returned by
    ``find_all_dna_kmers_within_hamming_distance`` for the window itself and
    for the window's reverse complement has its count increased by one.

    Args:
        data: Text to scan.
        k: Window length.
        min_hamming_dist: Passed unchanged as the distance argument of
            ``find_all_dna_kmers_within_hamming_distance``.

    Returns:
        A counter mapping each neighbouring kmer to the number of times it
        was produced.
    """
    tally = Counter()
    window_count = len(data) - k + 1
    for start in range(window_count):
        window = data[start:start + k]
        # Forward strand first, then the reverse complement.
        tally.update(
            find_all_dna_kmers_within_hamming_distance(
                window, min_hamming_dist))
        tally.update(
            find_all_dna_kmers_within_hamming_distance(
                reverse_complement(window), min_hamming_dist))
    return tally
コード例 #4
0
ファイル: FindRepeating.py プロジェクト: offbynull/learn
def count_kmers(data: str, k: int,
                options: Options = Options()) -> Counter[str]:
    """Tally neighbouring kmers for every window ``slide_window`` yields.

    For each window, every kmer returned by
    ``find_all_dna_kmers_within_hamming_distance`` (using
    ``options.hamming_distance``) has its count increased by one. When
    ``options.reverse_complement`` is truthy, the same is done for the
    window's reverse complement.

    Args:
        data: Text to scan.
        k: Window length handed to ``slide_window``.
        options: Supplies ``hamming_distance`` and ``reverse_complement``.

    Returns:
        A counter mapping each neighbouring kmer to the number of times it
        was produced.
    """
    tally = Counter()
    for window, _ in slide_window(data, k):
        tally.update(
            find_all_dna_kmers_within_hamming_distance(
                window, options.hamming_distance))

        if not options.reverse_complement:
            continue

        # Also count the neighbours of the opposite strand.
        tally.update(
            find_all_dna_kmers_within_hamming_distance(
                reverse_complement(window), options.hamming_distance))

    return tally
コード例 #5
0
import sys

from ReverseComplementADnaKmer import reverse_complement

# Print the reverse complement of the first line of the dataset file.
# To read from standard input instead, use sys.stdin in place of open(...).
DATASET_PATH = '/home/user/Downloads/dataset_240215_2.txt'

with open(DATASET_PATH, 'r', encoding='utf-8') as f:
    data = f.read()

# Only the first line holds the sequence; anything after it is ignored.
lines = data.split('\n')
seq = lines[0]

seq_revcomp = reverse_complement(seq)
print(seq_revcomp)