def find_kmer_locations(sequence: str, kmer: str, options: Options = Options()) -> List[int]:
    """Return the window indexes in ``sequence`` whose k-mer matches ``kmer``.

    A window counts as a match when it is ``kmer`` itself or any k-mer that
    ``find_all_dna_kmers_within_hamming_distance`` produces for ``kmer`` with
    ``options.hamming_distance``. When ``options.reverse_complement`` is truthy,
    the k-mers produced the same way for the reverse complement of ``kmer``
    are accepted as well.

    Args:
        sequence: Text to scan; it is handed to ``slide_window`` together with
            ``len(kmer)``.
        kmer: The k-mer to look for.
        options: Supplies ``hamming_distance`` and ``reverse_complement``.

    Returns:
        The indexes yielded by ``slide_window`` for matching windows, in the
        order ``slide_window`` produces them.
    """
    # Build the full set of acceptable k-mers once so that each window check
    # below is a single set-membership test.
    test_kmers = {kmer}
    test_kmers.update(
        find_all_dna_kmers_within_hamming_distance(kmer, options.hamming_distance)
    )
    if options.reverse_complement:
        rc_kmer = reverse_complement(kmer)
        test_kmers.update(
            find_all_dna_kmers_within_hamming_distance(rc_kmer, options.hamming_distance)
        )

    # slide_window yields (window_text, index) pairs; keep the index of every
    # window whose text is one of the acceptable k-mers.
    k = len(kmer)
    return [i for seq_kmer, i in slide_window(sequence, k) if seq_kmer in test_kmers]
def neighborhood(kmer: str) -> Set[str]:
    """Collect the k-mers considered equivalent to ``kmer``.

    ``options`` is not a parameter of this function; it is read from the
    surrounding scope and must provide ``hamming_distance`` and
    ``reverse_complement``.

    The result always contains what
    ``find_all_dna_kmers_within_hamming_distance`` produces for ``kmer``. When
    ``options.reverse_complement`` is truthy, what it produces for the reverse
    complement of ``kmer`` is added too. Previously the reverse-complement
    result replaced the forward one instead of being merged with it, which
    discarded the forward-strand neighbours.

    Args:
        kmer: The k-mer whose neighbourhood is wanted.

    Returns:
        A new set holding the combined neighbourhood.
    """
    # Copy into a fresh set so that merging below never mutates an object
    # owned by the helper.
    neighbourhood = set(
        find_all_dna_kmers_within_hamming_distance(kmer, options.hamming_distance)
    )
    if options.reverse_complement:
        kmer_rc = reverse_complement(kmer)
        neighbourhood.update(
            find_all_dna_kmers_within_hamming_distance(kmer_rc, options.hamming_distance)
        )
    return neighbourhood
def kmer_frequency_with_mismatches_and_reverse_complements(
        data: str,
        k: int,
        min_hamming_dist: int) -> Counter[str]:
    """Tally neighbouring k-mers for every length-``k`` window of ``data``.

    For each window, and for the reverse complement of that window, every
    k-mer produced by ``find_all_dna_kmers_within_hamming_distance`` gets its
    count raised by one.

    Args:
        data: Text to scan.
        k: Window length.
        min_hamming_dist: Passed straight through as the distance argument of
            ``find_all_dna_kmers_within_hamming_distance``.
            NOTE(review): presumably the maximum number of mismatches allowed,
            despite the ``min_`` prefix -- confirm against the helper.

    Returns:
        A ``Counter`` mapping each produced k-mer to the number of times it
        was produced.
    """
    tally = Counter()

    def record(seed: str) -> None:
        # Credit every k-mer the helper produces for this seed.
        for variant in find_all_dna_kmers_within_hamming_distance(seed, min_hamming_dist):
            tally[variant] += 1

    window_count = len(data) - k + 1
    for start in range(window_count):
        window = data[start:start + k]
        record(window)
        record(reverse_complement(window))
    return tally
def count_kmers(data: str, k: int, options: Options = Options()) -> Counter[str]:
    """Tally neighbouring k-mers for every window ``slide_window`` yields.

    For each window, every k-mer produced by
    ``find_all_dna_kmers_within_hamming_distance`` with
    ``options.hamming_distance`` gets its count raised by one. When
    ``options.reverse_complement`` is truthy, the same is done for the reverse
    complement of the window.

    Args:
        data: Text to scan; handed to ``slide_window`` together with ``k``.
        k: Window length handed to ``slide_window``.
        options: Supplies ``hamming_distance`` and ``reverse_complement``.

    Returns:
        A ``Counter`` mapping each produced k-mer to the number of times it
        was produced.
    """
    tally = Counter()

    def record(seed: str) -> None:
        # Credit every k-mer the helper produces for this seed.
        for variant in find_all_dna_kmers_within_hamming_distance(
                seed, options.hamming_distance):
            tally[variant] += 1

    # slide_window yields (window_text, index) pairs; the index is not needed.
    for window, _position in slide_window(data, k):
        record(window)
        if not options.reverse_complement:
            continue
        record(reverse_complement(window))
    return tally
import sys
from ReverseComplementADnaKmer import reverse_complement

# Prints the reverse complement of the first line of the dataset file.
# The input could instead be taken from standard input by using sys.stdin
# in place of the open(...) call below.
with open('/home/user/Downloads/dataset_240215_2.txt', mode='r', encoding='utf-8') as handle:
    contents = handle.read()

# Only the text before the first newline is used; the rest is ignored.
first_line = contents.partition('\n')[0]
seq_revcomp = reverse_complement(first_line)
print(f'{seq_revcomp}')