#!/usr/bin/python import dnaseq import bm_preproc import kmer_index human_chromosome = dnaseq.read_genome("chr1.GRCh38.excerpt.fasta") def approximate_matches(p, t, index): n = 2 matches = set() total_hits = 0 for i in range(0, 24, 8): pi = p[i:i+8] hits = index.query(pi); total_hits += len(hits) for hit in hits: if hit < i or hit - i + len(p) > len(t): continue missmatches = 0 for j in range(0, i): if p[j] != t[hit - i + j]: missmatches += 1 if missmatches > n: break for j in range(i + len(pi), len(p)): if p[j] != t[hit - i + j]: missmatches += 1 if missmatches > n: break if missmatches <= n:
#!/usr/bin/python import dnaseq import bm_preproc import kmer_index human_chromosome = dnaseq.read_genome("chr1.GRCh38.excerpt.fasta") def approximate_matches(p, t, index): n = 2 matches = set() total_hits = 0 for i in range(0, 24, 8): pi = p[i:i + 8] hits = index.query(pi) total_hits += len(hits) for hit in hits: if hit < i or hit - i + len(p) > len(t): continue missmatches = 0 for j in range(0, i): if p[j] != t[hit - i + j]: missmatches += 1 if missmatches > n: break for j in range(i + len(pi), len(p)): if p[j] != t[hit - i + j]: missmatches += 1 if missmatches > n: break
#!/usr/bin/python import dnaseq lambda_virus = dnaseq.read_genome("lambda_virus.fa") def question_01(): occurrences = dnaseq.naive_with_rc(lambda_virus, "AGGT") print "question_01: %i" % len(occurrences) def question_02(): occurrences = dnaseq.naive_with_rc(lambda_virus, "TTAA") print "question_02: %i" % len(occurrences) def question_03(): occurrences = dnaseq.naive_with_rc(lambda_virus, "ACTAAGT") print "question_03: %i" % occurrences[0] def question_04(): occurrences = dnaseq.naive_with_rc(lambda_virus, "AGTCGA") print "question_04: %i" % occurrences[0] def question_05(): occurrences = dnaseq.naive_approximate(lambda_virus, "TTCAAGCC", 2) print "question_05: %i" % len(occurrences) def question_06(): occurrences = dnaseq.naive_approximate(lambda_virus, "AGGAGGTT", 2) print "question_06: %i" % occurrences[0] def question_07():