예제 #1
0
#!/usr/bin/python

import dnaseq
import bm_preproc
import kmer_index

# Module-level value produced once, at import time, by dnaseq.read_genome.
# NOTE(review): presumably the sequence text of the named FASTA excerpt --
# confirm the return type in dnaseq.read_genome.
human_chromosome = dnaseq.read_genome("chr1.GRCh38.excerpt.fasta")

def approximate_matches(p, t, index):
	n = 2
	matches = set()
	total_hits = 0
	for i in range(0, 24, 8):
		pi = p[i:i+8]
		hits = index.query(pi); 
		total_hits += len(hits)
		for hit in hits:
			if hit < i or hit - i + len(p) > len(t):
				continue
			missmatches = 0
			for j in range(0, i):
				if p[j] != t[hit - i + j]:
					missmatches += 1
					if missmatches > n:
						break
			for j in range(i + len(pi), len(p)):
				if p[j] != t[hit - i + j]:
					missmatches += 1
					if missmatches > n:
						break
			if missmatches <= n:
예제 #2
0
#!/usr/bin/python

import dnaseq
import bm_preproc
import kmer_index

# Module-level value produced once, at import time, by dnaseq.read_genome.
# NOTE(review): presumably the sequence text of the named FASTA excerpt --
# confirm the return type in dnaseq.read_genome.
human_chromosome = dnaseq.read_genome("chr1.GRCh38.excerpt.fasta")


def approximate_matches(p, t, index):
    n = 2
    matches = set()
    total_hits = 0
    for i in range(0, 24, 8):
        pi = p[i:i + 8]
        hits = index.query(pi)
        total_hits += len(hits)
        for hit in hits:
            if hit < i or hit - i + len(p) > len(t):
                continue
            missmatches = 0
            for j in range(0, i):
                if p[j] != t[hit - i + j]:
                    missmatches += 1
                    if missmatches > n:
                        break
            for j in range(i + len(pi), len(p)):
                if p[j] != t[hit - i + j]:
                    missmatches += 1
                    if missmatches > n:
                        break
예제 #3
0
#!/usr/bin/python

import dnaseq

# Produced once, at import time, and passed as the first argument to the
# dnaseq search helpers by the question_* functions below.
# NOTE(review): presumably the genome sequence as a string -- confirm the
# return type in dnaseq.read_genome.
lambda_virus = dnaseq.read_genome("lambda_virus.fa")

def question_01():
	"""Print how many results ``dnaseq.naive_with_rc`` returns for "AGGT".

	The helper is called with the module-level ``lambda_virus`` value and the
	count is written to stdout as ``question_01: <count>``.

	NOTE(review): the helper name suggests it also matches the reverse
	complement of the pattern -- confirm in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_with_rc(lambda_virus, "AGGT")
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_01: %i" % len(occurrences))

def question_02():
	"""Print how many results ``dnaseq.naive_with_rc`` returns for "TTAA".

	The helper is called with the module-level ``lambda_virus`` value and the
	count is written to stdout as ``question_02: <count>``.

	NOTE(review): the helper name suggests it also matches the reverse
	complement of the pattern -- confirm in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_with_rc(lambda_virus, "TTAA")
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_02: %i" % len(occurrences))

def question_03():
	"""Print the first result ``dnaseq.naive_with_rc`` returns for "ACTAAGT".

	The helper is called with the module-level ``lambda_virus`` value and the
	first element of its result is written to stdout as
	``question_03: <value>``.

	Indexing with ``[0]`` fails (``IndexError`` for a list) when the helper
	returns nothing.

	NOTE(review): ``[0]`` is the leftmost offset only if the helper returns
	offsets in ascending order -- confirm in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_with_rc(lambda_virus, "ACTAAGT")
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_03: %i" % occurrences[0])

def question_04():
	"""Print the first result ``dnaseq.naive_with_rc`` returns for "AGTCGA".

	The helper is called with the module-level ``lambda_virus`` value and the
	first element of its result is written to stdout as
	``question_04: <value>``.

	Indexing with ``[0]`` fails (``IndexError`` for a list) when the helper
	returns nothing.

	NOTE(review): ``[0]`` is the leftmost offset only if the helper returns
	offsets in ascending order -- confirm in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_with_rc(lambda_virus, "AGTCGA")
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_04: %i" % occurrences[0])

def question_05():
	"""Print how many results ``dnaseq.naive_approximate`` returns for "TTCAAGCC".

	The helper is called with the module-level ``lambda_virus`` value, the
	pattern and the integer ``2``; the count is written to stdout as
	``question_05: <count>``.

	NOTE(review): the third argument is presumably the maximum number of
	mismatches allowed -- confirm in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_approximate(lambda_virus, "TTCAAGCC", 2)
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_05: %i" % len(occurrences))

def question_06():
	"""Print the first result ``dnaseq.naive_approximate`` returns for "AGGAGGTT".

	The helper is called with the module-level ``lambda_virus`` value, the
	pattern and the integer ``2``; the first element of its result is written
	to stdout as ``question_06: <value>``.

	Indexing with ``[0]`` fails (``IndexError`` for a list) when the helper
	returns nothing.

	NOTE(review): the third argument is presumably the maximum number of
	mismatches allowed, and ``[0]`` is the leftmost offset only if results
	come back in ascending order -- confirm both in ``dnaseq``.
	"""
	occurrences = dnaseq.naive_approximate(lambda_virus, "AGGAGGTT", 2)
	# Parenthesised single-argument print behaves the same on Python 2 and 3;
	# the bare print statement is a SyntaxError on Python 3.
	print("question_06: %i" % occurrences[0])

def question_07():