def test_alignment_std_local(err):
    """Check local alignment of a sequence against a padded, mutated copy.

    A random sequence of length 100 is mutated with substitution, gap-open
    and gap-extend probabilities all equal to ``err``; the mutant is then
    flanked by 100 ``A``s on the left and 100 ``G``s on the right. The test
    asserts that:

    * the aligner's reported score is at least the score of the known
      mutation transcript (both rounded to 3 decimals),
    * re-scoring the traced-back alignment reproduces the reported score,
    * the traced-back alignment spans at most all of the original sequence
      and strictly less than the padded mutant.

    Args:
        err: Probability used for substitutions, gap opening and gap
            extension in the mutation process.
    """
    alphabet = Alphabet('ACGT')
    mutator = MutationProcess(
        alphabet, subst_probs=err, go_prob=err, ge_prob=err,
    )
    subst_scores, (go_score, ge_score) = mutator.log_odds_scores()
    # Positional scoring arguments shared by every calculate_score() call.
    scoring = (subst_scores, go_score, ge_score)

    origin = rand_seq(alphabet, 100)
    mutant_core, known_transcript = mutator.mutate(origin)
    left_pad = alphabet.parse('A' * 100)
    right_pad = alphabet.parse('G' * 100)
    mutant = left_pad + mutant_core + right_pad

    # NOTE(review): the transcript was produced against the unpadded mutant
    # but is paired here with the padded one without an explicit offset --
    # confirm this is the intended baseline.
    known_alignment = Alignment(origin, mutant, known_transcript)
    mutation_score = known_alignment.calculate_score(*scoring)

    aligner_options = {
        'subst_scores': subst_scores,
        'go_score': go_score,
        'ge_score': ge_score,
        'alnmode': STD_MODE,
        'alntype': LOCAL,
    }
    aligner = Aligner(origin, mutant, **aligner_options)
    with aligner:
        reported_score = aligner.solve()
        assert round(reported_score, 3) >= round(mutation_score, 3), \
            'optimal alignment scores better than the known transcript'

        alignment = aligner.traceback()
        aln_score = alignment.calculate_score(*scoring)
        assert round(aln_score, 3) == round(reported_score, 3), \
            'The alignment score should be calculated correctly'

        transcript = alignment.transcript
        ori_len = Alignment.projected_len(transcript, on='origin')
        mut_len = Alignment.projected_len(transcript, on='mutant')
        assert ori_len <= len(origin) and mut_len < len(mutant), \
            'Local alignments do not cover the entirety of both sequences'
def sequencing_sample(request):
    """Create a random genome, overlapping mutated reads, and a seed index.

    A random sequence of length 2000 is generated and reads of length 500
    are taken at start positions that are whole multiples of half the read
    length, so successive reads overlap starting at their halfway position.
    Each read is passed through a mutation process whose probabilities come
    from the request parameters, then inserted into an in-memory database.

    Args:
        request: Object whose ``param`` attribute unpacks to
            ``(gap_prob, subst_prob, wordlen)``; presumably the pytest
            fixture request of a parameterized fixture -- confirm against
            the decorator, which is not visible here.

    Returns:
        tuple: ``(seq, reads, records, gap_prob, seed_index)`` where ``seq``
        is the full genome, ``reads`` is the list of named reads,
        ``records`` holds the value returned by ``db.insert`` for each read
        (in the same order), ``gap_prob`` is the gap probability used, and
        ``seed_index`` is the seed index built over the database.
    """
    A = Alphabet('ACGT')
    gap_prob, subst_prob, wordlen = request.param
    seq_len, read_len = 2000, 500
    seq = rand_seq(A, seq_len).to_named('genome')
    mutation_process = MutationProcess(
        A, subst_probs=subst_prob, go_prob=gap_prob, ge_prob=gap_prob,
    )

    # NOTE(review): the stop value is exclusive, so no read starts at
    # seq_len - read_len and the last read_len // 2 positions of the genome
    # are not covered by any read -- confirm this is intended.
    reads = []
    for start in range(0, seq_len - read_len, read_len // 2):
        read, _ = mutation_process.mutate(seq[start: start + read_len])
        reads.append(read.to_named('read#%d' % start))

    db = DB(':memory:', A)
    kmer_index = KmerIndex(db, wordlen)
    seed_index = SeedIndex(kmer_index)
    # The database is initialized through the seed index before any read is
    # inserted; keep this ordering.
    seed_index.db.initialize()
    records = [db.insert(read) for read in reads]
    return seq, reads, records, gap_prob, seed_index