예제 #1
0
파일: test_pw.py 프로젝트: amirkdv/biseqt
def test_alignment_std_local(err):
    """Check standard-mode local alignment against a known mutation.

    A random sequence is mutated with all mutation probabilities set to
    ``err``; the mutant is then flanked by 100 ``A`` letters on the left and
    100 ``G`` letters on the right. The test asserts that:

    * the score reported by the aligner is at least the score of the known
      mutation transcript (both rounded to 3 decimals),
    * the score recomputed from the traced-back alignment equals the reported
      score (both rounded to 3 decimals),
    * the traced-back transcript projects to at most ``len(S)`` letters of the
      origin and strictly fewer than ``len(T)`` letters of the mutant.

    Args:
        err: Value used for the substitution, gap-open and gap-extend
            probabilities of the mutation process.
    """
    alphabet = Alphabet('ACGT')
    process = MutationProcess(alphabet, subst_probs=err, go_prob=err,
                              ge_prob=err)
    subst_scores, (go_score, ge_score) = process.log_odds_scores()

    origin = rand_seq(alphabet, 100)
    mutant, known_tx = process.mutate(origin)
    # Surround the mutant with unrelated flanks on both sides.
    left_flank = alphabet.parse('A' * 100)
    right_flank = alphabet.parse('G' * 100)
    mutant = left_flank + mutant + right_flank

    # Score of the transcript that actually produced the mutant; this is the
    # baseline the aligner's reported score is compared against.
    known_aln = Alignment(origin, mutant, known_tx)
    known_score = known_aln.calculate_score(subst_scores, go_score, ge_score)

    aligner_kw = {
        'subst_scores': subst_scores,
        'go_score': go_score,
        'ge_score': ge_score,
        'alnmode': STD_MODE,
        'alntype': LOCAL,
    }
    aligner = Aligner(origin, mutant, **aligner_kw)
    with aligner:
        reported = aligner.solve()
        assert round(reported, 3) >= round(known_score, 3), \
            'optimal alignment scores better than the known transcript'

        found = aligner.traceback()
        recomputed = found.calculate_score(subst_scores, go_score, ge_score)
        assert round(recomputed, 3) == round(reported, 3), \
            'The alignment score should be calculated correctly'

        origin_span = Alignment.projected_len(found.transcript, on='origin')
        mutant_span = Alignment.projected_len(found.transcript, on='mutant')
        assert origin_span <= len(origin) and mutant_span < len(mutant), \
            'Local alignments do not cover the entirety of both sequences'
예제 #2
0
def sequencing_sample(request):
    """Create a random genome, overlapping mutated reads, and a seed index.

    A random sequence of length 2000 is generated and reads of length 500 are
    cut from it, starting at whole multiples of half the read length, so that
    successive reads overlap starting at their halfway position. Each read is
    passed through a mutation process parameterized by ``request.param`` and
    then inserted into an in-memory database.

    Args:
        request: Object whose ``param`` attribute unpacks to
            ``(gap_prob, subst_prob, wordlen)``; presumably the pytest
            fixture request object (the decorator is not visible here).

    Returns:
        tuple:
            ``(seq, reads, records, gap_prob, seed_index)``: the full genome,
            the list of named reads, the list of values returned by
            ``db.insert`` for each read, the gap probability (used for both
            gap-open and gap-extend), and the seed index.
    """
    A = Alphabet('ACGT')
    gap_prob, subst_prob, wordlen = request.param
    seq_len, read_len = 2000, 500
    seq = rand_seq(A, seq_len).to_named('genome')
    mutation_process = MutationProcess(A, subst_probs=subst_prob,
                                       go_prob=gap_prob, ge_prob=gap_prob)

    # Reads start every half read length.
    # NOTE(review): the exclusive upper bound means no read starts at
    # seq_len - read_len, so the last read_len // 2 positions of the genome
    # are not covered by any read; confirm this is intended.
    reads = []
    for start in range(0, seq_len - read_len, read_len // 2):
        read, _ = mutation_process.mutate(seq[start: start + read_len])
        reads.append(read.to_named('read#%d' % start))

    db = DB(':memory:', A)
    kmer_index = KmerIndex(db, wordlen)
    seed_index = SeedIndex(kmer_index)
    # Initialize the database before any read is inserted.
    seed_index.db.initialize()
    records = [db.insert(r) for r in reads]
    return seq, reads, records, gap_prob, seed_index
예제 #3
0
def test_lossy_reads():
    """Check that a noisy read carries both substitutions and indels.

    Draws a single noisy read (mean length 50) from ``'ACT' * 100`` using a
    mutation process with substitution probability 0.3, gap-open probability
    0.2 and gap-extend probability 0.3, then inspects the returned transcript
    for at least one ``S`` and at least one ``I`` or ``D``.
    """
    alphabet = Alphabet('ACGT')
    template = alphabet.parse('ACT' * 100)
    process = MutationProcess(alphabet, subst_probs=0.3, go_prob=0.2,
                              ge_prob=0.3)

    # Only the transcript of the (read, position, transcript) triple is used.
    noisy_reads = process.noisy_read(template, len_mean=50, num=1)
    _read, _pos, transcript = next(noisy_reads)

    num_substs = transcript.count('S')
    num_indels = transcript.count('I') + transcript.count('D')
    assert num_substs > 0 and num_indels > 0, \
        'Random mutations should be performed to get lossy reads'
예제 #4
0
def test_log_odds_scores():
    """Sanity-check the signs and trends of log-odds scores.

    Three mutation processes over the ``ACGT`` alphabet are examined: one
    with equal gap-open and gap-extend probabilities (linear gap model), one
    with differing gap probabilities (affine gap model), and one with a lower
    substitution probability, to see how mismatch scores respond.
    """
    alphabet = Alphabet('ACGT')
    size = len(alphabet)

    # Linear gap model: the gap-open score must be exactly zero and the
    # gap-extend score negative.
    linear = MutationProcess(alphabet, subst_probs=.1, ge_prob=.1,
                             go_prob=.1)
    linear_subst, (go_score, ge_score) = linear.log_odds_scores()
    assert go_score == 0. and ge_score < 0
    # Mismatches (index pairs with i < j) score negative ...
    for i, j in combinations(range(size), 2):
        assert linear_subst[i][j] < 0
    # ... and matches (the diagonal) score positive.
    for i in range(size):
        assert linear_subst[i][i] > 0

    # Affine gap model: the gap-extend score is still negative.
    affine = MutationProcess(alphabet, subst_probs=.1, ge_prob=.2,
                             go_prob=.1)
    affine_subst, (_, affine_ge) = affine.log_odds_scores()
    assert affine_ge < 0

    # Lowering the substitution probability should push mismatch scores
    # further down relative to the affine process above.
    rare_subst = MutationProcess(alphabet, subst_probs=.01, ge_prob=.2,
                                 go_prob=.1)
    rare_scores, _ = rare_subst.log_odds_scores()
    assert rare_scores[0][1] < affine_subst[0][1], \
        'mismatch scores become more negative with lower mismatch probs'