예제 #1
0
def initialize_distance(sequences):
    """Build symmetric pairwise DTW and Smith-Waterman matrices.

    Each sequence is passed through ``simulate_squiggle`` exactly once; the
    ``"current"`` entry of the result is used as the signal handed to
    ``calc_dtw``.  The raw sequences themselves are compared with ``s_w``.
    Both matrices include the diagonal (each sequence against itself).

    Args:
        sequences: Indexable, sized collection of sequences.

    Returns:
        Tuple ``(D_dtw, D_sw, scrappie_dfs)``: two ``(n, n)`` numpy arrays
        and the list of ``simulate_squiggle`` results, in input order.
    """
    n_seqs = len(sequences)
    D_dtw = np.zeros((n_seqs, n_seqs))
    D_sw = np.zeros((n_seqs, n_seqs))

    # Simulate every squiggle once, up front.  Index-based access is kept on
    # purpose so containers with label-based ``__getitem__`` behave as before.
    scrappie_dfs = [simulate_squiggle(sequences[k]) for k in range(n_seqs)]
    squiggles = [list(df["current"]) for df in scrappie_dfs]

    for j in range(n_seqs):
        # Only i <= j is computed; the mirrored cell is filled by symmetry.
        for i in range(j + 1):
            dtw_dist = calc_dtw(squiggles[i], squiggles[j])
            D_dtw[i, j] = dtw_dist
            D_dtw[j, i] = dtw_dist

            # s_w returns a 5-tuple; only its second element is stored.
            # A fresh cost dict is built per call, as the original did.
            _, sw_dist, _, _, _ = s_w(
                sequences[j],
                sequences[i],
                cost_fn={"match": 1, "mismatch": -1, "gap": -8},
            )
            D_sw[i, j] = sw_dist
            D_sw[j, i] = sw_dist
    return D_dtw, D_sw, scrappie_dfs
def calculate_sw(sequences, n_seqs=None):
    """Build a symmetric pairwise Smith-Waterman matrix.

    Only off-diagonal cells are computed; the diagonal is left at zero.

    Args:
        sequences: Indexable collection of sequences.
        n_seqs: Number of leading sequences to compare.  Defaults to
            ``len(sequences)`` (previously this was hard-coded to 96, which
            raised ``IndexError`` on shorter inputs).

    Returns:
        ``(n_seqs, n_seqs)`` numpy array holding the second element of the
        tuple returned by ``s_w`` for every pair.
    """
    if n_seqs is None:
        n_seqs = len(sequences)
    D_sw = np.zeros((n_seqs, n_seqs))
    for j in range(n_seqs):
        # Strictly i < j: each unordered pair is scored once and mirrored.
        for i in range(j):
            _, sw, _, _, _ = s_w(
                sequences[j],
                sequences[i],
                cost_fn={"match": 1, "mismatch": -1, "gap": -8},
            )
            D_sw[i, j] = sw
            D_sw[j, i] = sw
    return D_sw
예제 #3
0
def helper_run_sw(read, molbit_seqs=None, search_len=100, cost_fn=None):
    """Score the start of one read against every candidate sequence.

    Args:
        read: ``(read_id, seq)`` pair.
        molbit_seqs: Iterable of sequences to align against.  Required,
            despite the ``None`` default in the signature.
        search_len: Only the first ``search_len`` characters of ``seq`` are
            searched.
        cost_fn: Scoring dict passed through to ``s_w.s_w``.  When omitted,
            ``{"match": 2, "mismatch": -2, "gap": -1}`` is used.

    Returns:
        List ``[read_id, score_0, score_1, ...]`` with one score per entry
        of ``molbit_seqs``, in order.

    Raises:
        TypeError: If ``molbit_seqs`` is ``None``.
    """
    read_id, seq = read

    if molbit_seqs is None:
        raise TypeError("molbit_seqs must be an iterable of sequences, not None")
    if cost_fn is None:
        # Built per call so the default is never shared between calls.
        cost_fn = {"match": 2, "mismatch": -2, "gap": -1}

    sw_search_end = min(len(seq), search_len)
    search_seq = seq[:sw_search_end]

    results = [read_id]
    for header_seq in molbit_seqs:
        # s_w.s_w returns a 5-tuple; only its second element is kept.
        _, sw_score, _, _, _ = s_w.s_w(search_seq, header_seq,
                                       cost_fn=cost_fn)
        results.append(sw_score)
    return results