def initialize_distance(sequences):
    """Build two symmetric pairwise matrices over ``sequences``.

    For every index pair ``(i, j)`` with ``i <= j`` (the diagonal is
    included) two values are computed and mirrored into ``[i, j]`` and
    ``[j, i]``:

    * ``calc_dtw`` applied to the two simulated ``"current"`` traces, and
    * the second element of the 5-tuple returned by ``s_w`` for the two raw
      sequences, scored with match=1, mismatch=-1, gap=-8.

    Each sequence is passed through ``simulate_squiggle`` at most once; the
    result is cached and its ``"current"`` entry is kept as a plain list.

    Parameters
    ----------
    sequences
        Indexable collection supporting ``len()``; each element is handed
        to ``simulate_squiggle`` and to ``s_w``.

    Returns
    -------
    tuple
        ``(D_dtw, D_sw, scrappie_dfs)``: two ``(n, n)`` arrays created with
        ``np.zeros`` and the list of cached ``simulate_squiggle`` results,
        where ``n = len(sequences)``.
    """
    total = len(sequences)
    D_dtw = np.zeros((total, total))
    D_sw = np.zeros((total, total))
    scrappie_dfs = [None] * total
    squiggles = [None] * total

    def _ensure_simulated(idx):
        # Simulate only the first time an index is seen; later visits reuse
        # the cached result.
        if scrappie_dfs[idx] is not None:
            return
        scrappie_dfs[idx] = simulate_squiggle(sequences[idx])
        squiggles[idx] = list(scrappie_dfs[idx]["current"])

    for col in range(total):
        _ensure_simulated(col)
        for row in range(total):
            _ensure_simulated(row)
            if row <= col:
                # Only one triangle (plus the diagonal) is computed; the
                # result is mirrored to keep the matrices symmetric.
                dtw_value = calc_dtw(squiggles[row], squiggles[col])
                D_dtw[row, col] = dtw_value
                D_dtw[col, row] = dtw_value

                sw_result = s_w(
                    sequences[col],
                    sequences[row],
                    cost_fn={"match": 1, "mismatch": -1, "gap": -8},
                )
                _, sw_value, _, _, _ = sw_result
                D_sw[row, col] = sw_value
                D_sw[col, row] = sw_value

    return D_dtw, D_sw, scrappie_dfs
def calculate_sw(sequences, n_seqs=96):
    """Compute a symmetric matrix of pairwise ``s_w`` scores.

    For every index pair ``i < j`` below ``n_seqs``, ``s_w`` is called on
    ``sequences[j]`` and ``sequences[i]`` with match=1, mismatch=-1, gap=-8,
    and the second element of its 5-tuple result is stored at both
    ``[i, j]`` and ``[j, i]``. The diagonal is never computed and stays 0.

    Parameters
    ----------
    sequences
        Indexable collection; must provide indices ``0 .. n_seqs - 1``
        whenever ``n_seqs > 1``.
    n_seqs : int, optional
        Number of leading sequences to compare, and the side length of the
        returned matrix. Defaults to 96, the size that was previously
        hard-coded.

    Returns
    -------
    numpy.ndarray
        ``(n_seqs, n_seqs)`` array of scores.

    Raises
    ------
    IndexError
        Propagated from indexing ``sequences`` when it holds fewer than
        ``n_seqs`` items (for list-like inputs).
    """
    D_sw = np.zeros((n_seqs, n_seqs))
    for j in range(1, n_seqs):
        # Visit only i < j; the mirrored assignment fills the other half.
        for i in range(j):
            _, sw, _, _, _ = s_w(
                sequences[j],
                sequences[i],
                cost_fn={"match": 1, "mismatch": -1, "gap": -8},
            )
            D_sw[i, j] = sw
            D_sw[j, i] = sw
    return D_sw
def helper_run_sw(read, molbit_seqs=None, search_len=100, cost_fn=None):
    """Score the start of one read against every sequence in ``molbit_seqs``.

    Parameters
    ----------
    read
        Two-item iterable ``(read_id, seq)``.
    molbit_seqs
        Iterable of sequences to compare against. Required in practice: the
        ``None`` default is only a placeholder and is rejected.
    search_len : int, optional
        Only ``seq[:search_len]`` is searched (slicing already clamps to the
        length of ``seq``). Defaults to 100.
    cost_fn : dict, optional
        Scoring dict forwarded to ``s_w.s_w``. When ``None``, a fresh
        ``{"match": 2, "mismatch": -2, "gap": -1}`` is used for this call.

    Returns
    -------
    list
        ``[read_id, score_0, score_1, ...]`` where each score is the second
        element of the 5-tuple returned by ``s_w.s_w``, in the iteration
        order of ``molbit_seqs``.

    Raises
    ------
    TypeError
        If ``molbit_seqs`` is ``None``.
    """
    read_id, seq = read

    if molbit_seqs is None:
        # Iterating None raised TypeError before as well; keep the type but
        # say what is actually wrong.
        raise TypeError("molbit_seqs must be an iterable of sequences, not None")

    if cost_fn is None:
        # Built per call so no dict is shared between invocations (the old
        # dict-literal default was a single object reused by every call).
        cost_fn = {"match": 2, "mismatch": -2, "gap": -1}

    search_seq = seq[:search_len]
    results = [read_id]
    # NOTE(review): `s_w` is used here as a module/namespace (`s_w.s_w`);
    # confirm this matches how `s_w` is imported in this file.
    for header_seq in molbit_seqs:
        _, sw_score, _, _, _ = s_w.s_w(search_seq, header_seq, cost_fn=cost_fn)
        results.append(sw_score)
    return results