def dtw_svd_superpose_function(
    coords_1, coords_2, parameters: dict,
):
    """
    Refine an existing superposition of two coordinate sets.

    The score matrix is computed directly on the given coordinates, so the
    two inputs are expected to be reasonably superposed already. The matrix
    is handed to ``_align_and_superpose`` (DTW alignment followed by
    superposition on the aligning positions), and the aligned positions it
    returns are then scored once more.

    Parameters
    ----------
    coords_1, coords_2
        coordinates of the two structures
    parameters
        must contain "gamma", "gap_open_penalty" and "gap_extend_penalty"

    Returns
    -------
    (total score over the common positions, coords_1, coords_2), where both
    coordinate sets are the ones returned by ``_align_and_superpose``
    """
    gamma = parameters["gamma"]
    caretta_score = score_functions.get_caretta_score

    pairwise_scores = score_functions.make_score_matrix(
        coords_1, coords_2, caretta_score, gamma, normalized=False
    )
    aligned = _align_and_superpose(
        coords_1,
        coords_2,
        pairwise_scores,
        parameters["gap_open_penalty"],
        parameters["gap_extend_penalty"],
    )
    # The first element is discarded; the total score is recomputed below
    # on the aligned positions only.
    _, coords_1, coords_2, common_coords_1, common_coords_2 = aligned

    total_score = score_functions.get_total_score(
        common_coords_1, common_coords_2, caretta_score, gamma, False
    )
    return total_score, coords_1, coords_2
Example #2
0
def get_pairwise_alignment(
    coords_1,
    coords_2,
    gamma,
    gap_open_penalty: float,
    gap_extend_penalty: float,
    weights_1: np.ndarray,
    weights_2: np.ndarray,
    n_iter=3,
):
    """
    Align two coordinate sets with DTW, alternating alignment and
    superposition for as long as the DTW score keeps improving.

    Parameters
    ----------
    coords_1, coords_2
        coordinates of the two structures
    gamma
        passed through to the score-matrix construction
    gap_open_penalty, gap_extend_penalty
        gap penalties passed to the DTW aligner
    weights_1, weights_2
        stacked column-wise onto the respective coordinates before scoring
    n_iter
        maximum number of superpose-and-realign rounds

    Returns
    -------
    (aligned index array 1, aligned index array 2, DTW score, coords_1, coords_2)
    for the best-scoring round; the coordinates are the superposed ones of
    that round, or the inputs if no round improved on the first alignment.
    """

    def _score_and_align(xyz_1, xyz_2):
        # Weights travel along as extra feature columns of each position.
        matrix = score_functions.make_score_matrix(
            np.hstack((xyz_1, weights_1)),
            np.hstack((xyz_2, weights_2)),
            score_functions.get_caretta_score,
            gamma,
        )
        return dtw.dtw_align(matrix, gap_open_penalty, gap_extend_penalty)

    dtw_aln_array_1, dtw_aln_array_2, dtw_score = _score_and_align(
        coords_1, coords_2
    )
    for _ in range(n_iter):
        # Superpose on the positions aligned in the best alignment so far.
        pos_1, pos_2 = helper.get_common_positions(dtw_aln_array_1, dtw_aln_array_2)
        subset_1 = coords_1[pos_1]
        subset_2 = coords_2[pos_2]
        superposed = superposition_functions.paired_svd_superpose_with_subset(
            coords_1, coords_2, subset_1, subset_2
        )
        candidate_1, candidate_2, _ = superposed

        aln_1, aln_2, score = _score_and_align(candidate_1, candidate_2)
        # Stop at the first round that fails to beat the best score so far.
        if not score > dtw_score:
            break
        coords_1, coords_2 = candidate_1, candidate_2
        dtw_aln_array_1, dtw_aln_array_2 = aln_1, aln_2
        dtw_score = score
    return dtw_aln_array_1, dtw_aln_array_2, dtw_score, coords_1, coords_2
def moment_superpose_function(coords_1, coords_2, parameters):
    """
    Superpose two coordinate sets via DTW on moment invariants.

    Moment invariants (by default the four types "O_3", "O_4", "O_5", "F")
    are computed for each structure using the split settings in
    ``parameters``, optionally passed through ``log1p``, and compared to
    build the score matrix given to ``_align_and_superpose``.

    Parameters
    ----------
    coords_1, coords_2
        coordinates of the two structures
    parameters
        must contain "split_type" and "split_size". Missing entries for
        "upsample_rate", "moment_types", "scale", "gamma_moment", "gamma",
        "gap_open_penalty" and "gap_extend_penalty" are filled in with
        defaults; note that this writes into the caller's dict.

    Returns
    -------
    (score, coords_1, coords_2) as returned by ``_align_and_superpose``
    """
    fallbacks = (
        ("upsample_rate", 10),
        ("moment_types", ["O_3", "O_4", "O_5", "F"]),
        ("scale", True),
        ("gamma_moment", 0.6),
        ("gamma", 0.03),
        ("gap_open_penalty", 0.0),
        ("gap_extend_penalty", 0.0),
    )
    for key, fallback in fallbacks:
        parameters.setdefault(key, fallback)

    moment_types = [MomentType[name] for name in parameters["moment_types"]]

    def _moments_of(coords):
        # Same settings for both structures; only the coordinates differ.
        return MomentInvariants.from_coordinates(
            "name",
            coords,
            split_type=SplitType[parameters["split_type"]],
            split_size=parameters["split_size"],
            upsample_rate=parameters["upsample_rate"],
            moment_types=moment_types,
        ).moments

    moments_1 = _moments_of(coords_1)
    moments_2 = _moments_of(coords_2)
    if parameters["scale"]:
        moments_1, moments_2 = np.log1p(moments_1), np.log1p(moments_2)

    moment_scores = score_functions.make_score_matrix(
        moments_1,
        moments_2,
        score_functions.get_caretta_score,
        parameters["gamma_moment"],
        normalized=True,
    )
    score, coords_1, coords_2, _, _ = _align_and_superpose(
        coords_1,
        coords_2,
        moment_scores,
        parameters["gap_open_penalty"],
        parameters["gap_extend_penalty"],
    )
    return score, coords_1, coords_2
def _signal_superpose_index(
    index,
    coords_1,
    coords_2,
    gap_open_penalty=0.0,
    gap_extend_penalty=0.0,
    size=30,
    overlap=1,
):
    """
    Make an initial superposition from a DTW alignment of distance signals.

    Each structure is cut into windows of ``size`` consecutive coordinates,
    with window starts ``overlap`` positions apart. The signal of a window is
    the vector of euclidean distances from its ``index``-th coordinate to
    every coordinate in the window. The signals of both structures are
    DTW-aligned, and the reference coordinates of the aligned windows are
    used as the subset on which the full structures are superposed.

    Parameters
    ----------
    index
        position of the reference coordinate inside each window;
        -1 selects the last one (``size - 1``)
    coords_1, coords_2
        coordinates of the two structures
    gap_open_penalty, gap_extend_penalty
        gap penalties passed to the DTW aligner
    size
        number of coordinates per window
    overlap
        step between the starts of consecutive windows

    Returns
    -------
    (DTW score, superposed coords_1, superposed coords_2)
    """
    if index == -1:
        index = size - 1

    def _signals_and_anchors(coords):
        # One signal row and one reference ("anchor") coordinate per window.
        n_windows = (coords.shape[0] - size) // overlap
        signals = np.zeros((n_windows, size))
        anchors = np.zeros((n_windows, coords.shape[1]))
        for row, start in enumerate(range(0, n_windows * overlap, overlap)):
            window = coords[start : start + size]
            # Distance of every coordinate in the window to the reference one.
            signals[row] = np.sqrt(np.sum((window - window[index]) ** 2, axis=-1))
            anchors[row] = coords[start + index]
        return signals, anchors

    signals_1, middles_1 = _signals_and_anchors(coords_1)
    signals_2, middles_2 = _signals_and_anchors(coords_2)

    signal_scores = score_functions.make_score_matrix(
        signals_1,
        signals_2,
        score_functions.get_signal_score,
        gamma=0.1,
        normalized=False,
    )
    dtw_1, dtw_2, score = dtw.dtw_align(
        signal_scores, gap_open_penalty, gap_extend_penalty
    )

    # Reference coordinates of the windows that were aligned to each other.
    pos_1, pos_2 = helper.get_common_positions(dtw_1, dtw_2)
    aln_coords_1 = middles_1[pos_1]
    aln_coords_2 = middles_2[pos_2]

    coords_1, coords_2, _ = paired_svd_superpose_with_subset(
        coords_1, coords_2, aln_coords_1, aln_coords_2
    )
    return score, coords_1, coords_2
def geometricus_superpose_function(coords_1, coords_2, parameters):
    """
    Superpose two coordinate sets via DTW on Geometricus shapemers.

    Moment invariants are computed for both structures, discretised into
    shapemers through a ``GeometricusEmbedding`` at the requested
    resolution, and the per-structure shapemers are compared to build the
    score matrix given to ``_align_and_superpose``.

    Parameters
    ----------
    coords_1, coords_2
        coordinates of the two structures
    parameters
        must contain "split_type", "split_size", "resolution", "gamma",
        "gap_open_penalty" and "gap_extend_penalty". A missing
        "upsample_rate" is set to 10 in the caller's dict.

    Returns
    -------
    (score, coords_1, coords_2) as returned by ``_align_and_superpose``
    """
    parameters.setdefault("upsample_rate", 10)

    protein_keys = ["name1", "name2"]
    invariants = [
        MomentInvariants.from_coordinates(
            key,
            coords,
            split_type=SplitType[parameters["split_type"]],
            split_size=parameters["split_size"],
            upsample_rate=parameters["upsample_rate"],
        )
        for key, coords in zip(protein_keys, (coords_1, coords_2))
    ]
    embedder = GeometricusEmbedding.from_invariants(
        invariants, resolution=parameters["resolution"], protein_keys=protein_keys
    )
    shapemers_1, shapemers_2 = (
        np.array(embedder.proteins_to_shapemers[key]) for key in protein_keys
    )

    shapemer_scores = score_functions.make_score_matrix(
        shapemers_1,
        shapemers_2,
        score_functions.get_caretta_score,
        gamma=parameters["gamma"],
        normalized=False,
    )
    score, coords_1, coords_2, _, _ = _align_and_superpose(
        coords_1,
        coords_2,
        shapemer_scores,
        parameters["gap_open_penalty"],
        parameters["gap_extend_penalty"],
    )
    return score, coords_1, coords_2
def moment_multiple_superpose_function(coords_1, coords_2, parameters):
    """
    Superpose two coordinate sets via DTW on moment invariants computed
    with several fragmentation settings.

    For every split setting ``i`` in ``range(parameters["num_split_types"])``
    moment invariants are computed for both structures (by default the four
    types "O_3", "O_4", "O_5", "F"), optionally passed through ``log1p``,
    and turned into a score matrix. The matrices of all settings are summed
    and the sum is given to ``_align_and_superpose``.

    Parameters
    ----------
    coords_1, coords_2
        coordinates of the two structures
    parameters
        must contain "num_split_types" and, for every setting ``i``,
        "split_type_{i}" and "split_size_{i}". Missing entries for
        "upsample_rate", "moment_types_{i}", "scale", "gamma_moment",
        "gamma", "gap_open_penalty" and "gap_extend_penalty" are filled in
        with defaults; note that this writes into the caller's dict.

    Returns
    -------
    (score, coords_1, coords_2) as returned by ``_align_and_superpose``
    """
    parameters.setdefault("upsample_rate", 10)
    for i in range(parameters["num_split_types"]):
        parameters.setdefault(f"moment_types_{i}", ["O_3", "O_4", "O_5", "F"])
    for key, fallback in (
        ("scale", True),
        ("gamma_moment", 0.6),
        ("gamma", 0.03),
        ("gap_open_penalty", 0.0),
        ("gap_extend_penalty", 0.0),
    ):
        parameters.setdefault(key, fallback)

    def _moments_of(coords, setting, moment_types):
        # Moment invariants of one structure under split setting `setting`.
        return MomentInvariants.from_coordinates(
            "name",
            coords,
            split_type=SplitType[parameters[f"split_type_{setting}"]],
            split_size=parameters[f"split_size_{setting}"],
            upsample_rate=parameters["upsample_rate"],
            moment_types=moment_types,
        ).moments

    # One (moments of structure 1, moments of structure 2) pair per setting.
    moment_pairs = []
    for i in range(parameters["num_split_types"]):
        moment_types = [MomentType[name] for name in parameters[f"moment_types_{i}"]]
        pair = [
            _moments_of(coords_1, i, moment_types),
            _moments_of(coords_2, i, moment_types),
        ]
        if parameters["scale"]:
            pair = [np.log1p(moments) for moments in pair]
        moment_pairs.append(pair)

    # The summed matrix takes its shape from the first split setting.
    first_1, first_2 = moment_pairs[0]
    score_matrix = np.zeros((first_1.shape[0], first_2.shape[0]))
    for m_1, m_2 in moment_pairs:
        score_matrix += score_functions.make_score_matrix(
            m_1,
            m_2,
            score_functions.get_caretta_score,
            gamma=parameters["gamma_moment"],
            normalized=True,
        )

    score, coords_1, coords_2, _, _ = _align_and_superpose(
        coords_1,
        coords_2,
        score_matrix,
        parameters["gap_open_penalty"],
        parameters["gap_extend_penalty"],
    )
    return score, coords_1, coords_2