Example #1
0
def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None):
    r"""Compute cross-similarity matrix using Global Alignment kernel (GAK).

    GAK was originally presented in [1]_.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series
    sigma : float (default 1.)
        Bandwidth of the internal gaussian kernel used for GAK
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learns'
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
        for more details.

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)
    array([[1.        , 0.65629661],
           [0.65629661, 1.        ]])
    >>> cdist_gak([[1, 2, 2], [1., 2., 3., 4.]],
    ...           [[1, 2, 2, 3], [1., 2., 3., 4.], [1, 2, 2, 3]],
    ...           sigma=2.)
    array([[0.71059484, 0.29722877, 0.71059484],
           [0.65629661, 1.        , 0.65629661]])

    See Also
    --------
    gak : Compute Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """
    dataset1 = to_time_series_dataset(dataset1)

    if dataset2 is None:
        # Inspired from code by @GillesVandewiele:
        # https://github.com/rtavenar/tslearn/pull/128#discussion_r314978479
        matrix = numpy.zeros((len(dataset1), len(dataset1)))
        indices = numpy.triu_indices(len(dataset1), k=0, m=len(dataset1))
        matrix[indices] = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(dataset1[i], dataset1[j], sigma=sigma)
            for i in range(len(dataset1)) for j in range(i, len(dataset1))
        )
        indices = numpy.tril_indices(len(dataset1), k=-1, m=len(dataset1))
        matrix[indices] = matrix.T[indices]
        diagonal = numpy.diag(numpy.sqrt(1. / numpy.diag(matrix)))
        diagonal_left = diagonal_right = diagonal
    else:
        dataset2 = to_time_series_dataset(dataset2)
        matrix = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(dataset1[i], dataset2[j], sigma=sigma)
            for i in range(len(dataset1)) for j in range(len(dataset2))
        )
        matrix = numpy.array(matrix).reshape((len(dataset1), -1))
        diagonal_left = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(dataset1[i], dataset1[i], sigma=sigma)
            for i in range(len(dataset1))
        )
        diagonal_right = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(dataset2[j], dataset2[j], sigma=sigma)
            for j in range(len(dataset2))
        )
        diagonal_left = numpy.diag(1. / numpy.sqrt(diagonal_left))
        diagonal_right = numpy.diag(1. / numpy.sqrt(diagonal_right))
    return (diagonal_left.dot(matrix)).dot(diagonal_right)
Example #2
0
def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None, verbose=0):
    r"""Compute cross-similarity matrix using Global Alignment kernel (GAK).

    GAK was originally presented in [1]_.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series
    sigma : float (default 1.)
        Bandwidth of the internal gaussian kernel used for GAK
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learns'
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
        for more details.
    verbose : int, optional (default=0)
        The verbosity level: if non zero, progress messages are printed.
        Above 50, the output is sent to stdout.
        The frequency of the messages increases with the verbosity level.
        If it more than 10, all iterations are reported.
        `Glossary <https://joblib.readthedocs.io/en/latest/parallel.html#parallel-reference-documentation>`__
        for more details.

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)
    array([[1.        , 0.65629661],
           [0.65629661, 1.        ]])
    >>> cdist_gak([[1, 2, 2], [1., 2., 3., 4.]],
    ...           [[1, 2, 2, 3], [1., 2., 3., 4.], [1, 2, 2, 3]],
    ...           sigma=2.)
    array([[0.71059484, 0.29722877, 0.71059484],
           [0.65629661, 1.        , 0.65629661]])

    See Also
    --------
    gak : Compute Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """  # noqa: E501
    unnormalized_matrix = _cdist_generic(dist_fun=unnormalized_gak,
                                         dataset1=dataset1,
                                         dataset2=dataset2,
                                         n_jobs=n_jobs,
                                         verbose=verbose,
                                         sigma=sigma,
                                         compute_diagonal=True)
    dataset1 = to_time_series_dataset(dataset1)
    if dataset2 is None:
        diagonal = numpy.diag(numpy.sqrt(1. / numpy.diag(unnormalized_matrix)))
        diagonal_left = diagonal_right = diagonal
    else:
        dataset2 = to_time_series_dataset(dataset2)
        diagonal_left = Parallel(n_jobs=n_jobs,
                                 prefer="threads",
                                 verbose=verbose)(
                                     delayed(unnormalized_gak)(
                                         dataset1[i], dataset1[i], sigma=sigma)
                                     for i in range(len(dataset1)))
        diagonal_right = Parallel(n_jobs=n_jobs,
                                  prefer="threads",
                                  verbose=verbose)(
                                      delayed(unnormalized_gak)
                                      (dataset2[j], dataset2[j], sigma=sigma)
                                      for j in range(len(dataset2)))
        diagonal_left = numpy.diag(1. / numpy.sqrt(diagonal_left))
        diagonal_right = numpy.diag(1. / numpy.sqrt(diagonal_right))
    return (diagonal_left.dot(unnormalized_matrix)).dot(diagonal_right)