def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None):
    r"""Compute cross-similarity matrix using Global Alignment kernel (GAK).

    GAK was originally presented in [1]_.

    Every entry is the unnormalized kernel value :math:`k(x, y)` divided by
    :math:`\sqrt{k(x, x) \, k(y, y)}`.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series. If ``None``, similarities are
        computed between the time series of ``dataset1`` themselves.
    sigma : float (default 1.)
        Bandwidth of the internal gaussian kernel used for GAK
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learn's
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
        for more details.

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)
    array([[1.        , 0.65629661],
           [0.65629661, 1.        ]])
    >>> cdist_gak([[1, 2, 2], [1., 2., 3., 4.]],
    ...           [[1, 2, 2, 3], [1., 2., 3., 4.], [1, 2, 2, 3]],
    ...           sigma=2.)
    array([[0.71059484, 0.29722877, 0.71059484],
           [0.65629661, 1.        , 0.65629661]])

    See Also
    --------
    gak : Compute Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """  # noqa: E501
    # NOTE(review): another ``cdist_gak`` taking a ``verbose`` argument is
    # defined after this one in the file; being bound later, it is the one
    # callers get.
    dataset1 = to_time_series_dataset(dataset1)
    n_series1 = len(dataset1)
    if dataset2 is None:
        # Only the upper triangle (diagonal included) is evaluated; the lower
        # triangle is then filled by mirroring.
        # Inspired from code by @GillesVandewiele:
        # https://github.com/rtavenar/tslearn/pull/128#discussion_r314978479
        matrix = numpy.zeros((n_series1, n_series1))
        rows, cols = numpy.triu_indices(n_series1)
        # Jobs are generated from the very index arrays used for the
        # assignment, so results land in the matching cells.
        matrix[rows, cols] = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(dataset1[i], dataset1[j], sigma=sigma)
            for i, j in zip(rows, cols)
        )
        matrix[cols, rows] = matrix[rows, cols]
        # Self-similarities are already available on the diagonal.
        scale_left = scale_right = numpy.sqrt(1. / numpy.diag(matrix))
    else:
        dataset2 = to_time_series_dataset(dataset2)
        matrix = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(ts1, ts2, sigma=sigma)
            for ts1 in dataset1
            for ts2 in dataset2
        )
        matrix = numpy.array(matrix).reshape((n_series1, -1))
        # Self-similarities have to be computed separately for each dataset.
        self_sim1 = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(ts, ts, sigma=sigma)
            for ts in dataset1
        )
        self_sim2 = Parallel(n_jobs=n_jobs, prefer="threads")(
            delayed(unnormalized_gak)(ts, ts, sigma=sigma)
            for ts in dataset2
        )
        scale_left = 1. / numpy.sqrt(self_sim1)
        scale_right = 1. / numpy.sqrt(self_sim2)
    # Scale rows, then columns, by broadcasting: same entries as
    # diag(scale_left) . matrix . diag(scale_right), without materialising
    # the two dense diagonal matrices nor running full matrix products.
    row_scaled = scale_left[:, numpy.newaxis] * matrix
    return row_scaled * scale_right[numpy.newaxis, :]
def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None, verbose=0):
    r"""Compute cross-similarity matrix using Global Alignment kernel (GAK).

    GAK was originally presented in [1]_.

    Every entry is the unnormalized kernel value :math:`k(x, y)` divided by
    :math:`\sqrt{k(x, x) \, k(y, y)}`.

    Parameters
    ----------
    dataset1
        A dataset of time series
    dataset2
        Another dataset of time series
    sigma : float (default 1.)
        Bandwidth of the internal gaussian kernel used for GAK
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learn's
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`__
        for more details.
    verbose : int, optional (default=0)
        The verbosity level: if non zero, progress messages are printed.
        Above 50, the output is sent to stdout.
        The frequency of the messages increases with the verbosity level.
        If it is more than 10, all iterations are reported.
        See joblib's
        `documentation <https://joblib.readthedocs.io/en/latest/parallel.html#parallel-reference-documentation>`__
        for more details.

    Returns
    -------
    numpy.ndarray
        Cross-similarity matrix

    Examples
    --------
    >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.)
    array([[1.        , 0.65629661],
           [0.65629661, 1.        ]])
    >>> cdist_gak([[1, 2, 2], [1., 2., 3., 4.]],
    ...           [[1, 2, 2, 3], [1., 2., 3., 4.], [1, 2, 2, 3]],
    ...           sigma=2.)
    array([[0.71059484, 0.29722877, 0.71059484],
           [0.65629661, 1.        , 0.65629661]])

    See Also
    --------
    gak : Compute Global Alignment kernel

    References
    ----------
    .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011.
    """  # noqa: E501
    unnormalized_matrix = _cdist_generic(dist_fun=unnormalized_gak,
                                         dataset1=dataset1,
                                         dataset2=dataset2,
                                         n_jobs=n_jobs,
                                         verbose=verbose,
                                         sigma=sigma,
                                         compute_diagonal=True)
    if dataset2 is None:
        # Self-similarities are read from the diagonal of the matrix, so no
        # dataset conversion is needed in this branch.
        scale_left = scale_right = numpy.sqrt(
            1. / numpy.diag(unnormalized_matrix)
        )
    else:
        # Self-similarities have to be computed separately for each dataset.
        dataset1 = to_time_series_dataset(dataset1)
        dataset2 = to_time_series_dataset(dataset2)
        self_sim1 = Parallel(n_jobs=n_jobs,
                             prefer="threads",
                             verbose=verbose)(
            delayed(unnormalized_gak)(ts, ts, sigma=sigma)
            for ts in dataset1
        )
        self_sim2 = Parallel(n_jobs=n_jobs,
                             prefer="threads",
                             verbose=verbose)(
            delayed(unnormalized_gak)(ts, ts, sigma=sigma)
            for ts in dataset2
        )
        scale_left = 1. / numpy.sqrt(self_sim1)
        scale_right = 1. / numpy.sqrt(self_sim2)
    # Scale rows, then columns, by broadcasting: same entries as
    # diag(scale_left) . matrix . diag(scale_right), without materialising
    # the two dense diagonal matrices nor running full matrix products.
    row_scaled = scale_left[:, numpy.newaxis] * unnormalized_matrix
    return row_scaled * scale_right[numpy.newaxis, :]