def silhouette_score(X, labels, metric=None, sample_size=None,
                     metric_params=None, n_jobs=None, verbose=0,
                     random_state=None, **kwds):
    """Compute the mean Silhouette Coefficient of all samples (cf. [1]_ and
    [2]_).

    Read more in the `scikit-learn documentation
    <http://scikit-learn.org/stable/modules/clustering.html#silhouette-coefficient>`_.

    Parameters
    ----------
    X : array [n_ts, n_ts] if metric == "precomputed", or
        [n_ts, sz, d] otherwise
        Array of pairwise distances between time series, or a time series
        dataset.
    labels : array, shape = [n_ts]
        Predicted labels for each time series.
    metric : string, callable or None (default: None)
        The metric to use when calculating distance between time series.
        Should be one of {'dtw', 'softdtw', 'euclidean'} or a callable
        distance function or None.
        If 'softdtw' is passed, a normalized version of Soft-DTW is used that
        is defined as `sdtw_(x,y) := sdtw(x,y) - 1/2(sdtw(x,x)+sdtw(y,y))`.
        If X is the distance array itself, use ``metric="precomputed"``.
        If None, dtw is used.
        A callable is invoked on pairs of time series (each reshaped back to
        ``(sz, d)``); `metric_params` is not forwarded to it, only ``**kwds``
        are.
    sample_size : int or None (default: None)
        The size of the sample to use when computing the Silhouette Coefficient
        on a random subset of the data.
        If ``sample_size is None``, no sampling is used.
    metric_params : dict or None (default: None)
        Parameter values for the chosen metric.
        For metrics that accept parallelization of the cross-distance matrix
        computations, `n_jobs` key passed in `metric_params` is overridden by
        the `n_jobs` argument. The dict passed by the caller is never
        modified.
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel for cross-distance matrix
        computations.
        Ignored if the cross-distance matrix cannot be computed using
        parallelization.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learns'
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
        for more details.
    verbose : int (default: 0)
        Verbosity level forwarded to the DTW cross-distance computation
        (only used when ``metric`` is ``"dtw"`` or ``None``).
        If nonzero, joblib progress messages are printed.
    random_state : int, RandomState instance or None, optional (default: None)
        The generator used to randomly select a subset of samples. If int,
        random_state is the seed used by the random number generator; If
        RandomState instance, random_state is the random number generator; If
        None, the random number generator is the RandomState instance used by
        `np.random`. Used when ``sample_size is not None``.
    **kwds : optional keyword parameters
        Any further parameters are passed directly to the distance function,
        just as for the `metric_params` parameter.

    Returns
    -------
    silhouette : float
        Mean Silhouette Coefficient for all samples.

    References
    ----------
    .. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
       Interpretation and Validation of Cluster Analysis". Computational
       and Applied Mathematics 20: 53-65.
       <http://www.sciencedirect.com/science/article/pii/0377042787901257>`_
    .. [2] `Wikipedia entry on the Silhouette Coefficient
       <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_

    Examples
    --------
    >>> from tslearn.generators import random_walks
    >>> from tslearn.metrics import cdist_dtw
    >>> numpy.random.seed(0)
    >>> X = random_walks(n_ts=20, sz=16, d=1)
    >>> labels = numpy.random.randint(2, size=20)
    >>> silhouette_score(X, labels, metric="dtw")  # doctest: +ELLIPSIS
    0.13383800...
    >>> silhouette_score(X, labels, metric="euclidean")  # doctest: +ELLIPSIS
    0.09126917...
    >>> silhouette_score(X, labels, metric="softdtw")  # doctest: +ELLIPSIS
    0.17953934...
    >>> silhouette_score(X, labels, metric="softdtw",
    ...                  metric_params={"gamma": 2.})  # doctest: +ELLIPSIS
    0.17591060...
    >>> silhouette_score(cdist_dtw(X), labels,
    ...                  metric="precomputed")  # doctest: +ELLIPSIS
    0.13383800...
    """
    # Work on a copy so that the caller's `metric_params` dict is untouched,
    # then let explicit keyword arguments take precedence.
    metric_params_ = {} if metric_params is None else metric_params.copy()
    metric_params_.update(kwds)
    # `n_jobs` is controlled exclusively by the dedicated argument.
    metric_params_.pop("n_jobs", None)

    sklearn_metric = None
    if metric == "precomputed":
        sklearn_X = X
    elif metric == "dtw" or metric is None:
        sklearn_X = cdist_dtw(X, n_jobs=n_jobs, verbose=verbose,
                              **metric_params_)
    elif metric == "softdtw":
        sklearn_X = cdist_soft_dtw_normalized(X, **metric_params_)
    elif metric == "euclidean":
        X_ = to_time_series_dataset(X)
        # Use the converted array's shape: `X` itself may be a plain list
        # that has no `.shape` attribute.
        X_ = X_.reshape((X_.shape[0], -1))
        sklearn_X = cdist(X_, X_, metric="euclidean")
    else:
        # Callable metric: scikit-learn works on 2D data, so series are
        # flattened here and restored to (sz, d) right before each call.
        X_ = to_time_series_dataset(X)
        n, sz, d = X_.shape
        sklearn_X = X_.reshape((n, -1))

        def sklearn_metric(x, y, **metric_kwds):
            # scikit-learn forwards `**kwds` to callable metrics, hence the
            # wrapper must accept (and pass on) keyword arguments.
            return metric(to_time_series(x.reshape((sz, d)),
                                         remove_nans=True),
                          to_time_series(y.reshape((sz, d)),
                                         remove_nans=True),
                          **metric_kwds)

    # Do NOT rebind `metric` here: the wrapper above closes over that name,
    # and rebinding it to the wrapper itself would make every call recurse
    # until a RecursionError is raised.
    if sklearn_metric is None:
        sklearn_metric_arg = "precomputed"
    else:
        sklearn_metric_arg = sklearn_metric
    return sklearn_silhouette_score(X=sklearn_X,
                                    labels=labels,
                                    metric=sklearn_metric_arg,
                                    sample_size=sample_size,
                                    random_state=random_state,
                                    **kwds)
def silhouette_score(X, labels, metric=None, sample_size=None,
                     metric_params=None, random_state=None, **kwds):
    """Compute the mean Silhouette Coefficient of all samples (cf. [1]_ and
    [2]_).

    Read more in the `scikit-learn documentation
    <http://scikit-learn.org/stable/modules/clustering.html#silhouette-coefficient>`_.

    Parameters
    ----------
    X : array [n_ts, n_ts] if metric == "precomputed", or
        [n_ts, sz, d] otherwise
        Array of pairwise distances between time series, or a time series
        dataset.
    labels : array, shape = [n_ts]
        Predicted labels for each time series.
    metric : string, callable or None (default: None)
        The metric to use when calculating distance between time series.
        Should be one of {'dtw', 'softdtw', 'euclidean'} or a callable
        distance function.
        If 'softdtw' is passed, a normalized version of Soft-DTW is used that
        is defined as `sdtw_(x,y) := sdtw(x,y) - 1/2(sdtw(x,x)+sdtw(y,y))`.
        If X is the distance array itself, use ``metric="precomputed"``.
        If None, `dtw` is used as the callable distance function.
    sample_size : int or None
        The size of the sample to use when computing the Silhouette Coefficient
        on a random subset of the data.
        If ``sample_size is None``, no sampling is used.
    metric_params : dict or None
        Parameter values for the chosen metric. Value associated to the
        `"gamma_sdtw"` key corresponds to the gamma parameter in Soft-DTW.
        Only that key is read, and only when ``metric == "softdtw"``.
    random_state : int, RandomState instance or None, optional (default=None)
        The generator used to randomly select a subset of samples. If int,
        random_state is the seed used by the random number generator; If
        RandomState instance, random_state is the random number generator; If
        None, the random number generator is the RandomState instance used by
        `np.random`. Used when ``sample_size is not None``.
    **kwds : optional keyword parameters
        Any further parameters are passed directly to the distance function.

    Returns
    -------
    silhouette : float
        Mean Silhouette Coefficient for all samples.

    References
    ----------
    .. [1] `Peter J. Rousseeuw (1987). "Silhouettes: a Graphical Aid to the
       Interpretation and Validation of Cluster Analysis". Computational
       and Applied Mathematics 20: 53-65.
       <http://www.sciencedirect.com/science/article/pii/0377042787901257>`_
    .. [2] `Wikipedia entry on the Silhouette Coefficient
       <https://en.wikipedia.org/wiki/Silhouette_(clustering)>`_

    Examples
    --------
    >>> from tslearn.generators import random_walks
    >>> from tslearn.metrics import cdist_dtw
    >>> X = random_walks(n_ts=50, sz=32, d=1)
    >>> labels = numpy.random.randint(2, size=50)
    >>> s_sc = silhouette_score(X, labels, metric="dtw")
    >>> s_sc2 = silhouette_score(X, labels, metric="euclidean")
    >>> s_sc3 = silhouette_score(X, labels, metric="softdtw")
    >>> s_sc3b = silhouette_score(X, labels, metric="softdtw",
    ...                           metric_params={"gamma_sdtw": 2.})
    >>> s_sc4 = silhouette_score(cdist_dtw(X), labels, metric="precomputed")
    """
    if metric_params is None:
        metric_params = {}

    sklearn_metric = None
    if metric == "precomputed":
        sklearn_X = X
    elif metric == "dtw":
        sklearn_X = cdist_dtw(X)
    elif metric == "softdtw":
        gamma = metric_params.get("gamma_sdtw")
        if gamma is not None:
            sklearn_X = cdist_soft_dtw_normalized(X, gamma=gamma)
        else:
            # Let the Soft-DTW routine apply its own default gamma.
            sklearn_X = cdist_soft_dtw_normalized(X)
    elif metric == "euclidean":
        X_ = to_time_series_dataset(X)
        # Use the converted array's shape: `X` itself may be a plain list
        # that has no `.shape` attribute.
        X_ = X_.reshape((X_.shape[0], -1))
        sklearn_X = cdist(X_, X_, metric="euclidean")
    else:
        # Callable (or default) metric: scikit-learn works on 2D data, so
        # series are flattened here and restored to (sz, d) before each call.
        X_ = to_time_series_dataset(X)
        n, sz, d = X_.shape
        sklearn_X = X_.reshape((n, -1))
        distance = dtw if metric is None else metric

        def sklearn_metric(x, y, **metric_kwds):
            # scikit-learn forwards `**kwds` to callable metrics, hence the
            # wrapper must accept (and pass on) keyword arguments.
            return distance(
                to_time_series(x.reshape((sz, d)), remove_nans=True),
                to_time_series(y.reshape((sz, d)), remove_nans=True),
                **metric_kwds)

    return sklearn_silhouette_score(
        X=sklearn_X,
        labels=labels,
        metric="precomputed" if sklearn_metric is None else sklearn_metric,
        sample_size=sample_size,
        random_state=random_state,
        **kwds)
# Root folder of the UCR archive. Change this value if necessary.
PATH = "G:/Coding/ML/UCRArchive_2018/"
dataset = "CBF"

# Files are read from <PATH><dataset>/<dataset>_TRAIN.tsv and ..._TEST.tsv.
file_train = "{0}{1}/{1}_TRAIN.tsv".format(PATH, dataset)
file_test = "{0}{1}/{1}_TEST.tsv".format(PATH, dataset)

# Tab-separated numeric tables, no header row is skipped.
train = np.genfromtxt(file_train, delimiter="\t")
test = np.genfromtxt(file_test, delimiter="\t")

# The first column is used as the target, the remaining ones as the series.
y_train = train[:, 0]
X_train = train[:, 1:]
y_test = test[:, 0]
X_test = test[:, 1:]

# Feature extraction: normalized Soft-DTW cross-distances, always computed
# against the training series.
train_softdtw = cdist_soft_dtw_normalized(X_train, X_train)
test_softdtw = cdist_soft_dtw_normalized(X_train, X_test)

# NOTE: earlier experiments (kept as commented-out code in the original)
# also built plain DTW features and stacked them with the Soft-DTW ones;
# only the Soft-DTW features are used here.

# `train` and `test` are deliberately rebound: from here on they hold the
# transposed distance features instead of the raw tables loaded above
# (presumably so that each row corresponds to one sample -- TODO confirm).
train = np.transpose(train_softdtw)
test = np.transpose(test_softdtw)