def test_serialize_global_alignment_kernel_kmeans():
    """Serialization helpers must reject an unfitted KernelKMeans and
    round-trip a fitted one, keeping ``predict`` consistent."""
    n_series, series_len, n_dims = 15, 10, 3
    random_state = numpy.random.RandomState(0)
    dataset = random_state.randn(n_series, series_len, n_dims)

    estimator = KernelKMeans(n_clusters=3, verbose=False, max_iter=5)

    # serialization must fail while the model is still unfitted
    _check_not_fitted(estimator)

    estimator.fit(dataset)
    _check_params_predict(estimator, dataset, ['predict'])
def test_variable_length_clustering():
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    dataset = to_time_series_dataset(
        [[1, 2, 3, 4],
         [1, 2, 3],
         [2, 5, 6, 7, 8, 9],
         [3, 5, 6, 7, 8]]
    )
    rng = np.random.RandomState(0)

    # each estimator only has to fit the ragged dataset without raising
    KernelKMeans(n_clusters=2, random_state=rng).fit(dataset)
    for metric in ("dtw", "softdtw"):
        TimeSeriesKMeans(n_clusters=2, metric=metric, random_state=rng).fit(dataset)
def test_kernel_kmeans():
    """Behavioural edge cases of KernelKMeans fitting."""
    n_ts, length, dim = 15, 10, 3
    rng = np.random.RandomState(0)
    dataset = rng.randn(n_ts, length, dim)

    # labels_ produced by fit must match predict() on the same data
    model = KernelKMeans(n_clusters=3, verbose=False, max_iter=5,
                         random_state=rng).fit(dataset)
    np.testing.assert_allclose(model.labels_, model.predict(dataset))

    # asking for more clusters than samples: fit aborts, training data dropped
    model = KernelKMeans(n_clusters=101, verbose=False, max_iter=5,
                         random_state=rng).fit(dataset)
    assert model._X_fit is None

    # with a non-GAK kernel, sigma_gak_ must stay unset
    model = KernelKMeans(n_clusters=2, verbose=False, kernel="rbf",
                         kernel_params={
                             "gamma": 1.
                         },
                         max_iter=5, random_state=rng).fit(dataset)
    assert model.sigma_gak_ is None
def single_clustering(self, data_raw, data_new, centroid_num, model):
    """
    Run a single clustering pass over the prepared data.

    :param data_raw: original (unprocessed) series; grouped by cluster for the return value
    :param data_new: preprocessed series actually fed to the clustering model
    :param centroid_num: number of clusters to form
    :param model: one of 'K-Means', 'DTW', 'K-Shape', 'Kernel-KMeans'
    :return: (inertia, D, centers) where D maps cluster label -> list of raw series
    """
    seed = 0
    np.random.seed(seed)
    labels = []
    inertia = []
    centers = []
    if model == 'K-Means':
        kmeans = KMeans(n_clusters=centroid_num).fit(data_new)
        labels = kmeans.labels_
        centers = kmeans.cluster_centers_
        # sum of distances of each point to its cluster centroid; smaller is better
        inertia = kmeans.inertia_
    elif model == 'DTW':
        # NOTE(review): despite the 'DTW' name this uses the soft-DTW metric
        sdtw_km = TimeSeriesKMeans(n_clusters=centroid_num,
                                   metric='softdtw',
                                   max_iter=2,
                                   max_iter_barycenter=2,
                                   metric_params={
                                       "gamma": 1.0
                                   },
                                   random_state=0,
                                   verbose=True).fit(data_new)
        labels = sdtw_km.labels_
        centers = sdtw_km.cluster_centers_
        inertia = sdtw_km.inertia_
    elif model == "K-Shape":
        ks = KShape(n_clusters=centroid_num,
                    verbose=True,
                    random_state=seed).fit(data_new)
        labels = ks.labels_
        centers = ks.cluster_centers_
        inertia = ks.inertia_
    elif model == "Kernel-KMeans":
        # NOTE(review): dataset is truncated to 100 samples here — presumably
        # to bound GAK's quadratic kernel-matrix cost; confirm this is intended.
        # Also note `centers` stays [] on this branch (kernel methods have no
        # explicit centroids).
        data_new = data_new[:100]
        data_raw = data_raw[:100]
        kk = KernelKMeans(n_clusters=centroid_num,
                          kernel="gak",
                          kernel_params={
                              "sigma": "auto"
                          },
                          max_iter=2,
                          tol=1e-4,
                          verbose=True).fit(data_new)
        labels = kk.labels_
        inertia = kk.inertia_
    # group the raw series by their assigned cluster label
    # NOTE(review): an unrecognised `model` leaves labels == [] and the loop
    # below raises IndexError — confirm callers only pass the four names above
    D = {}
    for i in range(centroid_num):
        D[i] = []
    for i in range(len(data_raw)):
        D[labels[i]].append(data_raw[i])
    return inertia, D, centers
def cluster(num_domains, diff_kernel, min_seg_size, seg_numdomians_ratio,
            distance_matrix, min_domain_size, clustering_method,
            alpha_helices, max_alpha_helix_size_to_merge):
    """Cluster residues into domains and post-process the labelling.

    Parameters
    ----------
    num_domains : int
        Number of clusters (domains) to form.
    diff_kernel : array-like
        Precomputed affinity/kernel matrix fed to the clustering estimator.
    min_seg_size : int
        Minimum segment size; shorter segments are merged away.
    seg_numdomians_ratio :
        Threshold used by ``remove_redundant_segments``.
    distance_matrix : array-like
        Precomputed distance matrix, used for segment cleanup and silhouette.
    min_domain_size : int
        A domain smaller than this invalidates the clustering.
    clustering_method : str
        'spectral' or 'kernel-kmeans'.
    alpha_helices : iterable of (start, end)
        Inclusive residue ranges of alpha helices.
    max_alpha_helix_size_to_merge : int
        Helices up to this length get relabelled to their majority label.

    Returns
    -------
    (labels, labels, sil_score) on success, or the string 'error' when the
    clustering is degenerate or any step raises.
    """
    try:
        if clustering_method == 'spectral':
            clustering = SpectralClustering(n_clusters=num_domains,
                                            assign_labels="kmeans",
                                            random_state=0,
                                            affinity='precomputed',
                                            n_init=100).fit(diff_kernel)
        elif clustering_method == 'kernel-kmeans':
            clustering = KernelKMeans(n_clusters=num_domains,
                                      random_state=0,
                                      n_init=100,
                                      kernel='precomputed').fit(diff_kernel)
        labels = clustering.labels_.copy()
        # short alpha helices should not straddle domains: relabel each one
        # to the majority label found inside it
        for alpha_helix in alpha_helices:
            if alpha_helix[1] - alpha_helix[0] + 1 <= max_alpha_helix_size_to_merge:
                alpha_helix_labels = labels[alpha_helix[0]:alpha_helix[1] + 1]
                counter = collections.Counter(alpha_helix_labels)
                if len(counter) > 1:
                    most_common = counter.most_common(1)[0][0]
                    labels[alpha_helix[0]:alpha_helix[1] + 1] = \
                        [most_common] * (alpha_helix[1] - alpha_helix[0] + 1)
        remove_short_segments(labels, min_seg_size, distance_matrix)
        remove_redundant_segments(labels, num_domains, seg_numdomians_ratio,
                                  distance_matrix)
        # cleanup may have collapsed clusters below the requested count
        if len(set(labels)) < num_domains:
            return 'error'
        sil_score = silhouette_score(distance_matrix, labels=labels,
                                     metric="precomputed")
        # every domain must reach the minimum size
        for label in set(labels):
            if np.count_nonzero(labels == label) < min_domain_size:
                return 'error'
        return labels, labels, sil_score
    except Exception:
        # was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception, same 'error' contract
        return 'error'
def _fit(self, X: TimeSeriesInstances, y=None) -> np.ndarray:
    """Fit time series clusterer to training data.

    Parameters
    ----------
    X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
        (n_instances, n_dimensions, series_length))
        Training time series instances to cluster.
    y: ignored, exists for API consistency reasons.

    Returns
    -------
    self:
        Fitted estimator.
    """
    from tslearn.clustering import KernelKMeans as TsLearnKernelKMeans

    # tslearn expects an int verbosity level, not a bool
    verbose = 0
    if self.verbose is True:
        verbose = 1

    # lazily build the wrapped tslearn estimator on first fit
    if self._tslearn_kernel_k_means is None:
        self._tslearn_kernel_k_means = TsLearnKernelKMeans(
            n_clusters=self.n_clusters,
            kernel=self.kernel,
            max_iter=self.max_iter,
            tol=self.tol,
            n_init=self.n_init,
            kernel_params=self.kernel_params,
            n_jobs=self.n_jobs,
            verbose=verbose,
            random_state=self.random_state,
        )

    self._tslearn_kernel_k_means.fit(X)
    self.labels_ = self._tslearn_kernel_k_means.labels_
    self.inertia_ = self._tslearn_kernel_k_means.inertia_
    self.n_iter_ = self._tslearn_kernel_k_means.n_iter_
    # BUG FIX: the docstring promises the fitted estimator, but the
    # original body fell through and returned None
    return self
def main(argv):
    """Command-line entry point: load a time-series dataset, optionally
    normalise/standardise it, run the selected clustering algorithms
    (Euclidean/DTW/soft-DTW k-means, k-Shape, GAK kernel k-means), render a
    combined figure, and save the figure plus per-method label CSVs to
    ``./out/<timestamp>/``.

    Parameters
    ----------
    argv : list of str
        Command-line arguments (``sys.argv[1:]``); see ``help_message`` below.
    """
    # define global timer to obtain global execution time
    start_global = timer()

    # define globals variables
    global euclidean_clustered_data, \
        dtw_clustered_data, \
        soft_dtw_clustered_data, \
        k_shape_clustered_data, \
        gak_clustered_data

    # ------------------------------------------------------------------
    # Input arguments parsing
    # ------------------------------------------------------------------
    # define help message
    help_message = \
        'clustering.py -h \n\n' \
        'usage: clustering.py [-c <number_clusters>] [-i <input_file>] [-ansEDSKG] \n' \
        'by default: processing input data (without any sampling)' \
        '(euclidean, dtw, soft-dtw and GAK k-means, k-shape)\n' \
        'options list: \n' \
        ' -c / --clusters <number_clusters> # set number of clusters (default 3) \n\n' \
        ' -i / --ifile <input_file> # set input filename \n' \
        ' -n / --normalise # normalise input data \n' \
        ' -s / --standardise # standardise input data \n\n' \
        ' -a / --all # perform all 5 implemented methods of clustering: \n' \
        ' euclidean, dtw, soft-dtw, gak k-means and k-shape\n' \
        ' -E / --euclidean # perform euclidean k-means clustering \n' \
        ' -D / --dtw # perform dtw k-means clustering \n' \
        ' -S / --soft-dtw # perform soft-dtw k-means clustering \n' \
        ' -K / --k-shape # perform k-shape clustering \n' \
        ' -G / --gak # perform GAK k-means clustering \n'

    # Create new object to save arguments
    i_args = Arguments()

    # number of rows in plot to create correct number of subplots
    # default = 3 (raw data plus distribution histograms)
    n_rows_plot = 3

    # define validation rules for arguments
    try:
        opts, args = getopt.getopt(
            argv,
            "hc:i:nsaEDSKG",
            [
                "help", "clusters=", "ifile=", "normalise", "standardise",
                "all", "euclidean", "dtw", "soft-dtw", "k-shape", "gak"
            ]
        )
    except getopt.GetoptError:
        print(help_message)
        sys.exit(2)

    # parse arguments
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            print(help_message)
            sys.exit()
        elif opt in ("-c", "--clusters"):
            # BUG FIX: getopt yields option values as strings; the cluster
            # count is later used with range(), add_gridspec() and
            # numpy.random.choice(), all of which require an int
            i_args.number_clusters = int(arg)
        elif opt in ("-i", "--ifile"):
            i_args.input_file = arg
        elif opt in ("-n", "--normalise"):
            i_args.normalise_data = True
        elif opt in ("-s", "--standardise"):
            i_args.standardise_data = True
        elif opt in ("-E", "--euclidean"):
            n_rows_plot += 1
            i_args.euclidean_clustering = True
        elif opt in ("-D", "--dtw"):
            n_rows_plot += 1
            i_args.dtw_clustering = True
        elif opt in ("-S", "--soft-dtw"):
            n_rows_plot += 1
            i_args.soft_dtw_clustering = True
        elif opt in ("-K", "--k-shape"):
            n_rows_plot += 1
            i_args.k_shape_clustering = True
        elif opt in ("-G", "--gak"):
            n_rows_plot += 1
            i_args.gak_clustering = True
        elif opt in ("-a", "--all"):
            n_rows_plot = 8
            i_args.euclidean_clustering = True
            i_args.dtw_clustering = True
            i_args.soft_dtw_clustering = True
            i_args.k_shape_clustering = True
            i_args.gak_clustering = True

    # normalise maximum number of subplots levels
    n_rows_plot = 8 if n_rows_plot > 8 else n_rows_plot

    # ------------------------------------------------------------------
    # Raw data processing stage
    # ------------------------------------------------------------------
    # set style to matplotlib plot
    mpl.style.use('seaborn')

    # set seed value and seed the generator
    seed = 0
    numpy.random.seed(seed)

    # import data and print first 5 rows
    raw_data = import_data()
    print(raw_data.head())

    # convert raw data to the format which can be used by tslearn
    # (3-d dimensional array)
    # BUILT functionality: adjust all time series to one size
    # (NaN values are appended to the shorter ones)
    formatted_data = to_time_series_dataset(raw_data)

    # print shape of new array
    print(formatted_data.shape)

    # obtain number of measuring
    n_measuring = formatted_data.shape[1]

    # define figure, grid_spec to create layout of the plot
    fig = plt.figure(constrained_layout=True)
    grid_spec = fig.add_gridspec(
        n_rows_plot,
        i_args.number_clusters
    )

    # set A4 size to figure
    fig.set_size_inches(8.5, 11.75)

    # setup count of layers of subplots
    count_layer = 3

    # setup first subplot and draw raw time series
    f_ax_raw_data = fig.add_subplot(grid_spec[:2, :])
    for xx in formatted_data:
        f_ax_raw_data.plot(xx.ravel(), alpha=.2)
    formatted_data_min = formatted_data.min()
    formatted_data_max = formatted_data.max()

    # draw title for chart with min and max values
    f_ax_raw_data.set_title('Raw Data (min = %.2f, max = %.2f)'
                            % (formatted_data_min, formatted_data_max))

    # obtain and print executing time of data processing stage to console,
    timer_tick = get_time_tick(start_global)
    plt.ion()
    plt.show()
    print("Raw data processing time: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Data preprocessing stage
    # ------------------------------------------------------------------
    start = timer()

    # Convert NaNs to value predicted by interpolation
    # linearly interpolate for NaN/NaNs
    n_nan_changes = 0
    for ind in range(formatted_data.shape[0]):
        mask = numpy.isnan(formatted_data[ind])
        n_nan_changes += mask.sum()
        formatted_data[ind][mask] = numpy.interp(
            numpy.flatnonzero(mask),
            numpy.flatnonzero(~mask),
            formatted_data[ind][~mask]
        )
    print("%d NaN values was/were interpolated" % n_nan_changes)

    # Scaling
    # to know should we use normalization or standardization, we need to see
    # the distribution of values.
    # take random 3 measuring for each case to draw histograms
    random_indexes = numpy.random.choice(n_measuring,
                                         i_args.number_clusters,
                                         replace=False)

    # create new arrays with values of randomly chosen measurements
    histogram_data = formatted_data[:, random_indexes]

    # draw histograms
    for i_histogram in range(i_args.number_clusters):
        f_ax_histogram = fig.add_subplot(grid_spec[2, i_histogram])
        f_ax_histogram.hist(
            histogram_data[:, i_histogram], bins=25, density=True
        )
        f_ax_histogram.text(0.55, 0.98,
                            'Measurement #%d' % random_indexes[i_histogram],
                            transform=plt.gca().transAxes,
                            color="navy"
                            )
        # annotate the middle histogram with the chosen preprocessing mode
        if i_histogram == 1:
            preprocessing = ''
            if i_args.normalise_data:
                preprocessing += "normalised"
                if i_args.standardise_data:
                    preprocessing += " and standardised"
            elif i_args.standardise_data:
                preprocessing += "standardised"
            preprocessing = '' if preprocessing == '' \
                else "(data will be %s)" % preprocessing
            f_ax_histogram.set_title(
                "Distributions histograms %s" % preprocessing,
                color='navy', y=1, pad=14
            )

    # if no processing data option chosen continue with raw data
    processed_data = formatted_data

    # since for this concrete challenge data the distributions are more/less
    # Gaussian/Normal we can use standardization
    # normalize data: Min-Max scaling ranging between 0 and 1
    if i_args.normalise_data:
        processed_data = TimeSeriesScalerMinMax().fit_transform(processed_data)
        print("Data was normalised")

    # standardize data: scaling technique where the values are centered around
    # the mean with a unit standard deviation
    if i_args.standardise_data:
        processed_data = TimeSeriesScalerMeanVariance().fit_transform(processed_data)
        print("Data was standardised")

    # obtain max value of data (to be used in visualization subplots)
    max_data = processed_data.max() * 1.2
    min_data = processed_data.min() * 1.2

    timer_tick = get_time_tick(start)
    print("#############################################################################################")
    print("Data processing stage elapsed time: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Implementing Euclidean k-means clustering algorithm
    # ------------------------------------------------------------------
    if i_args.euclidean_clustering:
        start = timer()
        print("Euclidean k-means")

        # define parameters of the model of the algorithm
        k_means_euclidean = TimeSeriesKMeans(
            n_clusters=i_args.number_clusters,
            verbose=True,
            random_state=seed,
            n_jobs=4
        )

        # calculate cluster's label array
        euclidean_clustered_data = k_means_euclidean.fit_predict(processed_data)

        # draw subplots with attributed clusters of time series as well as
        # cluster centers' lines
        for i_cluster in range(i_args.number_clusters):
            f_ax_euclidean = create_figure_axes(fig, grid_spec, count_layer,
                                                i_cluster, n_measuring,
                                                min_data, max_data,
                                                processed_data,
                                                euclidean_clustered_data,
                                                'tab:blue')
            f_ax_euclidean.plot(
                k_means_euclidean.cluster_centers_[i_cluster].ravel(),
                "tab:green"
            )
            if i_cluster == 1:
                middle_axis = f_ax_euclidean

        # increment count of filled layer of subplots
        count_layer += 1

        # obtain processing time, print it to console and
        # add it to the title of the series of subplots
        timer_tick = get_time_tick(start)
        middle_axis.set_title(
            "Euclidean $k$-means (%s)" % timer_tick,
            color='tab:green', y=1, pad=14
        )
        print("#############################################################################################")
        print("Euclidean k-means time processing: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Implementing DTW k-means clustering algorithm
    # use dtw (Dynamic Time Warping Distance) metric to calculate
    # distance between means
    # ------------------------------------------------------------------
    if i_args.dtw_clustering:
        start = timer()
        print("DTW k-means")
        k_means_DTW = TimeSeriesKMeans(n_clusters=i_args.number_clusters,
                                       n_init=3,
                                       metric="dtw",
                                       verbose=True,
                                       max_iter_barycenter=10,
                                       random_state=seed,
                                       n_jobs=6
                                       )
        dtw_clustered_data = k_means_DTW.fit_predict(processed_data)

        for i_cluster in range(i_args.number_clusters):
            f_ax_dtw = create_figure_axes(fig, grid_spec, count_layer,
                                          i_cluster, n_measuring,
                                          min_data, max_data,
                                          processed_data,
                                          dtw_clustered_data,
                                          'tab:blue')
            f_ax_dtw.plot(
                k_means_DTW.cluster_centers_[i_cluster].ravel(),
                "tab:red"
            )
            if i_cluster == 1:
                middle_axis = f_ax_dtw

        # increment count of filled layer of subplots
        count_layer += 1

        timer_tick = get_time_tick(start)
        middle_axis.set_title(
            "DTW $k$-means (%s)" % timer_tick,
            color='tab:red', y=1, pad=14
        )
        print("#############################################################################################")
        print("DTW k-means time processing: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Implementing soft DTW k-means clustering algorithm
    # use soft dtw (Dynamic Time Warping Distance) metric to calculate
    # distance between means
    # ------------------------------------------------------------------
    if i_args.soft_dtw_clustering:
        start = timer()
        print("Soft-DTW k-means")
        k_means_soft_DTW = TimeSeriesKMeans(n_clusters=i_args.number_clusters,
                                            metric="softdtw",
                                            metric_params={"gamma": .025},
                                            verbose=True,
                                            random_state=seed,
                                            n_jobs=6
                                            )
        soft_dtw_clustered_data = k_means_soft_DTW.fit_predict(processed_data)

        for i_cluster in range(i_args.number_clusters):
            f_ax_soft_dtw = create_figure_axes(fig, grid_spec, count_layer,
                                               i_cluster, n_measuring,
                                               min_data, max_data,
                                               processed_data,
                                               soft_dtw_clustered_data,
                                               'tab:blue')
            f_ax_soft_dtw.plot(
                k_means_soft_DTW.cluster_centers_[i_cluster].ravel(),
                "tab:purple"
            )
            if i_cluster == 1:
                middle_axis = f_ax_soft_dtw

        # increment count of filled layer of subplots
        count_layer += 1

        timer_tick = get_time_tick(start)
        middle_axis.set_title(
            "Soft-DTW $k$-means (%s)" % timer_tick,
            color='tab:purple', y=1, pad=14
        )
        print("#############################################################################################")
        print("Soft-DTW k-means time processing: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Implementing k-Shape clustering algorithm
    # ------------------------------------------------------------------
    if i_args.k_shape_clustering:
        start = timer()
        print("K-Shape")
        k_shape = KShape(n_clusters=i_args.number_clusters,
                         verbose=True,
                         random_state=seed
                         )
        k_shape_clustered_data = k_shape.fit_predict(processed_data)

        for i_cluster in range(i_args.number_clusters):
            # k-Shape centers can exceed the data range; widen axes as needed
            min_axe_value = min(min_data,
                                k_shape.cluster_centers_[i_cluster].ravel().min())
            max_axe_value = max(max_data,
                                k_shape.cluster_centers_[i_cluster].ravel().max())
            f_ax_k_shape = create_figure_axes(fig, grid_spec, count_layer,
                                              i_cluster, n_measuring,
                                              min_axe_value, max_axe_value,
                                              processed_data,
                                              k_shape_clustered_data,
                                              'tab:blue')
            f_ax_k_shape.plot(
                k_shape.cluster_centers_[i_cluster].ravel(),
                "tab:orange"
            )
            if i_cluster == 1:
                middle_axis = f_ax_k_shape

        # increment count of filled layer of subplots
        count_layer += 1

        timer_tick = get_time_tick(start)
        middle_axis.set_title(
            "$K$-Shape (%s)" % timer_tick,
            color='tab:orange', y=1, pad=14
        )
        print("#############################################################################################")
        print("K-Shape time processing: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Implementing Global Alignment kernel k-means clustering algorithm
    # since kernel is used, there is no centroid of the cluster
    # ------------------------------------------------------------------
    if i_args.gak_clustering:
        start = timer()
        print("GAK-k-means")
        gak_k_means = KernelKMeans(n_clusters=i_args.number_clusters,
                                   kernel="gak",
                                   kernel_params={"sigma": "auto"},
                                   n_init=10,
                                   verbose=True,
                                   random_state=seed,
                                   n_jobs=6
                                   )
        gak_clustered_data = gak_k_means.fit_predict(processed_data)

        for i_cluster in range(i_args.number_clusters):
            f_ax_gak_k_means = create_figure_axes(fig, grid_spec, count_layer,
                                                  i_cluster, n_measuring,
                                                  min_data, max_data,
                                                  processed_data,
                                                  gak_clustered_data,
                                                  'tab:blue')
            if i_cluster == 1:
                middle_axis = f_ax_gak_k_means

        # increment count of filled layer of subplots
        count_layer += 1

        timer_tick = get_time_tick(start)
        middle_axis.set_title(
            "Global Alignment kernel $k$-means (%s)" % timer_tick,
            color='tab:cyan', y=1, pad=14)
        print("#############################################################################################")
        print("GAK k-means time processing: %s" % timer_tick)

    # ------------------------------------------------------------------
    # Persist results
    # ------------------------------------------------------------------
    # return string with current datetime
    now = datetime.now().strftime("%d-%m-%Y_%H-%M-%S")

    # define the name of the directory to be created
    path = "./out/%s" % now
    print("#############################################################################################")
    try:
        os.mkdir(path)
    except OSError:
        print("Creation of the directory %s failed" % path)
    else:
        print("Successfully created the directory %s " % path)

    try:
        # save figure as pdf to out folder
        fig.savefig("./out/%s/visual_result.pdf" % now)

        # save clustering results
        if i_args.euclidean_clustering:
            numpy.savetxt(
                "./out/%s/euclidean_clustering_result.csv" % now,
                euclidean_clustered_data,
                delimiter=","
            )
        if i_args.dtw_clustering:
            numpy.savetxt(
                "./out/%s/dtw_clustering_result.csv" % now,
                dtw_clustered_data,
                delimiter=","
            )
        if i_args.soft_dtw_clustering:
            numpy.savetxt(
                "./out/%s/soft_dtw_clustering_result.csv" % now,
                soft_dtw_clustered_data,
                delimiter=","
            )
        if i_args.k_shape_clustering:
            numpy.savetxt(
                "./out/%s/k_shape_clustering_result.csv" % now,
                k_shape_clustered_data,
                delimiter=","
            )
        if i_args.gak_clustering:
            numpy.savetxt(
                "./out/%s/gak_clustering_result.csv" % now,
                gak_clustered_data,
                delimiter=","
            )
    except RuntimeError:
        print("Saving results failed")
    else:
        print("Successfully saved results in the path %s " % path)

    # obtain and print global executing time
    timer_tick = get_time_tick(start_global)
    print("#############################################################################################")
    print("All algorithms elapsed time: % s" % timer_tick)

    # render and show plot
    # plt.show()
    plt.draw()
    plt.pause(0.001)
    input("Press [enter] to finish.")
    print("#############################################################################################")
# Load the mapping of known/true cluster ids and collect the event data for
# one cluster column, then run GAK kernel k-means on the assembled dataset.
true_clusters_known = pd.read_pickle('data/known_true_clusters_ids.pkl')
all_clusters_data = []
cl = 'Tree'

# (exploratory code, kept for reference: finds the shortest event length)
# min_size = 1280
# for
# for ev in true_clusters_known[cl].dropna():
#     e = Event(ev, 0, -1, 'resampled').data.shape[0]
#     if min_size > e:
#         min_size = e
#         print(e)

# build one multivariate series per event, dropping the time column
for ev in true_clusters_known[cl].dropna():
    e = Event(ev, 0, -1, 'resampled')
    selected_data = e.res().loc[:, e.data.columns != 'Time (s)']
    all_clusters_data.append(selected_data)
#%%
seed = 0
# pad ragged series into one 3-d array; padding NaNs are zero-filled
# NOTE(review): zero-filling changes the padded tails' values — confirm this
# is preferable to tslearn's native variable-length support here
formatted_dataset = to_time_series_dataset(all_clusters_data)
formatted_dataset[np.isnan(formatted_dataset)] = 0
#%%
# standardise each series, then cluster with GAK kernel k-means
X_train = TimeSeriesScalerMeanVariance().fit_transform(formatted_dataset)
gak_km = KernelKMeans(n_clusters=3,
                      kernel="gak",
                      kernel_params={"sigma": "auto"},
                      n_init=20,
                      verbose=True,
                      random_state=seed)
y_pred = gak_km.fit_predict(X_train)
#%%
class TimeSeriesKernelKMeans(BaseClusterer):
    """Kernel k-means clusterer wrapping tslearn's implementation.

    Parameters
    ----------
    n_clusters: int, defaults = 8
        The number of clusters to form as well as the number of
        centroids to generate.
    kernel : string, or callable (default: "gak")
        The kernel should either be "gak", in which case the Global Alignment
        Kernel from [2]_ is used or a value that is accepted as a metric
        by `scikit-learn's pairwise_kernels
        <https://scikit-learn.org/stable/modules/generated/\
        sklearn.metrics.pairwise.pairwise_kernels.html>`_
    n_init: int, defaults = 10
        Number of times the k-means algorithm will be run with different
        centroid seeds. The final result will be the best output of n_init
        consecutive runs in terms of inertia.
    kernel_params : dict or None (default: None)
        Kernel parameters to be passed to the kernel function.
        None means no kernel parameter is set.
        For Global Alignment Kernel, the only parameter of interest is
        `sigma`. If set to 'auto', it is computed based on a sampling of the
        training set (cf :ref:`tslearn.metrics.sigma_gak
        <fun-tslearn.metrics.sigma_gak>`). If no specific value is set for
        `sigma`, its defaults to 1.
    max_iter: int, defaults = 300
        Maximum number of iterations of the k-means algorithm for a single
        run.
    tol: float, defaults = 1e-4
        Relative tolerance with regards to Frobenius norm of the difference
        in the cluster centers of two consecutive iterations to declare
        convergence.
    verbose: bool, defaults = False
        Verbosity mode.
    n_jobs : int or None, optional (default=None)
        The number of jobs to run in parallel for GAK cross-similarity matrix
        computations.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See scikit-learns'
        `Glossary <https://scikit-learn.org/stable/glossary.html#term-n-jobs>`_
        for more details.
    random_state: int or np.random.RandomState instance or None, defaults = None
        Determines random number generation for centroid initialization.

    Attributes
    ----------
    labels_: np.ndarray (1d array of shape (n_instance,))
        Labels that is the index each time series belongs to.
    inertia_: float
        Sum of squared distances of samples to their closest cluster center,
        weighted by the sample weights if provided.
    n_iter_: int
        Number of iterations run.
    """

    _tags = {
        "capability:multivariate": True,
    }

    def __init__(
        self,
        n_clusters: int = 8,
        kernel: str = "gak",
        n_init: int = 10,
        max_iter: int = 300,
        tol: float = 1e-4,
        kernel_params: Union[dict, None] = None,
        verbose: bool = False,
        n_jobs: Union[int, None] = None,
        random_state: Union[int, RandomState] = None,
    ):
        # fail fast if the wrapped library is missing
        _check_soft_dependencies("tslearn", severity="error", object=self)

        self.kernel = kernel
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.kernel_params = kernel_params
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.random_state = random_state

        # fitted-state attributes, populated by _fit
        self.cluster_centers_ = None
        self.labels_ = None
        self.inertia_ = None
        self.n_iter_ = 0

        # lazily-created tslearn estimator (see _fit)
        self._tslearn_kernel_k_means = None

        super(TimeSeriesKernelKMeans, self).__init__(n_clusters=n_clusters)

    def _fit(self, X: TimeSeriesInstances, y=None) -> np.ndarray:
        """Fit time series clusterer to training data.

        Parameters
        ----------
        X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
            (n_instances, n_dimensions, series_length))
            Training time series instances to cluster.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        self:
            Fitted estimator.
        """
        from tslearn.clustering import KernelKMeans as TsLearnKernelKMeans

        # tslearn expects an int verbosity level, not a bool
        verbose = 0
        if self.verbose is True:
            verbose = 1

        if self._tslearn_kernel_k_means is None:
            self._tslearn_kernel_k_means = TsLearnKernelKMeans(
                n_clusters=self.n_clusters,
                kernel=self.kernel,
                max_iter=self.max_iter,
                tol=self.tol,
                n_init=self.n_init,
                kernel_params=self.kernel_params,
                n_jobs=self.n_jobs,
                verbose=verbose,
                random_state=self.random_state,
            )

        self._tslearn_kernel_k_means.fit(X)
        self.labels_ = self._tslearn_kernel_k_means.labels_
        self.inertia_ = self._tslearn_kernel_k_means.inertia_
        self.n_iter_ = self._tslearn_kernel_k_means.n_iter_
        # BUG FIX: docstring promises the fitted estimator, but the original
        # body fell through and returned None
        return self

    def _predict(self, X: TimeSeriesInstances, y=None) -> np.ndarray:
        """Predict the closest cluster each sample in X belongs to.

        Parameters
        ----------
        X : np.ndarray (2d or 3d array of shape (n_instances, series_length) or shape
            (n_instances, n_dimensions, series_length))
            Time series instances to predict their cluster indexes.
        y: ignored, exists for API consistency reasons.

        Returns
        -------
        np.ndarray (1d array of shape (n_instances,))
            Index of the cluster each time series in X belongs to.
        """
        return self._tslearn_kernel_k_means.predict(X)

    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If
            no special parameters are defined for a value, will return
            `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test
            instance, i.e., `MyClass(**params)` or `MyClass(**params[i])`
            creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in
            `params`
        """
        params = {
            "n_clusters": 2,
            "kernel": "gak",
            "n_init": 1,
            "max_iter": 1,
            "tol": 1e-4,
            "kernel_params": None,
            "verbose": False,
            "n_jobs": 1,
            "random_state": 1,
        }
        return params

    def _score(self, X, y=None):
        # inertia is non-positive by convention; score is its magnitude
        return np.abs(self.inertia_)