def test_nested_to_3d_numpy():
    """Check that nested_to_3d_numpy() returns a numpy array.

    A small nested frame (two rows, two columns, each cell a two-element
    pandas Series) is converted and the result type is verified.
    """
    first_column = [pd.Series([1, 2]), pd.Series([3, 4])]
    second_column = [pd.Series([5, 6]), pd.Series([7, 8])]
    nested_frame = pd.DataFrame({
        'var_1': first_column,
        'var_2': second_column,
    })

    converted = nested_to_3d_numpy(nested_frame)

    assert isinstance(converted, np.ndarray)
def transform(self, X, y=None):
    """Transform input time series using the fitted random convolutional
    kernels.

    Parameters
    ----------
    X : pandas DataFrame, input time series (sktime format)
    y : array_like, target values (optional, ignored as irrelevant)

    Returns
    -------
    pandas DataFrame, transformed features
    """
    self.check_is_fitted()

    # Validate the input, then unpack the nested frame into a numpy array.
    panel = nested_to_3d_numpy(check_X(X))

    if self.normalise:
        # Standardise along the last axis. The small constant keeps the
        # denominator strictly positive when a series has zero spread.
        centre = panel.mean(axis=-1, keepdims=True)
        spread = panel.std(axis=-1, keepdims=True) + 1e-8
        panel = (panel - centre) / spread

    features = _apply_kernels(panel, self.kernels)
    return pd.DataFrame(features)
def kneighbors(self, X, n_neighbors=None, return_distance=True):
    """Finds the K-neighbors of a point.

    Returns indices of and distances to the neighbors of each point.

    Parameters
    ----------
    X : sktime-format pandas dataframe with shape([n_cases,n_dimensions]),
        or numpy ndarray with shape([n_cases,n_readings,n_dimensions])
    n_neighbors : int
        Number of neighbors to get (defaults to ``self.n_neighbors``).
    return_distance : boolean, optional. Defaults to True.
        If False, distances will not be returned

    Returns
    -------
    dist : array
        Array representing the lengths to points, only present if
        return_distance=True
    ind : array
        Indices of the nearest points in the population matrix.

    Raises
    ------
    ValueError
        If ``n_neighbors`` is not positive, if it exceeds the number of
        fitted samples, if a tree-based fit method receives sparse input,
        or if ``self._fit_method`` is not recognised.
    TypeError
        If ``n_neighbors`` is positive but not of an integer type.
    """
    self.check_is_fitted()
    # Validate the query and convert it with nested_to_3d_numpy before any
    # neighbor search is attempted.
    X = check_X(X, enforce_univariate=False)
    X = nested_to_3d_numpy(X)

    # Resolve and validate the requested neighbor count.
    if n_neighbors is None:
        n_neighbors = self.n_neighbors
    elif n_neighbors <= 0:
        raise ValueError(
            "Expected n_neighbors > 0. Got %d" %
            n_neighbors
        )
    else:
        if not np.issubdtype(type(n_neighbors), np.integer):
            raise TypeError(
                "n_neighbors does not take %s value, "
                "enter integer value" %
                type(n_neighbors))

    # NOTE(review): X has already been passed through check_X and
    # nested_to_3d_numpy above, so the ``else`` branch below looks
    # unreachable unless those helpers return None -- confirm.
    if X is not None:
        query_is_train = False
        X = check_array(X, accept_sparse='csr', allow_nd=True)
    else:
        query_is_train = True
        X = self._fit_X
        # Include an extra neighbor to account for the sample itself being
        # returned, which is removed later
        n_neighbors += 1

    # Cannot ask for more neighbors than there are fitted samples.
    train_size = self._fit_X.shape[0]
    if n_neighbors > train_size:
        raise ValueError(
            "Expected n_neighbors <= n_samples, "
            " but n_samples = %d, n_neighbors = %d" %
            (train_size, n_neighbors)
        )
    n_samples = X.shape[0]
    # Column vector of row indices; compared against neighbor indices below
    # to spot each sample appearing as its own neighbor.
    sample_range = np.arange(n_samples)[:, None]

    n_jobs = effective_n_jobs(self.n_jobs)
    if self._fit_method == 'brute':

        # Bind the reduction arguments so each distance chunk is reduced to
        # its nearest neighbors by self._kneighbors_reduce_func.
        reduce_func = partial(self._kneighbors_reduce_func,
                              n_neighbors=n_neighbors,
                              return_distance=return_distance)

        # for efficiency, use squared euclidean distances
        kwds = ({'squared': True} if self.effective_metric_ == 'euclidean'
                else self.effective_metric_params_)

        result = pairwise_distances_chunked(
            X, self._fit_X, reduce_func=reduce_func,
            metric=self.effective_metric_, n_jobs=n_jobs,
            **kwds)

    elif self._fit_method in ['ball_tree', 'kd_tree']:
        if issparse(X):
            raise ValueError(
                "%s does not work with sparse matrices. Densify the data, "
                "or set algorithm='brute'" % self._fit_method)
        if LooseVersion(joblib_version) < LooseVersion('0.12'):
            # Deal with change of API in joblib
            delayed_query = delayed(self._tree.query,
                                    check_pickle=False)
            parallel_kwargs = {"backend": "threading"}
        else:
            delayed_query = delayed(self._tree.query)
            parallel_kwargs = {"prefer": "threads"}
        # Query the tree on even slices of X, one task per slice.
        result = Parallel(n_jobs, **parallel_kwargs)(
            delayed_query(
                X[s], n_neighbors, return_distance)
            for s in gen_even_slices(X.shape[0], n_jobs)
        )
    else:
        raise ValueError("internal: _fit_method not recognized")

    # Stitch the per-chunk results back into full arrays.
    if return_distance:
        dist, neigh_ind = zip(*result)
        result = np.vstack(dist), np.vstack(neigh_ind)
    else:
        result = np.vstack(result)

    if not query_is_train:
        return result
    else:
        # If the query data is the same as the indexed data, we would like
        # to ignore the first nearest neighbor of every sample, i.e
        # the sample itself.
        if return_distance:
            dist, neigh_ind = result
        else:
            neigh_ind = result

        sample_mask = neigh_ind != sample_range

        # Corner case: When the number of duplicates are more
        # than the number of neighbors, the first NN will not
        # be the sample, but a duplicate.
        # In that case mask the first duplicate.
        dup_gr_nbrs = np.all(sample_mask, axis=1)
        sample_mask[:, 0][dup_gr_nbrs] = False

        # Exactly one entry per row is masked out, so each row shrinks by
        # one to n_neighbors - 1 columns.
        neigh_ind = np.reshape(
            neigh_ind[sample_mask], (n_samples, n_neighbors - 1))

        if return_distance:
            dist = np.reshape(
                dist[sample_mask], (n_samples, n_neighbors - 1))
            return dist, neigh_ind
        return neigh_ind
def fit(self, X, y):
    """Fit the model using X as training data and y as target values.

    Parameters
    ----------
    X : sktime-format pandas dataframe with shape([n_cases,n_dimensions]),
        or numpy ndarray with shape([n_cases,n_readings,n_dimensions])
    y : {array-like, sparse matrix}
        Target values of shape = [n_samples]

    Returns
    -------
    The value returned by ``self._fit(X)`` (presumably the fitted
    estimator -- confirm against ``_fit``).
    """
    X, y = check_X_y(X, y, enforce_univariate=False)
    y = np.asarray(y)
    X = nested_to_3d_numpy(X)
    check_classification_targets(y)

    # if internal cv is desired, the relevant flag forces a grid search
    # to evaluate the possible values,
    # find the best, and then set this classifier's params to match
    if self._cv_for_params:
        grid = GridSearchCV(
            estimator=KNeighborsTimeSeriesClassifier(metric=self.metric,
                                                     n_neighbors=1,
                                                     algorithm="brute"),
            param_grid=self._param_matrix,
            cv=LeaveOneOut(),
            scoring='accuracy'
        )
        grid.fit(X, y)
        self.metric_params = grid.best_params_['metric_params']

    # Normalise y to a 2d column layout internally, remembering whether the
    # caller supplied a single output so it can be flattened again below.
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):
        if y.ndim != 1:
            warnings.warn("A column-vector y was passed when a 1d array "
                          "was expected. Please change the shape of y to "
                          "(n_samples, ), for example using ravel().",
                          DataConversionWarning, stacklevel=2)

        self.outputs_2d_ = False
        y = y.reshape((-1, 1))
    else:
        self.outputs_2d_ = True

    # Encode each output column as integer indices into its sorted unique
    # labels. The builtin ``int`` is used because the ``np.int`` alias was
    # removed from NumPy.
    self.classes_ = []
    self._y = np.empty(y.shape, dtype=int)
    for k in range(self._y.shape[1]):
        classes, self._y[:, k] = np.unique(y[:, k], return_inverse=True)
        self.classes_.append(classes)

    if not self.outputs_2d_:
        self.classes_ = self.classes_[0]
        self._y = self._y.ravel()

    # Temporarily swap the code object of check_array (or of the function
    # it wraps, when decorated) for the time-series variant while _fit runs.
    patched = getattr(check_array, '__wrapped__', check_array)
    original_code = patched.__code__
    patched.__code__ = _check_array_ts.__code__
    try:
        fx = self._fit(X)
    finally:
        # Always restore the original validator, even if _fit raises, so a
        # failed fit does not leave check_array patched for other callers.
        patched.__code__ = original_code

    self._is_fitted = True
    return fx
def np_3d_arr(X):
    """Return X converted by ``nested_to_3d_numpy``."""
    converted = nested_to_3d_numpy(X)
    return converted
def ak_3d_arr(X):
    """Return X converted by ``nested_to_3d_numpy`` and wrapped in an
    ``ak.Array``."""
    dense = nested_to_3d_numpy(X)
    return ak.Array(dense)
def _multivariate_nested_df_to_array(X):
    """Convert a nested frame to an array with its last two axes swapped.

    The result of ``nested_to_3d_numpy`` is re-ordered from [n][d][m]
    to [n][m][d].
    """
    cube = nested_to_3d_numpy(X)
    # Keep axis 0 in place and exchange axes 1 and 2.
    axis_order = (0, 2, 1)
    return cube.transpose(axis_order)