def predict(self, X, only_mean=False):
    """Predict targets as the (optionally weighted) mean of the neighbours.

    Parameters
    ----------
    X : array-like
        Query points. They are converted with ``np.array`` and passed
        through ``self.normalize`` before the neighbour search.
    only_mean : bool, default=False
        If true, return only the predictions.

    Returns
    -------
    y_pred : ndarray
        Predicted values, flattened when the stored targets ``self._y``
        are one-dimensional.
    min_dist : ndarray
        Smallest neighbour distance of every query. Only returned when
        ``only_mean`` is false.
    """
    queries = self.normalize(np.array(X))

    neigh_dist, neigh_ind = self.kneighbors(queries)
    weights = _get_weights(neigh_dist, self.weights)

    # Work on a 2-D view of the targets so both layouts share one code path.
    targets = self._y
    if targets.ndim == 1:
        targets = targets.reshape((-1, 1))

    if weights is not None:
        n_outputs = targets.shape[1]
        y_pred = np.empty((queries.shape[0], n_outputs), dtype=np.float64)
        weight_totals = np.sum(weights, axis=1)
        for col in range(n_outputs):
            weighted_sum = np.sum(targets[neigh_ind, col] * weights, axis=1)
            y_pred[:, col] = weighted_sum / weight_totals
    else:
        y_pred = np.mean(targets[neigh_ind], axis=1)

    if self._y.ndim == 1:
        y_pred = y_pred.ravel()

    if only_mean:
        return y_pred
    return y_pred, np.amin(neigh_dist, axis=1)
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array
        Test points; promoted to at least two dimensions with
        ``np.atleast_2d`` before the neighbour search.

    Returns
    -------
    labels : ndarray of int
        Flattened array holding the (optionally weighted) mode of the
        neighbour labels, one entry per data sample.
    """
    X = np.atleast_2d(X)

    neigh_dist, neigh_ind = self.kneighbors(X)
    pred_labels = self._y[neigh_ind]

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = smart_mode(pred_labels, axis=1)
    else:
        mode, _ = weighted_mode(pred_labels, weights, axis=1)

    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    return mode.flatten().astype(int)
def predict_proba(self, X):
    """Return probability estimates for the test data X.

    Parameters
    ----------
    X : sktime-format pandas dataframe or array-like
        Test samples, shape (n_query, n_features), or (n_query, n_indexed)
        if metric == 'precomputed'.

    Returns
    -------
    p : array of shape = [n_samples, n_classes], or a list of n_outputs
        of such arrays if n_outputs > 1.
        The class probabilities of the input samples. Columns follow the
        order of ``self.classes_``.
    """
    X = check_data_sktime_tsc(X)

    # ``check_array`` is temporarily given the code object of
    # ``_check_array_ts``. The swap is process-wide, so it must be undone
    # even when something below raises; hence the try/finally.
    original_code = check_array.__code__
    check_array.__code__ = _check_array_ts.__code__
    try:
        X = check_array(X, accept_sparse='csr')

        neigh_dist, neigh_ind = self.kneighbors(X)

        classes_ = self.classes_
        _y = self._y
        if not self.outputs_2d_:
            _y = self._y.reshape((-1, 1))
            classes_ = [self.classes_]

        n_samples = X.shape[0]

        weights = _get_weights(neigh_dist, self.weights)
        if weights is None:
            # Uniform voting: every neighbour contributes one vote.
            weights = np.ones_like(neigh_ind)

        all_rows = np.arange(X.shape[0])
        probabilities = []
        for k, classes_k in enumerate(classes_):
            pred_labels = _y[:, k][neigh_ind]
            proba_k = np.zeros((n_samples, classes_k.size))

            # a simple ':' index doesn't work right
            for i, idx in enumerate(pred_labels.T):  # loop is O(n_neighbors)
                proba_k[all_rows, idx] += weights[:, i]

            # normalize 'votes' into real [0,1] probabilities
            normalizer = proba_k.sum(axis=1)[:, np.newaxis]
            # Rows without any vote keep all-zero probabilities instead of
            # triggering a division by zero.
            normalizer[normalizer == 0.0] = 1.0
            proba_k /= normalizer

            probabilities.append(proba_k)

        if not self.outputs_2d_:
            probabilities = probabilities[0]

        return probabilities
    finally:
        check_array.__code__ = original_code
def predict(self, X, idx=None):
    """Predict class labels as the (optionally weighted) neighbour mode.

    Parameters
    ----------
    X : array-like
        Query samples, forwarded unchanged to ``self.kneighbors``.
    idx : optional
        Forwarded as the second positional argument of
        ``self.kneighbors``.
        NOTE(review): its meaning is defined by ``kneighbors`` -- confirm.

    Returns
    -------
    labels : ndarray of int
        Flattened array with the mode of the neighbour labels.
    """
    neigh_dist, neigh_ind = self.kneighbors(X, idx)
    pred_labels = self._y[neigh_ind]

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = smart_mode(pred_labels, axis=1)
    else:
        # Reduce over the neighbour axis, exactly like the unweighted
        # branch; without ``axis=1`` the mode would be taken across the
        # queries instead of across each query's neighbours.
        mode, _ = weighted_mode(pred_labels, weights, axis=1)

    # ``np.int`` has been removed from NumPy; the builtin ``int`` is the
    # type it used to alias.
    return mode.flatten().astype(int)
def _impute(self, dist, X, fitted_X, mask, mask_fx):
    """Fill the masked entries of ``X`` column by column from nearby rows.

    For each column with at least one masked entry, the rows to fill
    ("receivers") take the (optionally weighted) average of that column
    over the ``self.n_neighbors`` closest rows of ``fitted_X`` that are
    not masked in ``mask_fx``. When fewer such rows exist, a warning is
    issued and ``self.statistics_[c]`` is used instead.

    Parameters
    ----------
    dist : ndarray
        Distances, indexed as ``dist[row of X][row of fitted_X]``.
    X : ndarray, 2-D
        Data to impute; modified in place.
    fitted_X : ndarray
        Data from which donor values are read.
    mask : ndarray of bool
        True where ``X`` has to be imputed.
    mask_fx : ndarray of bool
        True where ``fitted_X`` cannot serve as a donor.

    Returns
    -------
    X : ndarray
        The same array that was passed in, with the imputed values.
    """
    _, n_columns = X.shape

    for col in range(n_columns):
        needs_fill = mask[:, col]
        if not np.any(needs_fill, axis=0):
            continue

        # Rows that receive a value and rows that may donate one.
        receivers = np.where(needs_fill)[0]
        candidates = np.where(~mask_fx[:, col])[0]
        n_candidates = len(candidates)

        # Not enough potential donors: fall back to the stored statistic.
        if n_candidates < self.n_neighbors:
            warnings.warn("Insufficient number of neighbors! "
                          "Filling in column mean.")
            X[receivers, col] = self.statistics_[col]
            continue

        k = self.n_neighbors

        # Receiver-to-candidate distances, forced to be 2-D.
        cand_dist = dist[receivers][:, candidates].reshape(-1, n_candidates)

        # Partial sort: the first k columns hold the k closest candidates.
        nearest = np.argpartition(cand_dist, k - 1, axis=1)[:, :k]

        # Weights (or None) computed from the distances of the chosen donors.
        row_selector = np.arange(len(nearest))[:, None]
        weight_matrix = _get_weights(cand_dist[row_selector, nearest],
                                     self.weights)

        # Donor values, one row per receiver; missing ones are masked out.
        donor_values = fitted_X[candidates][nearest.ravel(), col].reshape(
            (-1, k))
        donors = np.ma.array(
            donor_values,
            mask=_get_mask(donor_values, self.missing_values))

        averaged = np.ma.average(donors, axis=1, weights=weight_matrix)
        X[receivers, col] = averaged.data

    return X
def predict(self, X, idx=None):
    """Predict class labels as the (optionally weighted) neighbour mode.

    Parameters
    ----------
    X : array-like
        Query samples, forwarded unchanged to ``self.kneighbors``.
    idx : optional
        Forwarded as the second positional argument of
        ``self.kneighbors``.
        NOTE(review): its meaning is defined by ``kneighbors`` -- confirm.

    Returns
    -------
    labels : ndarray of int
        Flattened array with the mode of the neighbour labels.
    """
    neigh_dist, neigh_ind = self.kneighbors(X, idx)
    pred_labels = self._y[neigh_ind]

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = stats.mode(pred_labels, axis=1)
    else:
        # Randomly permute the neighbours to tie-break randomly if
        # necessary. Labels and weights are shuffled with the same
        # permutation so each label keeps its own weight. The previous
        # code referenced undefined names here and always raised NameError.
        # NOTE(review): this only affects ties if ``weighted_mode`` is
        # sensitive to the neighbour order -- confirm.
        perm = np.random.permutation(neigh_ind.shape[1])
        mode, _ = weighted_mode(pred_labels[:, perm], weights[:, perm],
                                axis=1)

    # ``np.int`` has been removed from NumPy; the builtin ``int`` is the
    # type it used to alias.
    return mode.flatten().astype(int)
def predict(self, X, n_neighbors=None):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array-like, shape (n_queries, n_features),
        or (n_queries, n_indexed) if metric == 'precomputed'
        Test samples. They are validated with ``check_array`` and cast to
        ``float32`` before the neighbour search.
    n_neighbors : int or None, default=None
        When given, it is stored in ``self.n_neighbors`` before
        querying. The new value stays on the estimator after this call.

    Returns
    -------
    y : array of shape [n_queries] or [n_queries, n_outputs]
        Class labels for each data sample.
    """
    if n_neighbors is not None:
        self.n_neighbors = n_neighbors

    X = check_array(X, accept_sparse='csr')
    X = X.astype(np.float32)

    neigh_dist, neigh_ind = self.kneighbors(X)

    classes_ = self.classes_
    _y = self.train_label
    if not self.outputs_2d_:
        _y = self.train_label.reshape((-1, 1))
        classes_ = [self.classes_]

    n_outputs = len(classes_)
    n_queries = X.shape[0]
    weights = _get_weights(neigh_dist, self.weights)

    y_pred = np.empty((n_queries, n_outputs), dtype=classes_[0].dtype)
    for k, classes_k in enumerate(classes_):
        if weights is None:
            mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
        else:
            mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

        # The stored labels are used as positions into ``classes_k``.
        mode = np.asarray(mode.ravel(), dtype=np.intp)
        y_pred[:, k] = classes_k.take(mode)

    if not self.outputs_2d_:
        y_pred = y_pred.ravel()

    return y_pred
def predict(self, X):
    """Predict the target for the provided data.

    For every query a (optionally weighted) least squares linear model is
    fitted on the query's nearest training neighbours and evaluated at
    the query point.

    Parameters
    ----------
    X : array-like, shape (n_query, n_features),
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array of float, shape = [n_samples] or [n_samples, n_outputs]
        Target values.
    """
    X = check_array(X, accept_sparse='csr')

    # NN of X with respect to the train data (_fit_X)
    neigh_dist, neigh_ind = self.kneighbors(X)
    weights = _get_weights(neigh_dist, self.weights)

    _y = self._y
    if _y.ndim == 1:
        _y = _y.reshape((-1, 1))

    # Neighbour features/targets, one leading entry per query. The arrays
    # are deliberately NOT squeezed: squeezing drops the query axis when
    # there is a single query (or a single neighbour) and only worked for
    # one feature and one output.
    # assumes self._fit_X is 2-D (n_train, n_features) -- TODO confirm
    X_nn = self._fit_X[neigh_ind]
    y_nn = _y[neigh_ind]

    n_queries = X.shape[0]
    y_pred = np.empty((n_queries, _y.shape[1]), dtype=np.float64)

    # loop over the samples, not ideal from a speed point of view
    for i in range(n_queries):
        linear = skl_lm.LinearRegression()
        sample_weight = None if weights is None else weights[i]
        linear.fit(X_nn[i], y_nn[i], sample_weight=sample_weight)
        # X[i:i + 1] keeps the query as a single-row 2-D input.
        y_pred[i] = linear.predict(X[i:i + 1])[0]

    if self._y.ndim == 1:
        y_pred = y_pred.ravel()

    return y_pred
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : sktime-format pandas dataframe or array-like
        Test samples, shape (n_query, n_features), or (n_query, n_indexed)
        if metric == 'precomputed'.

    Returns
    -------
    y : array of shape [n_samples] or [n_samples, n_outputs]
        Class labels for each data sample.
    """
    X = check_data_sktime_tsc(X, dim_to_use=self.dim_to_use)

    # ``check_array`` is temporarily given the code object of
    # ``_check_array_ts``. The swap is process-wide, so it must be undone
    # even when something below raises; hence the try/finally.
    original_code = check_array.__code__
    check_array.__code__ = _check_array_ts.__code__
    try:
        neigh_dist, neigh_ind = self.kneighbors(X)

        classes_ = self.classes_
        _y = self._y
        if not self.outputs_2d_:
            _y = self._y.reshape((-1, 1))
            classes_ = [self.classes_]

        n_outputs = len(classes_)
        n_samples = X.shape[0]
        weights = _get_weights(neigh_dist, self.weights)

        y_pred = np.empty((n_samples, n_outputs), dtype=classes_[0].dtype)
        for k, classes_k in enumerate(classes_):
            if weights is None:
                mode, _ = stats.mode(_y[neigh_ind, k], axis=1)
            else:
                mode, _ = weighted_mode(_y[neigh_ind, k], weights, axis=1)

            # The stored labels are used as positions into ``classes_k``.
            mode = np.asarray(mode.ravel(), dtype=np.intp)
            y_pred[:, k] = classes_k.take(mode)

        if not self.outputs_2d_:
            y_pred = y_pred.ravel()

        return y_pred
    finally:
        check_array.__code__ = original_code
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples.

    Returns
    -------
    labels : ndarray
        The (optionally weighted) mode of the neighbour labels, one entry
        per test sample.
    """
    X_ = to_time_series_dataset(X)
    neigh_dist, neigh_ind = self.kneighbors(X_)

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        mode, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)

    # ``mode`` holds one value per query, but depending on the library
    # version the reduced axis is either kept (n, 1) or dropped (n,).
    # ``ravel`` handles both layouts, whereas ``mode[:, 0]`` fails on 1-D.
    return mode.ravel()
def predict(self, X):
    """Predict the target as the (optionally weighted) neighbour median.

    Parameters
    ----------
    X : array-like, shape (n_query, n_features),
        or (n_query, n_indexed) if metric == 'precomputed'
        Test samples.

    Returns
    -------
    y : array, shape = [n_samples] or [n_samples, n_outputs]
        Target values. Without weights this is ``np.median`` over the
        neighbours. With weights it is the lower weighted median: the
        smallest neighbour target whose cumulative weight reaches half of
        the total weight of that query's neighbours.

    Raises
    ------
    ValueError
        If ``X`` is sparse and ``self.metric == 'precomputed'``.
    """
    if issparse(X) and self.metric == 'precomputed':
        raise ValueError(
            "Sparse matrices not supported for prediction with "
            "precomputed kernels. Densify your matrix.")
    X = check_array(X, accept_sparse='csr')

    neigh_dist, neigh_ind = self.kneighbors(X)
    weights = _get_weights(neigh_dist, self.weights)

    _y = self._y
    if _y.ndim == 1:
        _y = _y.reshape((-1, 1))

    if weights is None:
        y_pred = np.median(_y[neigh_ind], axis=1)
    else:
        # The weighted median is computed separately for every query and
        # every output. The previous code flattened all queries together,
        # accumulated ``y * w`` over unsorted values and used a position in
        # that running sum as a row index of ``_y``.
        n_queries, n_outputs = neigh_ind.shape[0], _y.shape[1]
        y_pred = np.empty((n_queries, n_outputs), dtype=_y.dtype)
        for i in range(n_queries):
            query_weights = weights[i]
            for j in range(n_outputs):
                values = _y[neigh_ind[i], j]
                order = values.argsort()
                cum_weights = np.cumsum(query_weights[order])
                # First sorted position whose cumulative weight reaches
                # half of the total; clamped to guard against rounding.
                pos = np.searchsorted(cum_weights, cum_weights[-1] / 2.0)
                pos = min(pos, len(order) - 1)
                y_pred[i, j] = values[order[pos]]

    if self._y.ndim == 1:
        y_pred = y_pred.ravel()

    return y_pred
def predict(self, X):
    """Predict the class labels for the provided data.

    Parameters
    ----------
    X : array-like, shape (n_ts, sz, d)
        Test samples. They are used as given when
        ``self.metric == "min_dist"`` and ``self.variables_size > 1``;
        otherwise they are converted with ``to_time_series_dataset``.

    Returns
    -------
    labels : ndarray
        The (optionally weighted) mode of the neighbour labels, one entry
        per test sample.
    """
    if self.metric == "min_dist" and self.variables_size > 1:
        X_ = X
    else:
        X_ = to_time_series_dataset(X, self.variables_size)

    neigh_dist, neigh_ind = self.kneighbors(
        X_, self.multivariate_output, None, True)

    weights = _get_weights(neigh_dist, self.weights)
    if weights is None:
        mode, _ = stats.mode(self._fit_y[neigh_ind], axis=1)
    else:
        mode, _ = weighted_mode(self._fit_y[neigh_ind], weights, axis=1)

    # ``mode`` holds one value per query, but depending on the library
    # version the reduced axis is either kept (n, 1) or dropped (n,).
    # ``ravel`` handles both layouts, whereas ``mode[:, 0]`` fails on 1-D.
    return mode.ravel()