def _validate_X_predict(self, X, check_input):
    """Validate X whenever one tries to predict, apply, or predict_proba."""
    if self.tree_ is None:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    if check_input:
        X = check_array(X, dtype=DTYPE, accept_sparse="csr")
        if issparse(X) and (X.indices.dtype != np.intc
                            or X.indptr.dtype != np.intc):
            raise ValueError("No support for np.int64 index based "
                             "sparse matrices")

    n_features = X.shape[1]
    if self.n_features_ != n_features:
        raise ValueError("Number of features of the model must "
                         "match the input. Model n_features is %s and "
                         "input n_features is %s "
                         % (self.n_features_, n_features))

    return X

def feature_names(self) -> np.ndarray:
    """Retrieve the feature names, depending on the encoder chosen.

    :return: array of feature names for the fitted encoder.
    """
    if self.encoder is None:
        raise NotFittedError(
            'Estimator is not yet fitted. Call "fit" or "fit_transform" first.'
        )

    if isinstance(self.encoder, (LabelBinarizer, MultiLabelBinarizer)):
        fnames = self.encoder.classes_
    elif isinstance(self.encoder, CountVectorizer):
        fnames = self.encoder.get_feature_names()
    elif isinstance(self.encoder, OneHotEncoder):
        fnames = self.encoder.categories_[0]
    elif isinstance(self.encoder, FeatureHasher):
        fnames = range(self.encoder.n_features)
    else:
        raise NotImplementedError()

    return np.asarray(fnames)

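# Minimal usage sketch for `feature_names` above (assumption: the enclosing
# class stores its fitted encoder in `self.encoder`; only the CountVectorizer
# branch is exercised here). The names are the learned vocabulary terms,
# in alphabetical order.
from sklearn.feature_extraction.text import CountVectorizer

_vec = CountVectorizer().fit(["red apple", "green apple"])
print(_vec.get_feature_names())  # ['apple', 'green', 'red']
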
def predict_meta_features(self, X):
    """Get meta-features of test-data.

    Parameters
    ----------
    X : numpy array, shape = [n_samples, n_features]
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    meta-features : numpy array, shape = [n_samples, len(self.regressors)]
        meta-features for test data, where n_samples is the number of
        samples in test data and len(self.regressors) is the number
        of regressors.
    """
    if not hasattr(self, 'regr_'):
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")
    return np.column_stack([r.predict(X) for r in self.regr_])

def decision_tree_data_from_pipeline(pipeline_):
    """Return data for a fitted pipeline in a restructured format.

    Arguments:
        pipeline_ (PipelineBase): A pipeline with a DecisionTree-based estimator.

    Returns:
        OrderedDict: An OrderedDict of OrderedDicts describing a tree structure.
    """
    if not pipeline_.model_family == ModelFamily.DECISION_TREE:
        raise ValueError(
            "Tree structure reformatting is only supported for decision tree estimators"
        )
    if not pipeline_._is_fitted:
        raise NotFittedError(
            "The DecisionTree estimator associated with this pipeline is not "
            "fitted yet. Call 'fit' with appropriate arguments before using "
            "this estimator.")

    est = pipeline_.estimator._component_obj
    feature_names = pipeline_.input_feature_names[pipeline_.estimator.name]
    return _tree_parse(est, feature_names)

def predict_proba(self, X):
    """Predict class probabilities for X.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Test vectors, where n_samples is the number of samples and
        n_features is the number of features.

    Returns
    -------
    avg : array-like, shape = [n_samples, n_classes]
        Weighted average probability for each class per sample.
    """
    if not hasattr(self, 'clfs_'):
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")
    avg = np.average(self._predict_probas(X), axis=0, weights=self.weights)
    return avg

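# Worked sketch of the weighted soft vote computed above (illustrative
# numbers only): `self._predict_probas(X)` stacks per-classifier class
# probabilities along axis 0, and `np.average` collapses that axis with
# the given weights.
import numpy as np

probas = np.array([[[0.8, 0.2]],   # classifier 1, one sample
                   [[0.4, 0.6]]])  # classifier 2, same sample
print(np.average(probas, axis=0, weights=[2, 1]))  # [[0.667 0.333]]
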
def predict_proba(self, X):
    """Predict class probabilities for X.

    Parameters
    ----------
    X : array-like of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array of shape = [n_samples]
        The calibrated probability of the positive class for each input
        sample.
    """
    if not self.estimator_fitted:
        raise NotFittedError(
            'The estimator must be fitted before calling predict_proba().'
        )
    probabilistic_predictions = self.estimator.predict_proba(X)
    probabilistic_predictions = probabilistic_predictions[:, 1]
    return probabilistic_predictions / self.c

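# Worked example of the `/ self.c` correction above. This matches the
# Elkan & Noto (2008) positive-unlabeled calibration (an assumption about
# this snippet's origin), where `self.c` estimates P(labeled | positive):
p_labeled = 0.3   # P(s=1 | x) from the inner estimator
c = 0.5           # estimated P(s=1 | y=1)
print(p_labeled / c)  # 0.6, the calibrated P(y=1 | x)
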
def transform(self, docs):
    """Return the topic proportions for the documents passed.

    The input `docs` should be in BOW format and can be a list of
    documents like
    [[(4, 1), (7, 1)], [(9, 1), (13, 1)], [(2, 1), (6, 1)]]
    or a single document like [(4, 1), (7, 1)].
    """
    if self.gensim_model is None:
        raise NotFittedError(
            "This model has not been fitted yet. Call 'fit' with "
            "appropriate arguments before using this method."
        )

    # Wrap a single document so the input is always a list of documents.
    check = lambda x: [x] if isinstance(x[0], tuple) else x
    docs = check(docs)
    X = [[] for _ in range(0, len(docs))]
    for k, v in enumerate(docs):
        transformed_author = self.gensim_model[v]
        X[k] = transformed_author
    return np.reshape(np.array(X), (len(docs), self.num_topics))

def transform(self, X):
    """Perform feature reduction by selecting the features chosen by the IPW.

    Parameters
    ----------
    X : pandas dataframe or numpy ndarray
        matrix used as predictors.

    Returns
    -------
    numpy ndarray or DataFrame, same type as the input
        An n x z matrix, where n is the number of samples in X and z is
        the number of features retained, based upon the threshold value.
        The features in the returned matrix are in descending order of
        the measure of relevancy provided by the IPW iteration.
    """
    if self.importances is not None:
        self.params = None
        if isinstance(X, pd.DataFrame):  # dataframe
            return X[X.columns[np.argsort(
                self.significant_variables)[::-1]]]
        elif isinstance(X, np.ndarray):  # numpy array
            return X[:, np.argsort(self.significant_variables)[::-1]]
        else:
            raise TypeError(
                'X must be a pandas dataframe or numpy ndarray')
    else:
        raise NotFittedError(
            'importances is not defined, use the fit method to define them'
        )

def transform(self, X):
    """Compute Betti curves.

    Parameters:
        X (list of 2d arrays): Persistence diagrams.

    Returns:
        Array of ints of shape (len(X), len(self.grid_)): Betti numbers
        of the given persistence diagrams at the grid points given in
        `self.grid_`.
    """
    if not self.is_fitted():
        raise NotFittedError("Not fitted.")

    if not X:
        X = [np.zeros((0, 2))]

    N = len(X)

    # Flatten each diagram column-major, so for diagram j the first
    # X[j].shape[0] entries are births (+1) and the rest deaths (-1).
    events = np.concatenate([pd.flatten(order="F") for pd in X], axis=0)
    sorting = np.argsort(events)
    offsets = np.zeros(1 + N, dtype=int)
    for i in range(0, N):
        offsets[i + 1] = offsets[i] + 2 * X[i].shape[0]
    starts = offsets[0:N]
    ends = offsets[1:N + 1] - 1

    bettis = [[0] for i in range(0, N)]

    i = 0
    for x in self.grid_:
        while i < len(sorting) and events[sorting[i]] <= x:
            j = np.searchsorted(ends, sorting[i])
            delta = 1 if sorting[i] - starts[j] < len(X[j]) else -1
            bettis[j][-1] += delta
            i += 1
        for k in range(0, N):
            bettis[k].append(bettis[k][-1])

    return np.array(bettis, dtype=int)[:, 0:-1]

def check_is_fitted(estimator, attributes, msg=None, all_or_any=all):
    """Perform is_fitted validation for estimator.

    Checks if the estimator is fitted by verifying the presence of
    "all_or_any" of the passed attributes and raises a NotFittedError with
    the given message.

    Parameters
    ----------
    estimator : estimator instance.
        estimator instance for which the check is performed.

    attributes : attribute name(s) given as string or a list/tuple of strings
        Eg.: ``["coef_", "estimator_", ...], "coef_"``

    msg : string
        The default error message is, "This %(name)s instance is not fitted
        yet. Call 'fit' with appropriate arguments before using this method."

        For custom messages if "%(name)s" is present in the message string,
        it is substituted for the estimator name.

        Eg. : "Estimator, %(name)s, must be fitted before sparsifying".

    all_or_any : callable, {all, any}, default all
        Specify whether all or any of the given attributes must exist.

    Returns
    -------
    None

    Raises
    ------
    NotFittedError
        If the attributes are not found.
    """
    if msg is None:
        msg = ("This %(name)s instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this method.")

    if not hasattr(estimator, 'fit'):
        raise TypeError("%s is not an estimator instance." % (estimator))

    if not isinstance(attributes, (list, tuple)):
        attributes = [attributes]

    if not all_or_any([hasattr(estimator, attr) for attr in attributes]):
        raise NotFittedError(msg % {'name': type(estimator).__name__})

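# Usage sketch for `check_is_fitted` (the estimator below is hypothetical,
# for illustration only): guard `predict` by checking for an attribute that
# only `fit` creates, following the scikit-learn trailing-underscore
# convention for fitted attributes.
import numpy as np
from sklearn.base import BaseEstimator

class TinyRegressor(BaseEstimator):
    def fit(self, X, y):
        self.coef_ = np.zeros(np.asarray(X).shape[1])  # created only by fit
        return self

    def predict(self, X):
        check_is_fitted(self, "coef_")  # raises NotFittedError before fit
        return np.asarray(X) @ self.coef_
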
def predict_segmenter(features, clf):
    """Segmentation of images using a pretrained classifier.

    Parameters
    ----------
    features : ndarray
        Array of features, with the last dimension corresponding to the
        number of features, and the other dimensions compatible with the
        shape of the image to segment, or a flattened image.
    clf : classifier object
        Trained classifier object, exposing a ``predict`` method as in
        scikit-learn's API, for example an instance of
        ``RandomForestClassifier`` or ``LogisticRegression`` classifier. The
        classifier must be already trained, for example with
        :func:`skimage.segmentation.fit_segmenter`.

    Returns
    -------
    output : ndarray
        Labeled array, built from the prediction of the classifier.
    """
    sh = features.shape
    if features.ndim > 2:
        features = features.reshape((-1, sh[-1]))

    try:
        predicted_labels = clf.predict(features)
    except NotFittedError:
        raise NotFittedError(
            "You must train the classifier `clf` first, for example with "
            "the `fit_segmenter` function.")
    except ValueError as err:
        if err.args and 'x must consist of vectors of length' in err.args[0]:
            raise ValueError(
                err.args[0] + '\n' +
                "Maybe you did not use the same type of features for "
                "training the classifier.")
        else:
            raise err

    output = predicted_labels.reshape(sh[:-1])
    return output

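# Usage sketch (hedged: in some scikit-image versions `fit_segmenter` and
# `predict_segmenter` live under `skimage.future` rather than
# `skimage.segmentation`; adjust the import to your version).
import numpy as np
from skimage import data
from skimage.feature import multiscale_basic_features
from sklearn.ensemble import RandomForestClassifier

img = data.camera()
features = multiscale_basic_features(img)      # shape (H, W, n_features)
labels = np.zeros(img.shape, dtype=np.uint8)   # 0 means "unlabeled"
labels[:10, :10] = 1                           # a few annotated pixels
labels[-10:, -10:] = 2
clf = RandomForestClassifier(n_estimators=10)
clf = fit_segmenter(labels, features, clf)     # companion training function
segmented = predict_segmenter(features, clf)   # full-image label map
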
def predict_proba(self, X):
    if not self.fitted_:
        raise NotFittedError("Estimator not fitted, "
                             "call `fit` before exploiting the model.")

    # Write the test data in the format the RGF executable expects.
    if isspmatrix(X):
        _sparse_savetxt(os.path.join(loc_temp, "test.data.x"), X)
    else:
        np.savetxt(os.path.join(loc_temp, "test.data.x"),
                   X, delimiter=' ', fmt="%s")

    # Find latest model location
    model_glob = loc_temp + os.sep + self._file_prefix + "*"
    if not glob(model_glob):
        raise Exception('Model learning result is not found in {0}. '
                        'This is a rgf_python error.'.format(loc_temp))
    latest_model_loc = sorted(glob(model_glob), reverse=True)[0]

    # Format test command
    params = []
    params.append("test_x_fn=%s" % os.path.join(loc_temp, "test.data.x"))
    params.append("prediction_fn=%s" % os.path.join(loc_temp,
                                                    "predictions.txt"))
    params.append("model_fn=%s" % latest_model_loc)
    cmd = (loc_exec, "predict", ",".join(params))

    output = subprocess.Popen(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              universal_newlines=True).communicate()
    if self.verbose:
        for k in output:
            print(k)

    y_pred = np.loadtxt(os.path.join(loc_temp, "predictions.txt"))
    return y_pred

def plot_qq(self, path: str = None, dim: tuple = (12, 8)):
    """Display a Q-Q plot from the standardized prediction residuals.

    Parameters
    ----------
    path : str, optional
        Path to store the figure.
    dim : tuple, optional
        Tuple with width and length of the plot.
    """
    if self.qq is None:
        msg = ("This {} instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this method.")
        raise NotFittedError(msg.format(self.__class__.__name__))

    with plt.style.context("seaborn-whitegrid"):
        fig, ax = plt.subplots(figsize=dim)
        x = self.qq["quantiles"]
        y = self.qq["residuals"]
        ax.plot(x, x, ls="--", label="perfect model",
                color="darkorange", linewidth=3)
        ax.plot(x, y, label="current model",
                color="cornflowerblue", linewidth=3)
        ax.set_xlabel("Theoretical quantiles", fontsize=15)
        ax.set_xticks(range(int(np.floor(min(x))),
                            int(np.ceil(max(x[x < float("inf")]))) + 1, 1))
        ax.set_ylabel("Standardized residuals", fontsize=15)
        ax.set_yticks(range(int(np.floor(min(y))),
                            int(np.ceil(max(y[x < float("inf")]))) + 1, 1))
        ax.legend(loc="best")
        ax.set_title("Q-Q plot", fontsize=20)
        if path:
            plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
        plt.show()

def predict_proba(self, X):
    """Predict class probabilities for X.

    The predicted class probabilities of an input sample are computed.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_samples, n_features]
        The input samples.

    Returns
    -------
    p : array of shape = [n_samples, n_classes]
        The class probabilities of the input samples. The order of the
        classes corresponds to that in the attribute classes_.
    """
    if self._fitted is None:
        raise NotFittedError(NOT_FITTED_ERROR_DESC)

    X = check_array(X, accept_sparse=True)
    self._check_n_features(X.shape[1])

    if self._n_classes == 2:
        y = self._estimators[0].predict(X)
        y = sigmoid(y)
        y = np.c_[y, 1 - y]
    else:
        y = np.zeros((X.shape[0], self._n_classes))
        for i, clf in enumerate(self._estimators):
            class_proba = clf.predict(X)
            y[:, i] = class_proba

        if self.calc_prob == "sigmoid":
            y = sigmoid(y)
            normalizer = np.sum(y, axis=1)[:, np.newaxis]
            normalizer[normalizer == 0.0] = 1.0
            y /= normalizer
        else:
            y = softmax(y)

    return y

def identify_low_importance(self, cumulative_importance):
    """Find the lowest-importance features not needed to account for
    `cumulative_importance` of the feature importance from the gradient
    boosting machine. As an example, if cumulative importance is set to
    0.95, this will retain only the most important features needed to
    reach 95% of the total feature importance. The identified features
    are those not needed.

    Parameters
    ----------
    cumulative_importance : float between 0 and 1
        The fraction of cumulative importance to account for.
    """
    self.cumulative_importance = cumulative_importance

    # The feature importances need to be calculated before running
    if self.feature_importances is None:
        raise NotFittedError(
            'Feature importances have not yet been determined. '
            'Call the `identify_zero_importance` method first.'
        )

    # Make sure most important features are on top
    self.feature_importances = self.feature_importances.sort_values(
        'cumulative_importance')

    # Identify the features not needed to reach the cumulative_importance
    record_low_importance = self.feature_importances[
        self.feature_importances['cumulative_importance'] >
        cumulative_importance]
    to_drop = list(record_low_importance['feature'])

    self.record_low_importance = record_low_importance
    self.removal_ops['low_importance'] = to_drop

    print('%d features that do not contribute to a cumulative importance '
          'of %0.2f.\n' % (len(self.removal_ops['low_importance']),
                           self.cumulative_importance))

def inverse_transform(
    self,
    y: SUPPORTED_TARGET_TYPES,
) -> np.ndarray:
    """Revert any encoding transformation done on a target array.

    Args:
        y (Union[np.ndarray, pd.DataFrame, pd.Series]):
            Target array to be transformed back to original form
            before encoding.

    Returns:
        np.ndarray: The transformed array.
    """
    if not self._is_fitted:
        raise NotFittedError(
            "Cannot call inverse_transform on a validator that is not fitted"
        )

    if self.encoder is None:
        return y

    shape = np.shape(y)
    if len(shape) > 1:
        y = self.encoder.inverse_transform(y)
    else:
        # The targets should be a flattened array, hence reshape with -1
        if hasattr(y, 'iloc'):
            y = cast(pd.DataFrame, y)
            y = self.encoder.inverse_transform(
                y.to_numpy().reshape(-1, 1)).reshape(-1)
        else:
            y = self.encoder.inverse_transform(
                np.array(y).reshape(-1, 1)).reshape(-1)

    # Inverse transform returns a numpy array of type object. This breaks
    # certain metrics such as accuracy, which makes type_of_target be
    # unknown. If a dtype was observed during fit, we try to honor it.
    if self.dtype is not None:
        y = y.astype(self.dtype)
    return y

def save(self, path):
    """Saves checkpoints and graph to given path.

    Args:
        path: Folder to save model to.
    """
    if not self._initialized:
        raise NotFittedError()

    # Currently Saver requires an absolute path to work correctly.
    path = os.path.abspath(path)

    if not os.path.exists(path):
        os.makedirs(path)
    if not os.path.isdir(path):
        raise ValueError("Path %s should be a directory to save "
                         "checkpoints and graph." % path)

    with open(os.path.join(path, 'model.def'), 'w') as fmodel:
        all_params = self.get_params()
        params = {}
        for key, value in all_params.items():
            if not callable(value) and value is not None:
                params[key] = value
        params['class_name'] = type(self).__name__
        fmodel.write(json.dumps(
            params,
            default=lambda o: o.__dict__ if hasattr(o, '__dict__') else None))

    with open(os.path.join(path, 'endpoints'), 'w') as foutputs:
        foutputs.write('%s\n%s\n%s\n%s' % (
            self._inp.name,
            self._out.name,
            self._model_predictions.name,
            self._model_loss.name))

    with open(os.path.join(path, 'graph.pbtxt'), 'w') as fgraph:
        fgraph.write(str(self._graph.as_graph_def()))

    with open(os.path.join(path, 'saver.pbtxt'), 'w') as fsaver:
        fsaver.write(str(self._saver.as_saver_def()))

    self._saver.save(self._session, os.path.join(path, 'model'),
                     global_step=self._global_step)

def predict(self, X, **kwargs):
    """Returns predictions for the given test data.

    Arguments:
        X: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `self.model_.predict`.

    Returns:
        preds: array-like, shape `(n_samples,)`
            Predictions.
    """
    # check if fitted
    if not self.is_fitted_:
        raise NotFittedError(
            "Estimator needs to be fit before `predict` can be called")
    # basic input checks
    X = check_array(X, allow_nd=True, dtype=["float64", "int"])
    # pre process X
    X, _ = self._pre_process_X(X)
    # filter kwargs and get attributes for predict
    kwargs = self._filter_params(self.model_.predict, params_to_check=kwargs)
    predict_args = self._filter_params(self.model_.predict)
    # predict with Keras model
    pred_args = {**predict_args, **kwargs}
    y_pred = self.model_.predict(X, **pred_args)
    # post process y
    y, _ = self._post_process_y(y_pred)
    return y

def plot_roc_curve(self, path: str = None, dim: tuple = (12, 8)):
    """Plot the ROC curve of the model.

    Parameters
    ----------
    path : str, optional
        Path to store the figure.
    dim : tuple, optional
        Tuple with width and length of the plot.
    """
    if self.roc_curve is None:
        msg = ("This {} instance is not fitted yet. Call 'fit' with "
               "appropriate arguments before using this method.")
        raise NotFittedError(msg.format(self.__class__.__name__))

    auc = float(self.scalar_metrics.loc["AUC"])
    with plt.style.context("seaborn-whitegrid"):
        fig, ax = plt.subplots(figsize=dim)
        ax.plot(self.roc_curve["fpr"], self.roc_curve["tpr"],
                color="cornflowerblue", linewidth=3,
                label="ROC curve (area = {s:.3})".format(s=auc))
        ax.plot([0, 1], [0, 1], color="darkorange", linewidth=3,
                linestyle="--")
        ax.set_xlabel("False Positive Rate", fontsize=15)
        ax.set_ylabel("True Positive Rate", fontsize=15)
        ax.legend(loc="lower right")
        ax.set_title("ROC curve", fontsize=20)
        if path:
            plt.savefig(path, format="png", dpi=300, bbox_inches="tight")
        plt.show()

def predict(self, X):
    if not hasattr(self, 'estimators_'):
        raise NotFittedError("Must fit clusters before predicting.")

    # `predict` returns -1 if any of the squared values are too large;
    # models with numerical instability will fail.
    clusters = self.clusterer_.predict(X)

    y_tmp = []
    idx = []
    for c, est in self.estimators_.items():
        mask = clusters == c
        if mask.any():
            idx.append(np.flatnonzero(mask))
            y_tmp.append(est.predict(X[safe_mask(X, mask)]))
    y_tmp = np.concatenate(y_tmp)
    idx = np.concatenate(idx)

    # Reassemble the per-cluster predictions into the original sample order.
    y = np.full([X.shape[0], y_tmp.shape[1]], np.nan)
    y[idx] = y_tmp
    return y

def predict_probabilites(self, X):
    if not self._session:
        raise NotFittedError("This %s instance is not fitted yet"
                             % self.__class__.__name__)

    h_np, b_np, h_lenseq, b_lenseq = (X['h_np'], X['b_np'],
                                      X['h_seqlen'], X['b_seqlen'])
    h_np, b_np, h_lenseq, b_lenseq = self.get_truncted_data(
        h_np, b_np, h_lenseq, b_lenseq)
    if (h_np.shape[1] < self.h_max_length
            or b_np.shape[1] < self.b_max_length):
        h_np, b_np = self.pad_data(h_np, b_np)

    with self._session.as_default() as sess:
        return self._probabilites.eval(
            feed_dict={
                self._X_head: h_np,
                self._X_body: b_np,
                self._X_head_length: h_lenseq,
                self._X_body_length: b_lenseq
            })

def _insert_error_scores(results, error_score):
    """Insert error in `results` by replacing them inplace with `error_score`.

    This only applies to multimetric scores because `_fit_and_score` will
    handle the single metric case.
    """
    successful_score = None
    failed_indices = []
    for i, result in enumerate(results):
        if result["fit_failed"]:
            failed_indices.append(i)
        elif successful_score is None:
            successful_score = result["test_scores"]

    if successful_score is None:
        raise NotFittedError("All estimators failed to fit")

    if isinstance(successful_score, dict):
        formatted_error = {name: error_score for name in successful_score}
        for i in failed_indices:
            results[i]["test_scores"] = formatted_error.copy()
            if "train_scores" in results[i]:
                results[i]["train_scores"] = formatted_error.copy()

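# Illustrative sketch of the `results` structure this helper expects (the
# keys mirror what a `_fit_and_score`-style routine would produce for
# multimetric scoring; treat the exact shape as an assumption). Failed
# entries are patched in place with the error score:
results = [
    {"fit_failed": False, "test_scores": {"accuracy": 0.9, "f1": 0.8}},
    {"fit_failed": True, "test_scores": None},
]
_insert_error_scores(results, error_score=float("nan"))
print(results[1]["test_scores"])  # {'accuracy': nan, 'f1': nan}
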
def _compute_output(self, X):
    """Get the outputs of the network, for use in prediction methods."""
    if not self._is_fitted:
        raise NotFittedError("Call fit before prediction")

    X = self._check_X(X)

    # Make predictions in batches.
    pred_batches = []
    start_idx = 0
    n_examples = X.shape[0]
    with self.graph_.as_default():
        while start_idx < n_examples:
            X_batch = X[start_idx:min(start_idx + self.batch_size,
                                      n_examples)]
            feed_dict = self._make_feed_dict(X_batch)
            start_idx += self.batch_size
            pred_batches.append(
                self._session.run(self.output_layer_, feed_dict=feed_dict))
    y_pred = np.concatenate(pred_batches)
    return y_pred

def _fit(self, x, y, verbose=False, load=False):
    """Fit the model.

    Args:
        x: NxD ndarray of features.
        y: Nx1 ndarray of observed values.

    Returns:
        self, the fitted module.
    """
    x, y = Module.validate(x, y)
    l_x, l_y = np.log(x + self.eps), np.log(y + self.eps)
    y_zero_one = (y > 0.0).astype(int)
    if y_zero_one.max() == y_zero_one.min():
        raise NotFittedError(
            "Logistic model couldn't fit, because the number of classes is <2"
        )
    self.log_reg.fit(x, y_zero_one)
    sample_weight = self.log_reg.predict_proba(x)[:, 1]

    # Linear regression under log mode.
    self.linear_reg.fit(X=l_x, y=l_y, sample_weight=sample_weight)
    self.fitted = self.linear_reg.predict(l_x)
    self.residual = (self.fitted - l_y)

    # Grid fit for bandwidth.
    if load is False:
        param = grid_fit_kde(self.residual)
        self.kde = KernelDensity(bandwidth=param["bandwidth"])
        self.kde.fit(self.residual)
    else:
        self.kde = pickle.load(open("all_kde.kd", "rb"))
    self.fitted = True
    # logger.debug("KDE bandwidth %s" % self.kde.bandwidth)
    return self

def predict(self, X):
    """Predict the targets using the trained ELM regressor.

    Parameters
    ----------
    X : {ndarray, sparse matrix} of shape (n_samples, n_features)

    Returns
    -------
    y : {ndarray, sparse matrix} of shape (n_samples,) or (n_samples, n_targets)
        The predicted targets.
    """
    if self._input_to_node is None or self._regressor is None:
        raise NotFittedError(
            "This %s instance is not fitted yet. Call 'fit' with "
            "appropriate arguments before using this method."
            % self.__class__.__name__)

    hidden_layer_state = self._input_to_node.transform(X)
    hidden_layer_state = self._node_to_node.transform(hidden_layer_state)

    # Undo the teacher scaling and shift applied during training.
    return ((self._node_to_node._y_pred[:-1, :])
            - self._node_to_node.teacher_shift) \
        / self._node_to_node.teacher_scaling

def transform(self, docs):
    """Return the vector representations for the input documents.

    The input `docs` should be a list of lists like
    [['calculus', 'mathematical'], ['geometry', 'operations', 'curves']]
    or a single document like ['calculus', 'mathematical'].
    """
    if self.gensim_model is None:
        raise NotFittedError(
            "This model has not been fitted yet. Call 'fit' with "
            "appropriate arguments before using this method."
        )

    # Wrap a single document so the input is always a list of documents.
    check = lambda x: [x] if isinstance(x[0], string_types) else x
    docs = check(docs)
    X = [[] for _ in range(0, len(docs))]
    for k, v in enumerate(docs):
        doc_vec = self.gensim_model.infer_vector(v)
        X[k] = doc_vec
    return np.reshape(np.array(X),
                      (len(docs), self.gensim_model.vector_size))

def transform(self, X, y=None):
    """Encode data with the autoencoder.

    Parameters
    ----------
    X : numpy array or sparse matrix of shape [n_samples, n_features]
        Data to encode.

    Returns
    -------
    numpy array of shape [n_samples, hidden_units[-1]]
        Encoded data.
    """
    if not self._is_fitted:
        raise NotFittedError("Call fit before transform!")

    # For sparse input, make the input a CSR matrix since it can be
    # indexed by row.
    X = check_array(X, accept_sparse=['csr'])

    # Check input data against internal data.
    # Raises an error on failure.
    self._check_data(X)

    # Make predictions in batches.
    pred_batches = []
    start_idx = 0
    n_examples = X.shape[0]
    with self.graph_.as_default():
        while start_idx < n_examples:
            X_batch = X[start_idx:min(start_idx + self.batch_size,
                                      n_examples)]
            feed_dict = self._make_feed_dict(X_batch, training=False)
            start_idx += self.batch_size
            pred_batches.append(
                self._session.run(self._encoded_values,
                                  feed_dict=feed_dict))
    return np.concatenate(pred_batches)

def transform(self, df, **transform_params):
    """Label-encode the columns `self.cols` of `df` using the fitted encoders.

    :param df: DataFrame to transform.
    :param transform_params: unused, kept for pipeline compatibility.
    :return: a copy of `df` with the encoded columns.
    """
    if not self._is_fitted:
        raise NotFittedError("Fitting was not performed")
    _is_cols_subset_of_df_cols(self.cols, df)

    df = df.copy()

    label_enc_dict = {}
    for col in self.cols:
        label_enc_dict[col] = self.les[col].transform(df[col])

    labelenc_cols = pd.DataFrame(label_enc_dict, index=df.index)

    for col in self.cols:
        df[col] = labelenc_cols[col]
    return df

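# Usage sketch (assumptions: the enclosing transformer is named
# `DataFrameLabelEncoder` here purely for illustration, and its fit method
# populates one LabelEncoder per column in `self.les` for the columns in
# `self.cols`, as the body above implies).
import pandas as pd

df = pd.DataFrame({"color": ["red", "blue", "red"], "size": [1, 2, 3]})
enc = DataFrameLabelEncoder(cols=["color"]).fit(df)  # hypothetical name
print(enc.transform(df)["color"].tolist())  # [1, 0, 1]: blue -> 0, red -> 1
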
def transform(self, GRFData):
    """Scale the values of all force components using the previously
    fitted scaler.

    Parameters:
        GRFData : dictionary containing the data for all force components.
            Input data in the following form:
            "f_v": num_samples x num_dimensions
            "f_ap": num_samples x num_dimensions
            "f_ml": num_samples x num_dimensions
            "cop_ap": num_samples x num_dimensions
            "cop_ml": num_samples x num_dimensions

    Returns:
        transformed_GRFData : dictionary containing the transformed values
            for all force components. The output data has the same form
            as the input data.

    Raises:
        NotFittedError : If the scaler has not been fitted to data prior
            to calling this function.
        ValueError : If GRFData is not a dictionary or does not contain
            values for one of the force components.
    """
    if not self.isFitted:
        raise NotFittedError(
            "The scaler has not been fitted to data. Call 'fit()' before "
            "calling 'transform()'.")

    self.__is_valid_dict(GRFData)

    transformed_GRFData = {}
    for component in self.comp_list:
        len_series = GRFData[component].shape[1]
        # Flatten to a single column, scale, then restore the original
        # (num_samples, num_dimensions) layout.
        transformed_data = self.scaler[component].transform(
            np.reshape(GRFData[component], (-1, 1)))
        transformed_GRFData[component] = np.reshape(
            transformed_data, (-1, len_series))

    return transformed_GRFData

def transform(self, Xs, return_loss=False):
    r"""Embed data matrix(s) using the trained deep networks and fitted
    CCA projection matrices. May be used for out-of-sample embeddings.

    Parameters
    ----------
    Xs : list of array-likes or numpy.ndarray
        - Xs length: n_views
        - Xs[i] shape: (n_samples, n_features_i)

        A list of data matrices from each view to transform based on the
        prior fit function. If view_idx is defined, then Xs is a 2D data
        matrix corresponding to a single view.
    return_loss : boolean, default=False
        If ``True``, returns the loss along with the transformed data in
        a tuple.

    Returns
    -------
    Xs_transformed : list of array-likes or array-like
        Transformed samples. Same structure as Xs, but potentially
        different n_features_i.
    loss : float
        Average loss over the data, defined as the negative correlation
        of the transformed views. Only returned if ``return_loss=True``.
    """
    if not self.is_fit:
        raise NotFittedError("Must call fit function before transform")
    Xs = check_Xs(Xs, multiview=True)
    x1 = torch.DoubleTensor(Xs[0])
    x2 = torch.DoubleTensor(Xs[1])

    with torch.no_grad():
        losses, outputs = self._get_outputs(x1, x2)
        outputs = self.linear_cca_.transform(outputs[0], outputs[1])

    if return_loss:
        return outputs, np.mean(losses)
    return outputs
