def test_check_sample_weight():
    # check array order
    sample_weight = np.ones(10)[::2]
    assert not sample_weight.flags["C_CONTIGUOUS"]
    sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)))
    assert sample_weight.flags["C_CONTIGUOUS"]

    # check None input
    sample_weight = _check_sample_weight(None, X=np.ones((5, 2)))
    assert_allclose(sample_weight, np.ones(5))

    # check numbers input
    sample_weight = _check_sample_weight(2.0, X=np.ones((5, 2)))
    assert_allclose(sample_weight, 2 * np.ones(5))

    # check wrong number of dimensions
    with pytest.raises(ValueError,
                       match="Sample weights must be 1D array or scalar"):
        _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))

    # check incorrect n_samples
    msg = r"sample_weight.shape == \(4,\), expected \(2,\)!"
    with pytest.raises(ValueError, match=msg):
        _check_sample_weight(np.ones(4), X=np.ones((2, 2)))

    # float32 dtype is preserved
    X = np.ones((5, 2))
    sample_weight = np.ones(5, dtype=np.float32)
    sample_weight = _check_sample_weight(sample_weight, X)
    assert sample_weight.dtype == np.float32

    # int dtype will be converted to float64 instead
    X = np.ones((5, 2), dtype=int)  # np.int was removed in NumPy >= 1.24
    sample_weight = _check_sample_weight(None, X, dtype=X.dtype)
    assert sample_weight.dtype == np.float64
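# Usage sketch for the behaviours exercised by test_check_sample_weight above.
# It relies on sklearn's private helper sklearn.utils.validation._check_sample_weight
# (private API; it may change between releases).
import numpy as np
from sklearn.utils.validation import _check_sample_weight

X = np.ones((5, 2))
print(_check_sample_weight(None, X))  # None -> uniform weights: [1. 1. 1. 1. 1.]
print(_check_sample_weight(2.0, X))   # scalar -> broadcast:     [2. 2. 2. 2. 2.]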
def fit(self, X: np.ndarray, y: np.ndarray,
        sample_weight: np.ndarray = None) -> Odte:
    # Check that the parameters are OK.
    if self.n_estimators < 3:
        raise ValueError(
            f"n_estimators must be greater than 2 but got "
            f"(n_estimators={self.n_estimators})"
        )
    check_classification_targets(y)
    X, y = self._validate_data(X, y)
    # if sample_weight is None, _check_sample_weight returns np.ones
    sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float64)
    # Initialize computed parameters and build the estimator
    self.max_features_ = self._initialize_max_features()
    # build base_estimator_
    self._validate_estimator()
    self.classes_, y = np.unique(y, return_inverse=True)
    self.n_classes_: int = self.classes_.shape[0]
    self.estimators_: List[BaseEstimator] = []
    self.subspaces_: List[Tuple[int, ...]] = []
    result = self._train(X, y, sample_weight)
    self.estimators_, self.subspaces_ = tuple(zip(*result))  # type: ignore
    return self
def plsa_topics(X, k, **kwargs):
    """Perform a bootstrap sample from a corpus of documents and fit the
    sample using pLSA to give a set of topic vectors such that the (z, w)
    entry of the returned array is the probability P(w|z) of word w
    occurring given the zth topic.

    Parameters
    ----------
    X: sparse matrix of shape (n_docs, n_words)
        The bag-of-words representation of the corpus of documents.

    k: int
        The number of topics to generate.

    kwargs:
        Further keyword arguments that can be passed on to the ``plsa_fit``
        function. Possibilities include:

        * ``init``
        * ``n_iter``
        * ``n_iter_per_test``
        * ``tolerance``
        * ``e_step_thresh``
        * ``random_state``

    Returns
    -------
    topics: array of shape (k, n_words)
        The topics generated from the bootstrap sample.
    """
    A = X.tocsr()
    if kwargs.get("bootstrap", True):
        rng = check_random_state(kwargs.get("random_state", None))
        bootstrap_sample_indices = rng.randint(0, A.shape[0], size=A.shape[0])
        B = A[bootstrap_sample_indices]
    else:
        B = A
    sample_weight = _check_sample_weight(None, B, dtype=np.float32)
    if numba.cuda.is_available():
        doc_topic, topic_vocab = gpu_plsa_fit(
            B,
            k,
            init=kwargs.get("init", "random"),
            n_iter=kwargs.get("n_iter", 100),
            n_iter_per_test=kwargs.get("n_iter_per_test", 10),
            tolerance=kwargs.get("tolerance", 0.001),
            e_step_thresh=kwargs.get("e_step_thresh", 1e-16),
            random_state=kwargs.get("random_state", None),
        )
    else:
        doc_topic, topic_vocab = plsa_fit(
            B,
            k,
            sample_weight,
            init=kwargs.get("init", "random"),
            n_iter=kwargs.get("n_iter", 100),
            n_iter_per_test=kwargs.get("n_iter_per_test", 10),
            tolerance=kwargs.get("tolerance", 0.001),
            e_step_thresh=kwargs.get("e_step_thresh", 1e-16),
            random_state=kwargs.get("random_state", None),
        )
    return topic_vocab
def _deviance_dispersion_update(self, X, y, sample_weight=None):
    weights = _check_sample_weight(sample_weight, X)
    y_pred = self.predict(X)
    y_mean = np.average(y, weights=weights)
    deviance_ = np.sum(weights * (2 * (xlogy(y, y / y_pred) - y + y_pred)))
    null_deviance_ = np.sum(weights * (2 * (xlogy(y, y / y_mean) - y + y_mean)))
    # pearson residual: (raw residual) / (variance function)
    # TODO: put correct weibull variance here
    pearson_residuals_ = (y - y_pred) / np.sqrt(y_pred)
    pearson_chi2_ = np.sum(pearson_residuals_ ** 2)
    model_d2_ = 1 - deviance_ / null_deviance_
    # degrees of freedom of the model (all params, including intercept, minus 1)
    df_model_ = X.shape[1]
    # degrees of freedom of the residuals: n_obs - n_features - 1
    df_residuals_ = X.shape[0] - X.shape[1] - 1
    # total degrees of freedom
    df_total_ = df_residuals_ + df_model_
    # method-of-moments estimator for the dispersion scale
    dispersion_scale_ = pearson_chi2_ / df_residuals_
    dispersion_scale_sqrt_ = np.sqrt(dispersion_scale_)
    results = {
        'deviance_': deviance_,
        'null_deviance_': null_deviance_,
        'pearson_residuals_': pearson_residuals_,
        'pearson_chi2_': pearson_chi2_,
        'model_d2_': model_d2_,
        'df_model_': df_model_,
        'df_residuals_': df_residuals_,
        'df_total_': df_total_,
        'dispersion_scale_': dispersion_scale_,
        'dispersion_scale_sqrt_': dispersion_scale_sqrt_,
    }
    return results
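# Worked single-observation example of the unit deviance summed above
# (a sketch with hypothetical values; xlogy(y, y / mu) returns 0 when y == 0,
# which is why xlogy is used instead of a bare y * log(y / mu)).
import numpy as np
from scipy.special import xlogy

y_obs, mu = 3.0, 2.5
unit_deviance = 2 * (xlogy(y_obs, y_obs / mu) - y_obs + mu)
print(unit_deviance)  # small positive number; zero iff mu == y_obs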
def fit(self, X, y, sample_weight=None):
    """
    Fit the model.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data.
    y : array-like of shape (n_samples,)
        Target values.
    sample_weight : array-like of shape (n_samples,), default=None
        Individual weights for each sample.

    Returns
    -------
    self
    """
    check_classification_targets(y)
    if sample_weight is None:
        self.classes_, self.counts_ = np.unique(y, return_counts=True)
    else:
        sample_weight = _check_sample_weight(sample_weight, X)
        sample_weight = sample_weight / sample_weight.mean()
        df = pd.DataFrame({'y': y, 'sample_weight': sample_weight})
        df = df.groupby('y').sum()
        self.classes_ = df.index.values
        self.counts_ = df.sample_weight.values
    self.counts_ = self.counts_ / self.counts_.sum()
    self.dominant_class_ = self.classes_[np.argmax(self.counts_)]
    return self
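# The pandas groupby above just sums normalized weights per class; a minimal
# numpy-only equivalent, shown as an illustration (hypothetical data):
import numpy as np

y = np.array(["a", "b", "a", "b", "b"])
w = np.array([1.0, 2.0, 1.0, 1.0, 1.0])
classes, inverse = np.unique(y, return_inverse=True)
counts = np.bincount(inverse, weights=w / w.mean())
counts = counts / counts.sum()
print(classes[np.argmax(counts)])  # the (weighted) dominant class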
def fit(self, X, y, *, sample_weight=None, **kwargs):
    """Build the ensemble classifier from the training set (X, y)."""
    # Check random state
    self.random_state = check_random_state(self.random_state)

    # Convert data (X is required to be 2d and indexable)
    X, y = self._validate_data(X, y, **self.check_x_y_args)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X,
                                             dtype=np.float64)
        sample_weight /= sample_weight.sum()
        if np.any(sample_weight < 0):
            raise ValueError("sample_weight cannot contain negative weights")

    # Remap output
    n_samples, self.n_features_ = X.shape
    self.features_ = np.arange(self.n_features_)
    self._n_samples = n_samples
    y = self._validate_y(y)

    # Check parameters
    self._validate_estimator(default=DecisionTreeClassifier())

    # If the base estimator does not support sample weights and
    # sample_weight is not None, raise a ValueError
    support_sample_weight = has_fit_parameter(self.base_estimator_,
                                              "sample_weight")
    if not support_sample_weight and sample_weight is not None:
        raise ValueError("The base estimator doesn't support sample weight")

    self.estimators_, self.estimators_features_ = [], []

    return self._fit(X, y, sample_weight=sample_weight, **kwargs)
def fit(self, X, Y, sample_weight):
    """Fit the dummy estimator on the training data and rankings."""
    (X, Y) = self._validate_data(X, Y, multi_output=True)
    sample_weight = _check_sample_weight(sample_weight, X)

    (_, n_classes) = Y.shape

    if self.strategy not in VALID_STRATEGIES:
        raise ValueError("Unknown strategy type: {0}. Expected one of {1}."
                         .format(self.strategy, list(VALID_STRATEGIES)))

    if self.strategy == "constant":
        if self.constant is None:
            raise ValueError("The constant target ranking has to be "
                             "specified for the constant strategy.")
        elif self.constant.shape[0] != n_classes:
            raise ValueError("The constant target ranking should have "
                             "shape {0}.".format(n_classes))
        else:
            self.constant = check_array(
                self.constant, dtype=np.int64, ensure_2d=False)
            # Re-raise a more informative message when the constant
            # target ranking cannot be managed by the estimator
            try:
                self._rank_algorithm.check_targets(self.constant[None, :])
            except ValueError:
                raise ValueError("The constant target ranking is not the "
                                 "target type managed by the estimator.")

    self.ranking_ = self._rank_algorithm.aggregate(Y, sample_weight)

    return self
def _daal_dbscan(X, eps=0.5, min_samples=5, sample_weight=None):
    if eps <= 0.0:
        raise ValueError("eps must be positive.")

    X = check_array(X, dtype=[np.float64, np.float32])
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        ww = make2d(sample_weight)
    else:
        ww = None
    XX = make2d(X)

    fpt = getFPType(XX)
    alg = daal4py.dbscan(
        method='defaultDense',
        fptype=fpt,
        epsilon=float(eps),
        minObservations=int(min_samples),
        memorySavingMode=False,
        resultsToCompute="computeCoreIndices")

    daal_res = alg.compute(XX, ww)
    n_clusters = daal_res.nClusters[0, 0]
    assignments = daal_res.assignments.ravel()
    if daal_res.coreIndices is not None:
        core_ind = daal_res.coreIndices.ravel()
    else:
        core_ind = np.array([], dtype=np.intc)

    return (core_ind, assignments)
def _validate_input(self, X, y, sample_weight=None):
    """Helper function to validate the inputs"""
    X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

    X, y, X_offset, y_offset, X_scale = _preprocess_data(
        X, y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
        check_input=True)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        outs = _rescale_data(X, y, sample_weight)
        X, y = outs[0], outs[1]

    return X, y, X_offset, y_offset, X_scale
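# "Sample weight via a simple rescaling", as referenced above: scaling each
# row of X and each entry of y by sqrt(w) turns ordinary least squares into
# weighted least squares. A self-contained sketch (not the private
# _rescale_data helper itself):
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(6, 2))
y = rng.normal(size=6)
w = rng.uniform(0.5, 2.0, size=6)

sw = np.sqrt(w)
X_rescaled = X * sw[:, None]
y_rescaled = y * sw
# solving lstsq on the rescaled data minimizes sum(w * residuals**2)
coef, *_ = np.linalg.lstsq(X_rescaled, y_rescaled, rcond=None)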
def _validate_sample_weight(
    self,
    X: np.ndarray,
    y: np.ndarray,
    sample_weight: Union[None, np.ndarray, Iterable],
) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Validate the passed sample_weight and ensure it is a Numpy array."""
    sample_weight = _check_sample_weight(
        sample_weight, X, dtype=np.dtype(tf.keras.backend.floatx()))

    # Scikit-Learn expects a 0 in sample_weight to mean
    # "ignore the sample", but because of how Keras applies
    # sample_weight to the loss function, this doesn't
    # exactly work out (as in, sklearn estimator checks fail
    # because the predictions differ by a small margin).
    # To get around this, we manually delete these samples here.
    zeros = sample_weight == 0
    if zeros.sum() == zeros.size:
        raise ValueError(
            "No training samples had any weight; only zeros were passed in"
            " sample_weight. That means there is nothing to train on by"
            " definition, so training cannot be completed."
        )
    if np.any(zeros):
        X = X[~zeros]
        y = y[~zeros]
        sample_weight = sample_weight[~zeros]
    return X, y, sample_weight
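# The zero-weight filtering above, reduced to its numpy core (a sketch,
# independent of Keras; hypothetical data):
import numpy as np

X = np.arange(8, dtype=float).reshape(4, 2)
y = np.array([0, 1, 0, 1])
sample_weight = np.array([1.0, 0.0, 2.0, 1.0])

keep = sample_weight != 0
X, y, sample_weight = X[keep], y[keep], sample_weight[keep]
print(len(y))  # 3 samples remain; the zero-weight sample is dropped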
def fit(self, X, y=None, sample_weight=None):
    """Compute kernel k-means clustering.

    Parameters
    ----------
    X : array-like of shape=(n_ts, sz, d)
        Time series dataset.

    y
        Ignored

    sample_weight : array-like of shape=(n_ts, ) or None (default: None)
        Weights to be given to time series in the learning process. By
        default, all time series weights are equal.
    """
    X = check_array(X, allow_nd=True, force_all_finite=False)
    X = check_dims(X)
    sample_weight = _check_sample_weight(sample_weight=sample_weight, X=X)

    max_attempts = max(self.n_init, 10)

    self.labels_ = None
    self.inertia_ = None
    self.sample_weight_ = None
    self._X_fit = None
    # n_iter_ will contain the number of iterations the most
    # successful run required.
    self.n_iter_ = 0

    n_samples = X.shape[0]
    K = self._get_kernel(X)
    sw = (sample_weight if sample_weight is not None
          else numpy.ones(n_samples))
    self.sample_weight_ = sw
    rs = check_random_state(self.random_state)

    last_correct_labels = None
    min_inertia = numpy.inf
    n_attempts = 0
    n_successful = 0
    while n_successful < self.n_init and n_attempts < max_attempts:
        try:
            if self.verbose and self.n_init > 1:
                print("Init %d" % (n_successful + 1))
            n_attempts += 1
            self._fit_one_init(K, rs)
            if self.inertia_ < min_inertia:
                last_correct_labels = self.labels_
                min_inertia = self.inertia_
                self.n_iter_ = self._iter
            n_successful += 1
        except EmptyClusterError:
            if self.verbose:
                print("Resumed because of empty cluster")
    if n_successful > 0:
        self.labels_ = last_correct_labels
        self.inertia_ = min_inertia
        self._X_fit = X
    return self
def _check_data_params(obj, X, y, conf_score):
    """Extracted out of BaseHandler for WeightedBag & Costing"""
    # Reproducibility
    rns = check_random_state(obj.random_state)
    for k, v in obj.get_params().items():
        if isinstance(v, BaseEstimator) and 'random_state' in v.get_params():
            v.set_params(random_state=rns.randint(10**8))

    # Parallelization
    if obj.classifier is not None and 'n_jobs' in obj.classifier.get_params():
        obj.classifier.set_params(n_jobs=obj.n_jobs)
    if obj.detector is not None and 'n_jobs' in obj.detector.get_params():
        obj.detector.set_params(n_jobs=obj.n_jobs)

    if conf_score is None and obj.detector is None:
        raise ValueError(
            "Neither conf_score nor detector is supplied to Handler")
    if conf_score is None:
        # outside Pipeline / inside Iterative Handler
        conf_score = obj.detector.detect(X, y)

    X, y = obj._validate_data(X, y)
    obj.classes_ = np.unique(y)
    conf_score = _check_sample_weight(conf_score, X)
    return X, y, conf_score
def fit(self, X, y, sample_weight=None):
    X, y = check_X_y(X, y, y_numeric=True, multi_output=True)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

    X, y, X_offset, y_offset, X_scale = _preprocess_data(
        X, y,
        fit_intercept=self.fit_intercept,
        normalize=self.normalize,
        copy=self.copy_X,
        sample_weight=sample_weight,
        return_mean=True)

    if sample_weight is not None:
        # Sample weight can be implemented via a simple rescaling.
        X, y = _rescale_data(X, y, sample_weight)

    self.is_fitted_ = True
    coef, alpha = fracridge(X, y, fracs=self.fracs)
    self.alpha_ = alpha
    self.coef_ = coef
    self._set_intercept(X_offset, y_offset, X_scale)
    return self
def fit(self, X, y, sample_weight=None, **kwargs):
    """Constructs a new model with `build_fn` & fits the model to `(X, y)`.

    Arguments:
        X : array-like, shape `(n_samples, n_features)`
            Training samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `X`.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. The Keras Model must support this.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of the keras model's `fit`
            method.

    Returns:
        self : object
            a reference to the instance that can be chain called
            (ex: instance.fit(X, y).transform(X))

    Raises:
        ValueError : In case of invalid shape for `y` argument.
        ValueError : In case sample_weight is not None and the Keras
            model's `fit` method does not support that parameter.
    """
    # basic checks
    X, y = check_X_y(
        X,
        y,
        allow_nd=True,  # allow X to have more than 2 dimensions
        multi_output=True,  # allow y to be 2D
    )
    X = check_array(X, allow_nd=True, dtype=["float64", "int"])

    if sample_weight is not None:
        sample_weight = _check_sample_weight(
            sample_weight, X, dtype=["float64", "int"])

    # pre process X, y
    X, _ = self._pre_process_X(X)
    y, extra_args = self._pre_process_y(y)
    # update self.classes_, self.n_outputs_, self.n_classes_ and
    # self.cls_type_
    for attr_name, attr_val in extra_args.items():
        setattr(self, attr_name, attr_val)

    # build model
    self.model_ = self._build_keras_model(
        X, y, sample_weight=sample_weight, **kwargs)

    y = self._check_output_model_compatibility(y)

    # fit model
    return self._fit_keras_model(
        X, y, sample_weight=sample_weight, **kwargs)
def fit(self, Knm, Kmm, y=None, sample_weight=None):
    """Fit KernelFlexibleCenterer

    Parameters
    ----------
    Knm: ndarray of shape (n_samples, n_active)
        Kernel matrix between the reference data set and the active set

    Kmm: ndarray of shape (n_active, n_active)
        Kernel matrix between the active set and itself

    y : None
        Ignored.

    sample_weight: ndarray of shape (n_samples,), default=None
        Weights for each sample. Sample weighting can be used to center
        (and scale) data using a weighted mean. Weights are internally
        normalized before preprocessing.

    Returns
    -------
    self : object
        Fitted transformer.
    """
    if Knm.shape[1] != Kmm.shape[0]:
        raise ValueError(
            "The reference kernel is not of a commensurate shape with the "
            "active kernel.")
    if Kmm.shape[0] != Kmm.shape[1]:
        raise ValueError("The active kernel is not square.")

    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, Knm,
                                             dtype=Knm.dtype)
        sample_weight = sample_weight / np.sum(sample_weight)

    self.n_active_ = Kmm.shape[0]

    if self.with_center:
        self.K_fit_rows_ = np.average(Knm, weights=sample_weight, axis=0)
    else:
        self.K_fit_rows_ = np.zeros(Knm.shape[1])

    if self.with_trace:
        Knm_centered = Knm - self.K_fit_rows_
        Khat = (Knm_centered @ np.linalg.pinv(Kmm, self.rcond)
                @ Knm_centered.T)
        self.scale_ = np.sqrt(np.trace(Khat) / Knm.shape[0])
    else:
        self.scale_ = 1.0

    return self
def fit(self, X, y=None, sample_weight=None):
    """Perform DBSCAN clustering from features, or distance matrix.

    Parameters
    ----------
    X : array-like or sparse matrix, shape (n_samples, n_features), or \
            (n_samples, n_samples)
        Training instances to cluster, or distances between instances if
        ``metric='precomputed'``. If a sparse matrix is provided, it will
        be converted into a sparse ``csr_matrix``.

    sample_weight : array, shape (n_samples,), optional
        Weight of each sample, such that a sample with a weight of at
        least ``min_samples`` is by itself a core sample; a sample with a
        negative weight may inhibit its eps-neighbor from being core.
        Note that weights are absolute, and default to 1.

    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self
    """
    X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32])
    if self.eps <= 0.0:
        raise ValueError("eps must be positive.")
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    _daal_ready = (
        self.algorithm in ['auto', 'brute']
        and (self.metric == 'euclidean'
             or (self.metric == 'minkowski' and self.p == 2))
        and isinstance(X, np.ndarray))

    if _daal_ready:
        logging.info(
            "sklearn.cluster.DBSCAN.fit: " + get_patch_message("daal"))
        core_ind, assignments = _daal_dbscan(
            X, self.eps, self.min_samples, sample_weight=sample_weight)
        self.core_sample_indices_ = core_ind
        self.labels_ = assignments
        self.components_ = np.take(X, core_ind, axis=0)
        return self
    logging.info(
        "sklearn.cluster.DBSCAN.fit: " + get_patch_message("sklearn"))
    return super().fit(X, y, sample_weight=sample_weight)
def check_sample_weight(sample_weight, X):
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        use_sample_weight = True
        if np.array_equal(np.unique(sample_weight), np.array([1.0])):
            sample_weight = None
            use_sample_weight = False
    else:
        use_sample_weight = False
    return sample_weight, use_sample_weight
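# Illustrative use of check_sample_weight above: an all-ones weight vector is
# equivalent to no weighting, so the helper drops it and signals the caller to
# take the unweighted code path (assumes numpy and sklearn's private
# _check_sample_weight are imported as in the helper).
import numpy as np

w, use_w = check_sample_weight(np.ones(4), np.zeros((4, 2)))
assert w is None and use_w is False
w, use_w = check_sample_weight(np.array([1.0, 2.0, 1.0, 1.0]),
                               np.zeros((4, 2)))
assert use_w is True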
def fit(self, X, y, sample_weight=None, check_input=True):
    """Fit a shapelet tree regressor from the training set.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_timesteps)
        The training time series.

    y : array-like of shape (n_samples,)
        Target values as floating point values.

    sample_weight : array-like of shape (n_samples,)
        If `None`, then samples are equally weighted. Splits that would
        create child nodes with net zero or negative weight are ignored
        while searching for a split in each node. Splits are also ignored
        if they would result in any single class carrying a negative
        weight in either child node.

    check_input : bool, optional
        Allow to bypass several input checks. Don't use this parameter
        unless you know what you're doing.

    Returns
    -------
    self : object
    """
    if check_input:
        X = check_array(X, allow_multivariate=True, dtype=float)
        y = check_array(y, ensure_2d=False, dtype=float)

    n_samples = X.shape[0]
    if isinstance(self.force_dim, int):
        X = np.reshape(X, [n_samples, self.force_dim, -1])

    n_timesteps = X.shape[-1]
    if X.ndim > 2:
        n_dims = X.shape[1]
    else:
        n_dims = 1

    if len(y) != n_samples:
        raise ValueError("Number of labels={} does not match "
                         "number of samples={}".format(len(y), n_samples))

    self.n_timestep_ = n_timesteps
    self.n_dims_ = n_dims

    random_state = check_random_state(self.random_state)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X, dtype=float)
    self._fit(X, y, sample_weight, random_state)
    return self
def fit(self, X, y=None, sample_weight=None):
    """Compute mean and scaling to be applied for subsequent normalization.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        The data used to compute the mean and standard deviation used for
        later scaling along the features axis.

    y : None
        Ignored.

    sample_weight : ndarray of shape (n_samples,)
        Weights for each sample. Sample weighting can be used to center
        (and scale) data using a weighted mean. Weights are internally
        normalized before preprocessing.

    Returns
    -------
    self : object
        Fitted scaler.
    """
    self.n_samples_seen_, self.n_features_ = X.shape
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X,
                                             dtype=X.dtype)
        sample_weight = sample_weight / np.sum(sample_weight)

    if self.with_mean:
        self.mean_ = np.average(X, weights=sample_weight, axis=0)
    else:
        self.mean_ = np.zeros(self.n_features_)

    self.scale_ = 1.0
    if self.with_std:
        X_mean = np.average(X, weights=sample_weight, axis=0)
        var = np.average((X - X_mean) ** 2, weights=sample_weight, axis=0)

        if self.column_wise:
            if np.any(var < self.atol + abs(X_mean) * self.rtol):
                raise ValueError(
                    "Cannot normalize a feature with zero variance")
            self.scale_ = np.sqrt(var)
        else:
            var_sum = var.sum()
            if var_sum < abs(np.mean(X_mean)) * self.rtol + self.atol:
                raise ValueError(
                    "Cannot normalize a matrix with zero variance")
            self.scale_ = np.sqrt(var_sum)

    return self
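# The weighted statistics computed in fit() above, shown standalone with
# weights normalized to sum to 1 (a sketch with hypothetical values):
import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
w = np.array([1.0, 2.0, 1.0])
w = w / np.sum(w)

mean = np.average(X, weights=w, axis=0)
var = np.average((X - mean) ** 2, weights=w, axis=0)
scale = np.sqrt(var)  # per-feature scale, as in the column_wise branch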
def _fit_classifier(self, X, y, sample_weight=None):
    if sp.issparse(y):
        raise ValueError(
            "sparse multilabel-indicator for y is not supported.")
    _check_parameters(self)
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    daal_ready = (self.warm_start is False
                  and self.criterion == "gini"
                  and self.ccp_alpha == 0.0
                  and not sp.issparse(X))

    if daal_ready:
        _supported_dtypes_ = [np.single, np.double]
        X = check_array(X, dtype=_supported_dtypes_)
        y = np.asarray(y)
        y = np.atleast_1d(y)

        if y.ndim == 2 and y.shape[1] == 1:
            warnings.warn(
                "A column-vector y was passed when a 1d array was"
                " expected. Please change the shape of y to "
                "(n_samples,), for example using ravel().",
                DataConversionWarning, stacklevel=2)

        check_consistent_length(X, y)

        if y.ndim == 1:
            # reshape is necessary to preserve the data contiguity,
            # which [:, np.newaxis] does not.
            y = np.reshape(y, (-1, 1))

        self.n_outputs_ = y.shape[1]
        if self.n_outputs_ != 1:
            daal_ready = False

    if daal_ready:
        logging.info("sklearn.ensemble.RandomForestClassifier.fit: "
                     + method_uses_daal)
        _daal_fit_classifier(self, X, y, sample_weight=sample_weight)

        if not hasattr(self, "estimators_"):
            self.estimators_ = self._estimators_

        # Decapsulate classes_ attributes
        self.n_classes_ = self.n_classes_[0]
        self.classes_ = self.classes_[0]
        return self
    else:
        logging.info("sklearn.ensemble.RandomForestClassifier.fit: "
                     + method_uses_sklearn)
        return super(RandomForestClassifier, self).fit(
            X, y, sample_weight=sample_weight)
def check_null_weight(
    sample_weight: Optional[ArrayLike],
    X: ArrayLike,
    y: ArrayLike
) -> Tuple[Optional[NDArray], ArrayLike, ArrayLike]:
    """
    Check sample weights and remove samples with null sample weights.

    Parameters
    ----------
    sample_weight : Optional[ArrayLike] of shape (n_samples,)
        Sample weights.
    X : ArrayLike of shape (n_samples, n_features)
        Training samples.
    y : ArrayLike of shape (n_samples,)
        Training labels.

    Returns
    -------
    sample_weight : Optional[NDArray] of shape (n_samples,)
        Non-null sample weights.

    X : ArrayLike of shape (n_samples, n_features)
        Training samples with non-null weights.

    y : ArrayLike of shape (n_samples,)
        Training labels with non-null weights.

    Examples
    --------
    >>> import numpy as np
    >>> from mapie.utils import check_null_weight
    >>> X = np.array([[0], [1], [2], [3], [4], [5]])
    >>> y = np.array([5, 7, 9, 11, 13, 15])
    >>> sample_weight = np.array([0, 1, 1, 1, 1, 1])
    >>> sample_weight, X, y = check_null_weight(sample_weight, X, y)
    >>> print(sample_weight)
    [1. 1. 1. 1. 1.]
    >>> print(X)
    [[1]
     [2]
     [3]
     [4]
     [5]]
    >>> print(y)
    [ 7  9 11 13 15]
    """
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        non_null_weight = sample_weight != 0
        X = _safe_indexing(X, non_null_weight)
        y = _safe_indexing(y, non_null_weight)
        sample_weight = _safe_indexing(sample_weight, non_null_weight)
    sample_weight = cast(Optional[NDArray], sample_weight)
    return sample_weight, X, y
def _check_sample_weight(self, sample_weight, X):
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)
        use_sample_weight = True
        # ranger does additional rng on samples if weights are passed.
        # if the weights are all ones, then we don't want that extra rng.
        if np.array_equal(np.unique(sample_weight), np.array([1.0])):
            sample_weight = []
            use_sample_weight = False
    else:
        sample_weight = []
        use_sample_weight = False
    return sample_weight, use_sample_weight
def _check_normalize_sample_weight(sample_weight, X):
    """Set sample_weight if None, and check for correct dtype"""
    sample_weight_was_none = sample_weight is None
    sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

    if not sample_weight_was_none:
        # normalize the weights to sum up to n_samples
        # an array of ones (i.e. sample_weight is None) is already normalized
        n_samples = len(sample_weight)
        scale = n_samples / sample_weight.sum()
        sample_weight *= scale
    return sample_weight
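# The normalization contract of _check_normalize_sample_weight above: after
# the rescale, the weights sum to n_samples (a sketch, hypothetical values):
import numpy as np

w = np.array([1.0, 2.0, 3.0, 4.0])
w *= len(w) / w.sum()
assert np.isclose(w.sum(), len(w))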
def _prepare_inputs(self, X, sample_weight, y):
    X, y = check_X_y(X, y)
    sample_weight = _check_sample_weight(sample_weight, X)
    self.n_features_in_ = X.shape[1]
    n = X.shape[0]

    if self.copy_X:
        X_ = X.copy()
    else:
        X_ = X

    if self.fit_intercept:
        # append a constant column so the intercept is learned as a coefficient
        X_ = np.hstack([X_, np.ones(shape=(n, 1))])

    loss, grad_loss = self._get_objective(X_, y, sample_weight)
    return X_, grad_loss, loss
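# The fit_intercept trick used in _prepare_inputs above, standalone: appending
# a column of ones lets the optimizer treat the intercept as just another
# coefficient (illustrative data):
import numpy as np

X = np.array([[1.0, 2.0], [3.0, 4.0]])
X_aug = np.hstack([X, np.ones((X.shape[0], 1))])
print(X_aug.shape)  # (2, 3): the last column is the constant intercept term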
def _daal4py_check_weight(self, X, y, sample_weight):
    ww = None
    if sample_weight.shape[0] > 0:
        sample_weight = _check_sample_weight(sample_weight, X)
        if np.all(sample_weight <= 0):
            raise ValueError(
                'Invalid input - all samples have zero or negative weights.')
        elif np.any(sample_weight <= 0):
            if len(np.unique(y[sample_weight > 0])) != len(self.classes_):
                raise ValueError(
                    'Invalid input - all samples with positive weights '
                    'have the same label.')
        ww = sample_weight
    elif self.class_weight is not None:
        ww = np.ones(X.shape[0], dtype=np.float64)
    if self.class_weight is not None:
        for i, v in enumerate(self.class_weight_):
            ww[y == i] *= v
    if ww is not None:
        ww = make2d(ww)
    return ww
def fit(self, X, y, sample_weight=None):
    """
    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training data.
    y : array-like of shape (n_samples, n_targets)
        Target values.
    sample_weight : array-like of shape (n_samples,), default=None
        Individual weights for each sample.

    Returns
    -------
    self
    """
    sample_weight = _check_sample_weight(sample_weight, X)
    self.y_mean_ = (y * sample_weight).mean() / sample_weight.mean()
    return self
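# The ratio-of-means expression in fit() above equals the usual weighted
# average; a quick identity check (hypothetical values):
import numpy as np

y = np.array([1.0, 2.0, 3.0])
w = np.array([1.0, 1.0, 2.0])
assert np.isclose((y * w).mean() / w.mean(), np.average(y, weights=w))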
def fit(self, X, y=None, sample_weight=None):
    """Fit Kernel Ridge regression model

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training data. If kernel == "precomputed" this is instead
        a precomputed kernel matrix, of shape (n_samples, n_samples).

    y : array-like of shape (n_samples,) or (n_samples, n_targets)
        Target values

    sample_weight : float or array-like of shape [n_samples]
        Individual weights for each sample, ignored if None is passed.

    Returns
    -------
    self : returns an instance of self.
    """
    # Convert data
    t0 = time.time()
    # X, y = self._validate_data(X, y, accept_sparse=("csr", "csc"),
    #                            multi_output=True, y_numeric=True)
    if sample_weight is not None and not isinstance(sample_weight, float):
        sample_weight = _check_sample_weight(sample_weight, X)

    K = self._get_kernel(X)
    alpha = np.atleast_1d(self.alpha)

    ravel = False
    if len(y.shape) == 1:
        y = y.reshape(-1, 1)
        ravel = True

    copy = self.kernel == "precomputed"
    self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)
    if ravel:
        self.dual_coef_ = self.dual_coef_.ravel()

    self.X_fit_ = X
    t1 = time.time() - t0
    # print("KRR fitted in %.3f s" % t1)

    return self
def transform(self, X, y=None, sample_weight=None):
    """Transform the data X into the topic space of the fitted pLSA model.

    Parameters
    ----------
    X: array or sparse matrix of shape (n_docs, n_words)
        Corpus to be embedded into topic space

    y: Ignored

    sample_weight: array of shape (n_docs,), optional
        Weights for each document; defaults to uniform weights.

    Returns
    -------
    embedding: array of shape (n_docs, n_topics)
        An embedding of the documents X into the topic space.
    """
    X = check_array(X, accept_sparse="csr")
    sample_weight = _check_sample_weight(sample_weight, X, dtype=np.float32)
    random_state = check_random_state(self.transform_random_seed)

    if not issparse(X):
        X = coo_matrix(X)
    else:
        X = X.tocoo()

    result = plsa_refit(
        X,
        self.components_,
        sample_weight,
        block_size=self.block_size,
        n_iter=50,
        n_iter_per_test=5,
        tolerance=0.001,
        random_state=random_state,
    )

    return result
def score(self, X, y, sample_weight=None, **kwargs):
    """Returns the mean accuracy on the given test data and labels.

    Arguments:
        X: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `X`.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. The Keras Model must support this.
        **kwargs: dictionary arguments
            Legal arguments are those of self.model_.evaluate.

    Returns:
        score: float
            Mean accuracy of predictions on `X` wrt. `y`.

    Raises:
        ValueError: If the underlying model isn't configured to
            compute accuracy. You should pass `metrics=["accuracy"]` to
            the `.compile()` method of the model.
    """
    # validate sample weights
    if sample_weight is not None:
        sample_weight = _check_sample_weight(
            sample_weight, X, dtype=["float64", "int"]
        )

    # pre process X, y
    _, extra_args = self._pre_process_y(y)

    # compute Keras model score
    y_pred = self.predict(X, **kwargs)

    return self._scorer(y, y_pred, sample_weight=sample_weight)
def score(self, X, y, sample_weight=None):
    """Returns the mean accuracy on the given test data and labels.

    Arguments:
        X: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `X`.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. The Keras Model must support this.

    Returns:
        score: float
            Mean accuracy of predictions on `X` wrt. `y`.
    """
    # validate sample weights
    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    # validate y
    y = check_array(y, ensure_2d=False)

    # compute Keras model score
    y_pred = self.predict(X)

    # filter kwargs and get attributes for score
    params = self.get_params()
    score_args = route_params(params, destination="score", pass_filter=set())

    return self.scorer(y, y_pred, sample_weight=sample_weight, **score_args)