def fit(self, X, y, sample_weight=None):
    """ Prepare different things for fast computation of metrics """
    X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
    self._mask = numpy.array(y == self.uniform_label)
    assert sum(self._mask) > 0, 'No events of the class along which uniformity is desired'
    self._masked_weight = sample_weight[self._mask]

    X_part = numpy.array(take_features(X, self.uniform_features))[self._mask, :]
    # bin the uniform features and precompute the normalized weight of each bin
    self._bin_indices = ut.compute_bin_indices(X_part=X_part, n_bins=self.n_bins)
    self._bin_weights = ut.compute_bin_weights(bin_indices=self._bin_indices,
                                               sample_weight=self._masked_weight)
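For orientation, here is a minimal standalone sketch of what the bin-weight precomputation amounts to, assuming the usual convention that a bin's weight is the normalized sum of the sample weights falling into it. The helper name `_sketch_bin_weights` is hypothetical; this is not the library's actual `compute_bin_weights` implementation.

# Hypothetical illustration only: assumes per-bin weight = normalized summed sample weight.
import numpy

def _sketch_bin_weights(bin_indices, sample_weight):
    bin_totals = numpy.bincount(bin_indices, weights=sample_weight)  # summed weight per bin
    return bin_totals / numpy.sum(bin_totals)                        # normalize so bins sum to 1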
def fit(self, X, y, sample_weight=None):
    """ Prepare different things for fast computation of metrics """
    X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight)
    self._mask = numpy.array(y == self.uniform_label)
    assert sum(self._mask) > 0, 'No events of uniform class!'
    self._masked_weight = sample_weight[self._mask]

    X_part = numpy.array(take_features(X, self.uniform_features))[self._mask, :]
    # computing knn indices
    neighbours = NearestNeighbors(n_neighbors=self.n_neighbours, algorithm='kd_tree').fit(X_part)
    _, self._groups_indices = neighbours.kneighbors(X_part)
    self._group_weights = ut.compute_group_weights(self._groups_indices,
                                                   sample_weight=self._masked_weight)
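As a side note, a self-contained example of the kneighbors call used above (toy data, hypothetical variable names): for every event of the uniform class it returns the indices of its n_neighbours closest events, the event itself included, as an (n_events, n_neighbours) integer matrix that defines the knn groups.

import numpy
from sklearn.neighbors import NearestNeighbors

X_demo = numpy.random.RandomState(0).normal(size=(100, 2))
nn = NearestNeighbors(n_neighbors=5, algorithm='kd_tree').fit(X_demo)
_, groups_indices = nn.kneighbors(X_demo)
print(groups_indices.shape)  # (100, 5); row i holds i itself plus its 4 nearest neighbours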
def fit(self, X, y, sample_weight=None):
    if self._is_classifier:
        self.classes_, y = numpy.unique(y, return_inverse=True)
        assert len(self.classes_) == 2, 'only binary classification supported'
    X, y, sample_weight = check_xyw(X, y, sample_weight=sample_weight,
                                    classification=self._is_classifier)
    if self.loss is None:
        if self._is_classifier:
            self.loss = losses.LogLoss(n_threads=self.n_threads)
        else:
            self.loss = losses.MSELoss()
    self.loss.fit(X, y, sample_weight=sample_weight)

    X = self._transform(X)
    n_samples, self.n_features_ = X.shape

    # number of features tried when building each tree
    if isinstance(self.max_features, int):
        used_features = self.max_features
    else:
        assert isinstance(self.max_features, float)
        used_features = int(numpy.ceil(self.max_features * self.n_features_))
    assert 0 < used_features <= self.n_features_, 'wrong max_features: {}'.format(self.max_features)

    assert numpy.max(X) < 128, 'bin indices should be smaller than 128'
    n_thresholds = int(numpy.max(X)) + 1

    self.estimators = []
    current_indices = numpy.zeros(n_samples, dtype=self._indices_type)
    pred = numpy.zeros(n_samples, dtype='float32')
    # start from the optimal constant prediction
    self.initial_bias_ = self.compute_optimal_step(pred)
    pred += self.initial_bias_

    bootstrapper = _Bootstrapper(self.random_state, bootstrap=self.bootstrap, n_samples=n_samples)
    targets, weights = self.loss.prepare_tree_params(pred)

    for stage in range(self.n_estimators):
        bootstrap_weights = bootstrapper.generate_weights()
        columns_to_test = numpy.sort(
            self.random_state.choice(self.n_features_, size=used_features, replace=False))
        feature, cut, best_improvements, best_cuts = build_decision(
            X, targets=targets, weights=weights, bootstrap_weights=bootstrap_weights,
            current_indices=current_indices, columns_to_test=columns_to_test,
            depth=self.depth, n_thresh=n_thresholds, reg=self._l2_regularization,
            use_friedman_mse=self.use_friedman_mse, n_threads=self.n_threads)
        # leaf values are filled in later by _update_leaves_and_predictions
        leaf_values_placeholder = numpy.zeros(2 ** self.depth, dtype='float32')
        self.estimators.append([feature, cut, leaf_values_placeholder])

        if (self.n_estimators - 1 - stage) % self.update_step == 0:
            # periodically recompute leaf values and predictions for the latest trees
            self._update_leaves_and_predictions(
                current_indices, pred, target=targets, hessians=weights,
                stage=stage, n_stages=min(self.update_step, len(self.estimators)))
            # computing new tree parameters
            targets, weights = self.loss.prepare_tree_params(pred)
    return self
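For context on the `targets, weights = self.loss.prepare_tree_params(pred)` calls: assuming a Newton-style binary log-loss (consistent with the `hessians=weights` naming above, but not verified against the actual losses module), the tree targets and weights would look roughly as follows. The helper name is hypothetical.

# Hedged sketch of Newton-style tree params for binary log-loss; an assumption,
# not necessarily what losses.LogLoss.prepare_tree_params actually returns.
import numpy

def _sketch_logloss_tree_params(y, pred):
    proba = 1.0 / (1.0 + numpy.exp(-pred))    # sigmoid of current raw predictions
    grad = y - proba                          # negative gradient of log-loss
    hess = proba * (1.0 - proba)              # hessian of log-loss
    return grad / (hess + 1e-8), hess         # targets for the tree, hessians as weights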