def _dku_index_param_value(X, v, indices):
    if not _is_arraylike(v) or _num_samples(v) != _num_samples(X):
        # pass through: skip indexing
        return v
    if sp.issparse(v):
        v = v.tocsr()
    return safe_indexing(v, indices)

def _index_param_value(X, v, indices):
    """Private helper function for parameter value indexing."""
    if not _is_arraylike(v) or _num_samples(v) != _num_samples(X):
        # pass through: skip indexing
        return v
    if sp.issparse(v):
        v = v.tocsr()
    return _safe_indexing(v, indices)

def _index_param_value(X, v, indices):
    """Private helper function for parameter value indexing."""
    if not _is_arraylike(v) or _num_samples(v) != _num_samples(X):
        # pass through: skip indexing
        return v
    if sp.issparse(v):
        v = v.tocsr()
    return safe_indexing(v, indices)

def extract_param(self, key, x, n):
    if self.cache is not None and (n, key) in self.cache:
        return self.cache[n, key]
    out = safe_indexing(x, self.splits[n][0]) if _is_arraylike(x) else x
    if self.cache is not None:
        self.cache[n, key] = out
    return out

def _check_preprocessor(self):
    """Initializes the preprocessor"""
    if _is_arraylike(self.preprocessor):
        self.preprocessor_ = ArrayIndexer(self.preprocessor)
    elif callable(self.preprocessor) or self.preprocessor is None:
        self.preprocessor_ = self.preprocessor
    else:
        raise ValueError("Invalid type for the preprocessor: {}. You should "
                         "provide either None, an array-like object, "
                         "or a callable.".format(type(self.preprocessor)))

def check_preprocessor(self):
    """Initializes the preprocessor"""
    if _is_arraylike(self.preprocessor):
        self.preprocessor_ = ArrayIndexer(self.preprocessor)
    elif callable(self.preprocessor) or self.preprocessor is None:
        self.preprocessor_ = self.preprocessor
    else:
        raise ValueError("Invalid type for the preprocessor: {}. You should "
                         "provide either None, an array-like object, "
                         "or a callable.".format(type(self.preprocessor)))

def _index_param_value(num_samples, v, indices):
    """Private helper function for parameter value indexing.

    This determines whether a fit parameter `v` to a SearchCV.fit
    should be indexed along with `X` and `y`. Note that this differs
    from the scikit-learn version. They pass `X` and compute num_samples.
    We pass `num_samples` instead.
    """
    if not _is_arraylike(v) or _num_samples(v) != num_samples:
        # pass through: skip indexing
        return v
    if sp.issparse(v):
        v = v.tocsr()
    return safe_indexing(v, indices)

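# Hedged usage sketch (not from the source): illustrates the indexing vs.
# pass-through behaviour of the helper above. Assumes an older scikit-learn
# where `safe_indexing` is still importable from `sklearn.utils`; the private
# helpers `_is_arraylike` and `_num_samples` live in `sklearn.utils.validation`.
import numpy as np
import scipy.sparse as sp
from sklearn.utils import safe_indexing
from sklearn.utils.validation import _is_arraylike, _num_samples

sample_weight = np.array([1.0, 2.0, 3.0, 4.0])
indices = [0, 2]

# An array-like of matching length is indexed along with X and y.
print(_index_param_value(4, sample_weight, indices))  # -> [1. 3.]
# Anything else (wrong length or not array-like) passes through unchanged.
print(_index_param_value(4, None, indices))           # -> None
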
def __init__(self, estimator_cls, parameter_grid, score_fns, nfolds=10,
             shuffle=False, seed=None, njobs=1, checkpoint_path=None):
    self.estimator_cls = estimator_cls
    self.parameter_grid = parameter_grid
    self.nfolds = nfolds
    self.seed = seed
    assert njobs == 1, "# jobs > 1 not supported."
    self.njobs = njobs
    assert _is_arraylike(score_fns)
    self.score_fns = score_fns
    self.checkpoint_path = checkpoint_path
    self.grid_scores = None
    self.kf = KFold(n_folds=self.nfolds, shuffle=shuffle, random_state=seed)

def transform(self, X):
    if not _is_arraylike(X):
        raise TypeError("X is not iterable")
    transformed_X = list()
    for text in X:
        temp_vec = list()
        for token in self.tokenizer(text.lower()):
            temp_vec.append(self.glove_dict.get(token, self.glove_dict['unk']))
        if self.combiner == 'mean':
            sentence_vec = np.mean(temp_vec, axis=0)
        else:
            sentence_vec = np.amax(temp_vec, axis=0)
        transformed_X.append(sentence_vec)
    return transformed_X

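# Hedged usage sketch (assumption, not from the source): a minimal holder for
# the attributes the transform above relies on — a `tokenizer` callable, a
# `glove_dict` mapping tokens to vectors (with an 'unk' fallback entry), and a
# `combiner` of 'mean' (otherwise element-wise max is used).
import numpy as np

class _TinyGloveEmbedder:
    def __init__(self):
        self.tokenizer = str.split
        self.glove_dict = {
            'hello': np.array([0.1, 0.2]),
            'world': np.array([0.3, 0.4]),
            'unk': np.zeros(2),
        }
        self.combiner = 'mean'

# Reuse the transform defined above as a method of the sketch class.
_TinyGloveEmbedder.transform = transform

# Known tokens are averaged; unknown tokens fall back to the 'unk' vector.
print(_TinyGloveEmbedder().transform(["Hello world", "Something new"]))
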
def nlargestarg(a, n):
    """Return n largest values' indexes of the given array a.

    Parameters
    ----------
    a : {list, np.ndarray}
        Data array.
    n : int
        The number of returned args.

    Returns
    -------
    nlargestarg : list
        The n largest args in array a.
    """
    assert validation._is_arraylike(a)
    assert n > 0
    if isinstance(a, (list, np.ndarray)):
        argret = da.argtopk(da.from_array(a), n)
    else:
        argret = da.argtopk(a, n)
    # ascent
    return argret[argret.size - n:]

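# Hedged usage sketch (assumption, not from the source): requires `dask` and
# the scikit-learn `validation` module referenced above; the result is a lazy
# dask array, so it is materialized with `.compute()`.
import numpy as np
import dask.array as da
from sklearn.utils import validation

scores = np.array([0.1, 0.9, 0.4, 0.7])
top2 = nlargestarg(scores, 2)
print(top2.compute())  # indexes of the two largest scores (0.9 and 0.7)
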
def _maybe_indexable(x):
    return indexable(x)[0] if _is_arraylike(x) else x

def cv_extract_param(x, indices):
    return safe_indexing(x, indices) if _is_arraylike(x) else x

    selection.
    """
    candidates = self[:]
    cases = list(range(len(self[0].error_vector)))
    random.shuffle(cases)
    if epsilon == 'auto':
        all_errors = np.array([i.error_vector[:] for i in candidates])
        epsilon = np.apply_along_axis(median_absolute_deviation, 0, all_errors)
    while len(cases) > 0 and len(candidates) > 1:
        case = cases[0]
        errors_this_case = [i.error_vector[case] for i in candidates]
        best_val_for_case = min(errors_this_case)
        if _is_arraylike(epsilon):
            max_error = best_val_for_case + epsilon[case]
        else:
            max_error = best_val_for_case + epsilon
        test = lambda i: i.error_vector[case] <= max_error
        candidates = [i for i in candidates if test(i)]
        cases.pop(0)
    return random.choice(candidates)

def tournament_selection(self, tournament_size=7):
    """Returns the individual with the lowest error within a random
    tournament.

    Parameters
    ----------
    tournament_size : int
def query(self, X_cand, ensemble, X=None, y=None, sample_weight=None,
          batch_size=1, return_utilities=False):
    """Queries the next instance to be labeled.

    Parameters
    ----------
    X_cand : array-like, shape (n_candidate_samples, n_features)
        Candidate samples from which the strategy can select.
    ensemble : {skactiveml.base.SkactivemlClassifier, array-like}
        If `ensemble` is a `SkactivemlClassifier`, it must have
        `n_estimators` and `estimators_` after fitting as attribute. Then,
        its estimators will be used as committee. If `ensemble` is
        array-like, each element of this list must be a
        `SkactivemlClassifier` and will be used as committee member.
    X : array-like, shape (n_samples, n_features), optional (default=None)
        Complete training data set.
    y : array-like, shape (n_samples), optional (default=None)
        Labels of the training data set.
    sample_weight : array-like, shape (n_samples), optional (default=None)
        Weights of training samples in `X`.
    batch_size : int, optional (default=1)
        The number of samples to be selected in one AL cycle.
    return_utilities : bool, optional (default=False)
        If true, also return the utilities based on the query strategy.

    Returns
    -------
    query_indices : numpy.ndarray, shape (batch_size)
        The query_indices indicate for which candidate sample a label is
        to be queried, e.g., `query_indices[0]` indicates the first
        selected sample.
    utilities : numpy.ndarray, shape (batch_size, n_samples)
        The utilities of all candidate samples after each selected sample
        of the batch, e.g., `utilities[0]` indicates the utilities used
        for selecting the first sample (with index `query_indices[0]`) of
        the batch.
    """
    # Validate input parameters.
    X_cand, return_utilities, batch_size, random_state = \
        self._validate_data(X_cand, return_utilities, batch_size,
                            self.random_state, reset=True)

    # Check attribute `method`.
    if self.method not in ['KL_divergence', 'vote_entropy']:
        raise ValueError(
            f"The given method {self.method} is not valid. "
            f"Supported methods are 'KL_divergence' and 'vote_entropy'")

    # Check if the parameter `ensemble` is valid.
    if isinstance(ensemble, SkactivemlClassifier) and \
            (hasattr(ensemble, 'n_estimators') or
             hasattr(ensemble, 'estimators')):
        ensemble = fit_if_not_fitted(
            ensemble, X, y, sample_weight=sample_weight
        )
        classes = ensemble.classes_
        if hasattr(ensemble, 'estimators_'):
            est_arr = ensemble.estimators_
        else:
            if hasattr(ensemble, 'estimators'):
                n_estimators = len(ensemble.estimators)
            else:
                n_estimators = ensemble.n_estimators
            est_arr = [ensemble] * n_estimators
    elif _is_arraylike(ensemble):
        est_arr = deepcopy(ensemble)
        for i in range(len(est_arr)):
            check_type(est_arr[i], f'ensemble[{i}]', SkactivemlClassifier)
            est_arr[i] = fit_if_not_fitted(
                est_arr[i], X, y, sample_weight=sample_weight
            )
            if i > 0:
                np.testing.assert_array_equal(
                    est_arr[i - 1].classes_, est_arr[i].classes_,
                    err_msg=f'The inferred classes of the {i - 1}-th and '
                            f'{i}-th ensemble member are not equal. Set '
                            f'the `classes` parameter of each ensemble '
                            f'member to avoid this error.'
                )
        classes = est_arr[0].classes_
    else:
        raise TypeError(
            f'`ensemble` must either be a `{SkactivemlClassifier}` '
            f'with the attributes `n_estimators` and `estimators_` after '
            f'fitting or a list of {SkactivemlClassifier} objects.'
        )

    # Compute utilities.
    if self.method == 'KL_divergence':
        probas = np.array([est.predict_proba(X_cand) for est in est_arr])
        utilities = average_kl_divergence(probas)
    elif self.method == 'vote_entropy':
        votes = np.array([est.predict(X_cand) for est in est_arr]).T
        utilities = vote_entropy(votes, classes)

    return simple_batch(utilities, random_state,
                        batch_size=batch_size,
                        return_utilities=return_utilities)

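# Hedged illustration (not from the source): a standalone numpy sketch of the
# vote-entropy disagreement measure used in the 'vote_entropy' branch above,
# not the library's own implementation. For each candidate, the committee's
# hard votes are counted and the entropy of the empirical vote distribution
# serves as the utility.
import numpy as np

def _vote_entropy_sketch(votes, classes):
    # votes: shape (n_candidates, n_estimators), hard class predictions.
    n_estimators = votes.shape[1]
    utilities = np.zeros(len(votes))
    for i, row in enumerate(votes):
        counts = np.array([np.sum(row == c) for c in classes])
        p = counts[counts > 0] / n_estimators
        utilities[i] = -np.sum(p * np.log(p))
    return utilities

# Three candidates judged by a committee of four classifiers: full agreement
# yields zero entropy; an even split yields the largest disagreement.
votes = np.array([[0, 0, 0, 0],
                  [0, 1, 0, 1],
                  [0, 1, 2, 1]])
print(_vote_entropy_sketch(votes, classes=[0, 1, 2]))
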