예제 #1
0
def _dku_index_param_value(X, v, indices):
    """Index a fit parameter by ``indices`` when it is aligned with ``X``.

    ``v`` is handed back unchanged unless it is array-like and reports the
    same number of samples as ``X``. Values that ``sp.issparse`` recognises
    are converted with ``tocsr()`` before being indexed.
    """
    aligned_with_X = _is_arraylike(v) and _num_samples(v) == _num_samples(X)
    if not aligned_with_X:
        # Not a per-sample parameter: pass it through untouched.
        return v
    rows = v.tocsr() if sp.issparse(v) else v
    return safe_indexing(rows, indices)
예제 #2
0
def _index_param_value(X, v, indices):
    """Select the entries of parameter ``v`` given by ``indices``.

    Indexing only happens when ``v`` is array-like and has as many samples
    as ``X``; any other value is returned as-is. Sparse inputs are switched
    to CSR via ``tocsr()`` before the lookup.
    """
    if _is_arraylike(v) and _num_samples(v) == _num_samples(X):
        selectable = v.tocsr() if sp.issparse(v) else v
        return _safe_indexing(selectable, indices)
    # Scalars and length-mismatched values are not per-sample: skip indexing.
    return v
def _index_param_value(X, v, indices):
    """Return ``v`` restricted to ``indices`` if it is a per-sample value.

    A value counts as per-sample when it is array-like and its number of
    samples equals that of ``X``. Everything else is passed through. Sparse
    values are converted with ``tocsr()`` first.
    """
    per_sample = _is_arraylike(v) and _num_samples(v) == _num_samples(X)
    if not per_sample:
        # Pass through: nothing to index.
        return v
    target = v
    if sp.issparse(target):
        target = target.tocsr()
    return safe_indexing(target, indices)
예제 #4
0
파일: methods.py 프로젝트: dask/dask-learn
    def extract_param(self, key, x, n):
        """Return parameter ``x`` restricted to the first part of split ``n``.

        Array-like values are indexed with ``self.splits[n][0]``; anything
        else is returned unchanged. When ``self.cache`` is not ``None`` the
        result is stored under ``(n, key)`` and reused on later calls.
        """
        store = self.cache
        slot = (n, key)
        if store is not None and slot in store:
            return store[slot]

        if _is_arraylike(x):
            out = safe_indexing(x, self.splits[n][0])
        else:
            out = x

        if store is not None:
            store[slot] = out
        return out
예제 #5
0
    def extract_param(self, key, x, n):
        """Fetch the slice of ``x`` for split ``n``, memoised per ``(n, key)``.

        If ``self.cache`` exists and already holds ``(n, key)``, that entry is
        returned. Otherwise array-like ``x`` is indexed with
        ``self.splits[n][0]`` (other values pass through) and the result is
        written to the cache when one is configured.
        """
        memo = self.cache
        cache_key = (n, key)
        use_cache = memo is not None
        if use_cache and cache_key in memo:
            return memo[cache_key]

        out = x
        if _is_arraylike(x):
            out = safe_indexing(x, self.splits[n][0])

        if use_cache:
            memo[cache_key] = out
        return out
예제 #6
0
 def _check_preprocessor(self):
   """Set ``self.preprocessor_`` from ``self.preprocessor``.

   An array-like preprocessor is wrapped in ``ArrayIndexer``; a callable or
   ``None`` is stored as-is. Any other type raises ``ValueError``.
   """
   candidate = self.preprocessor
   if _is_arraylike(candidate):
     self.preprocessor_ = ArrayIndexer(candidate)
     return
   if candidate is None or callable(candidate):
     self.preprocessor_ = candidate
     return
   raise ValueError("Invalid type for the preprocessor: {}. You should "
                    "provide either None, an array-like object, "
                    "or a callable.".format(type(candidate)))
예제 #7
0
 def check_preprocessor(self):
   """Resolve ``self.preprocessor`` into the fitted ``self.preprocessor_``.

   Array-like input becomes an ``ArrayIndexer``; ``None`` and callables are
   kept unchanged. Raises ``ValueError`` for every other type.
   """
   given = self.preprocessor
   if _is_arraylike(given):
     resolved = ArrayIndexer(given)
   elif given is None or callable(given):
     resolved = given
   else:
     raise ValueError("Invalid type for the preprocessor: {}. You should "
                      "provide either None, an array-like object, "
                      "or a callable.".format(type(given)))
   self.preprocessor_ = resolved
예제 #8
0
def _index_param_value(num_samples, v, indices):
    """Index fit parameter ``v`` with ``indices`` when it is per-sample.

    ``v`` is treated as per-sample when it is array-like and its number of
    samples equals ``num_samples``; otherwise it is returned unchanged.
    Unlike the scikit-learn helper of the same name, the caller supplies
    ``num_samples`` directly instead of passing ``X``. Sparse values are
    converted with ``tocsr()`` before indexing.
    """
    if _is_arraylike(v) and _num_samples(v) == num_samples:
        if sp.issparse(v):
            v = v.tocsr()
        return safe_indexing(v, indices)
    # Pass through: not aligned with the samples, so skip indexing.
    return v
예제 #9
0
 def __init__(self, estimator_cls, parameter_grid, score_fns,
              nfolds=10, shuffle=False, seed=None, njobs=1,
              checkpoint_path=None):
     """Store the search configuration and build the K-fold splitter.

     Parameters
     ----------
     estimator_cls, parameter_grid, checkpoint_path
         Stored unchanged on the instance.
     score_fns : array-like
         Must satisfy ``_is_arraylike``; stored unchanged.
     nfolds : int, default 10
         Passed to ``KFold`` as ``n_folds``.
     shuffle : bool, default False
         Passed to ``KFold`` as ``shuffle``.
     seed : optional
         Stored and passed to ``KFold`` as ``random_state``.
     njobs : int, default 1
         Only ``1`` is accepted.

     Raises
     ------
     AssertionError
         If ``njobs`` is not 1 or ``score_fns`` is not array-like.
     """
     # Explicit raises rather than ``assert``: assert statements are removed
     # under ``python -O``, which would silently accept invalid arguments.
     # AssertionError is kept so existing callers see the same exception.
     if njobs != 1:
         raise AssertionError("# jobs > 1 not supported.")
     if not _is_arraylike(score_fns):
         raise AssertionError("score_fns must be array-like.")

     self.estimator_cls = estimator_cls
     self.parameter_grid = parameter_grid
     self.nfolds = nfolds
     self.seed = seed
     self.njobs = njobs
     self.score_fns = score_fns
     self.checkpoint_path = checkpoint_path
     self.grid_scores = None
     self.kf = KFold(n_folds=self.nfolds,
                     shuffle=shuffle,
                     random_state=seed)
예제 #10
0
    def transform(self, X):
        """Map every text in ``X`` to one pooled vector.

        Each text is lower-cased and split with ``self.tokenizer``. Every
        token is looked up in ``self.glove_dict``, falling back to the
        ``'unk'`` entry for unknown tokens. The looked-up values are reduced
        along axis 0 with ``np.mean`` when ``self.combiner == 'mean'`` and
        with ``np.amax`` for any other combiner.

        Returns a list with one pooled result per element of ``X``, in order.
        Raises ``TypeError`` when ``X`` is not array-like.
        """
        if not _is_arraylike(X):
            raise TypeError("X is not iterable")

        pooled = []
        for text in X:
            token_vecs = [
                self.glove_dict.get(token, self.glove_dict['unk'])
                for token in self.tokenizer(text.lower())
            ]
            reducer = np.mean if self.combiner == 'mean' else np.amax
            pooled.append(reducer(token_vecs, axis=0))
        return pooled
예제 #11
0
def nlargestarg(a, n):
    """Return the indexes of the n largest values of the given array a.

    Parameters
    ----------
    a: {list, np.ndarray} or another array-like
        Data array. Lists and numpy arrays are wrapped with
        ``da.from_array`` first; other array-likes are handed to
        ``da.argtopk`` directly.
    n: int
        The number of returned args. Must be positive.

    Returns
    -------
    nlargestarg:
        The last ``n`` entries of the ``da.argtopk(..., n)`` result.
        NOTE(review): this is whatever type ``da.argtopk`` yields after
        slicing (presumably a dask array, not a list) - confirm.
    """
    assert (validation._is_arraylike(a))
    assert (n > 0)
    source = da.from_array(a) if isinstance(a, (list, np.ndarray)) else a
    top_args = da.argtopk(source, n)

    # Keep the trailing n positions of the result.
    # NOTE(review): the original comment called this "ascent" order; verify
    # the ordering against the dask ``argtopk`` documentation.
    return top_args[top_args.size - n:]
예제 #12
0
def _maybe_indexable(x):
    """Pass array-like ``x`` through ``indexable``; return other values as-is."""
    if not _is_arraylike(x):
        return x
    return indexable(x)[0]
예제 #13
0
def cv_extract_param(x, indices):
    """Index ``x`` with ``indices`` if it is array-like, else return it unchanged."""
    if _is_arraylike(x):
        return safe_indexing(x, indices)
    return x
                selection.
            """
        candidates = self[:]
        cases = list(range(len(self[0].error_vector)))
        random.shuffle(cases)

        if epsilon == 'auto':
            all_errors = np.array([i.error_vector[:] for i in candidates])
            epsilon = np.apply_along_axis(median_absolute_deviation, 0,
                                          all_errors)

        while len(cases) > 0 and len(candidates) > 1:
            case = cases[0]
            errors_this_case = [i.error_vector[case] for i in candidates]
            best_val_for_case = min(errors_this_case)
            if _is_arraylike(epsilon):
                max_error = best_val_for_case + epsilon[case]
            else:
                max_error = best_val_for_case + epsilon
            test = lambda i: i.error_vector[case] <= max_error
            candidates = [i for i in candidates if test(i)]
            cases.pop(0)
        return random.choice(candidates)

    def tournament_selection(self, tournament_size=7):
        """Returns the individual with the lowest error within a random
        tournament.

        Parameters
        ----------
        tournament_size : int
예제 #15
0
    def query(self, X_cand, ensemble, X=None, y=None, sample_weight=None,
              batch_size=1, return_utilities=False):
        """Queries the next instance to be labeled.

        Parameters
        ----------
        X_cand : array-like, shape (n_candidate_samples, n_features)
            Candidate samples from which the strategy can select.
        ensemble : {skactiveml.base.SkactivemlClassifier, array-like}
            If `ensemble` is a `SkactivemlClassifier`, it must expose an
            `n_estimators` or `estimators` attribute. It is fitted if
            necessary; when it then provides `estimators_`, those estimators
            form the committee, otherwise the ensemble itself is used once
            per estimator. If `ensemble` is array-like, each element must be
            a `SkactivemlClassifier` and is used as a committee member; the
            members are processed on a deep copy of the given container.
        X: array-like, shape (n_samples, n_features), optional (default=None)
            Complete training data set.
        y: array-like, shape (n_samples), optional (default=None)
            Labels of the training data set.
        sample_weight: array-like, shape (n_samples), optional
        (default=None)
            Weights of training samples in `X`.
        batch_size : int, optional (default=1)
            The number of samples to be selected in one AL cycle.
        return_utilities : bool, optional (default=False)
            If true, also return the utilities based on the query strategy.

        Returns
        -------
        query_indices : numpy.ndarray, shape (batch_size)
            The query_indices indicate for which candidate sample a label is
            to be queried, e.g., `query_indices[0]` indicates the first
            selected sample.
        utilities : numpy.ndarray, shape (batch_size, n_samples)
            The utilities of all candidate samples after each selected
            sample of the batch, e.g., `utilities[0]` indicates the utilities
            used for selecting the first sample (with index `query_indices[0]`)
            of the batch.

        Raises
        ------
        ValueError
            If `self.method` is neither 'KL_divergence' nor 'vote_entropy'.
        TypeError
            If `ensemble` is neither a suitable `SkactivemlClassifier` nor
            array-like.
        """
        # Validate input parameters.
        X_cand, return_utilities, batch_size, random_state = \
            self._validate_data(X_cand, return_utilities, batch_size,
                                self.random_state, reset=True)

        # Check attribute `method`.
        if self.method not in ['KL_divergence', 'vote_entropy']:
            raise ValueError(
                f"The given method {self.method} is not valid. "
                f"Supported methods are 'KL_divergence' and 'vote_entropy'")

        # Check if the parameter `ensemble` is valid.
        if isinstance(ensemble, SkactivemlClassifier) and \
                (hasattr(ensemble, 'n_estimators')
                 or hasattr(ensemble, 'estimators')):
            ensemble = fit_if_not_fitted(
                ensemble, X, y, sample_weight=sample_weight
            )
            classes = ensemble.classes_
            if hasattr(ensemble, 'estimators_'):
                est_arr = ensemble.estimators_
            else:
                # No fitted sub-estimators exposed: use the ensemble itself
                # once per (declared) estimator.
                if hasattr(ensemble, 'estimators'):
                    n_estimators = len(ensemble.estimators)
                else:
                    n_estimators = ensemble.n_estimators
                est_arr = [ensemble] * n_estimators
        elif _is_arraylike(ensemble):
            # Collect the members in a fresh list instead of assigning into
            # the copied container, so immutable array-likes (e.g. tuples)
            # are supported as well.
            est_arr = []
            for i, member in enumerate(deepcopy(ensemble)):
                check_type(member, f'ensemble[{i}]', SkactivemlClassifier)
                member = fit_if_not_fitted(
                    member, X, y, sample_weight=sample_weight
                )
                if est_arr:
                    # All committee members must agree on the class labels.
                    np.testing.assert_array_equal(
                        est_arr[-1].classes_, member.classes_,
                        err_msg=f'The inferred classes of the {i - 1}-th and '
                                f'{i}-th are not equal. Set the `classes` '
                                f'parameter of each ensemble member to avoid '
                                f'this error.'
                    )
                est_arr.append(member)
            classes = est_arr[0].classes_
        else:
            raise TypeError(
                f'`ensemble` must either be a `{SkactivemlClassifier}` '
                f'with the attribute `n_estimators` and `estimators_` after '
                f'fitting or a list of {SkactivemlClassifier} objects.'
            )

        # Compute utilities.
        if self.method == 'KL_divergence':
            probas = np.array([est.predict_proba(X_cand) for est in est_arr])
            utilities = average_kl_divergence(probas)
        elif self.method == 'vote_entropy':
            votes = np.array([est.predict(X_cand) for est in est_arr]).T
            utilities = vote_entropy(votes, classes)

        return simple_batch(utilities, random_state,
                            batch_size=batch_size,
                            return_utilities=return_utilities)
예제 #16
0
파일: utils.py 프로젝트: dask/dask-learn
def _maybe_indexable(x):
    """Return ``indexable(x)[0]`` for array-like ``x``; otherwise return ``x``."""
    if _is_arraylike(x):
        return indexable(x)[0]
    return x