Ejemplo n.º 1
0
    def score(self, x, y):
        """
        Returns the mean accuracy on the given test data and labels.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Test samples.
        y : ds-array, shape=(n_samples, 1)
            True labels for x.

        Returns
        -------
        score : float (as future object)
            Mean accuracy of self.predict(x) wrt. y.
        """
        assert (self._clf is not None or self._svs is not None), \
            "Model has not been initialized. Call fit() first."

        # One partial accuracy task per paired row-block of (x, y).
        partials = [
            _score(x_part._blocks, y_part._blocks, self._clf)
            for x_part, y_part in _paired_partition(x, y)
        ]

        return _merge_scores(*partials)
Ejemplo n.º 2
0
    def score(self, x, y, collect=False):
        """
        Returns the mean accuracy on the given test data and labels.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Test samples.
        y : ds-array, shape=(n_samples, 1)
            True labels for x.
        collect : bool, optional (default=False)
            When True, a synchronized result is returned.

        Returns
        -------
        score : float (as future object)
            Mean accuracy of self.predict(x) wrt. y.
        """
        assert (self._clf is not None or self._svs is not None), \
            "Model has not been initialized. Call fit() first."

        # One partial accuracy task per paired row-block of (x, y).
        partials = [
            _score(x_part._blocks, y_part._blocks, self._clf)
            for x_part, y_part in _paired_partition(x, y)
        ]

        result = _merge_scores(*partials)

        if collect:
            # Block until the distributed runtime produces the value.
            return compss_wait_on(result)
        return result
Ejemplo n.º 3
0
    def score(self, x, y, collect=False):
        """Accuracy classification score.

        Returns the mean accuracy of the predictions on the given test data.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            The training input samples.
        y : ds-array, shape (n_samples, 1)
            The true labels.
        collect : bool, optional (default=False)
            When True, a synchronized result is returned.


        Returns
        -------
        score : float (as future object)
            Fraction of correctly classified samples.
        """
        assert self.trees is not None, "The random forest is not fitted."

        # The hard- and soft-vote paths share the same partition loop; they
        # differ only in the per-tree prediction method and the reduction
        # task, so select those once instead of duplicating the loop.
        if self.hard_vote:
            predict_attr = "predict"
            vote_score = _hard_vote_score
        else:
            predict_attr = "predict_proba"
            vote_score = _soft_vote_score

        partial_scores = []
        for x_row, y_row in _paired_partition(x, y):
            tree_predictions = [
                getattr(tree, predict_attr)(x_row) for tree in self.trees
            ]
            partial_scores.append(
                vote_score(y_row._blocks, self.classes, *tree_predictions))

        score = _merge_classification_scores(*partial_scores)

        # compss_wait_on synchronizes the future object when requested.
        return compss_wait_on(score) if collect else score
Ejemplo n.º 4
0
    def score(self, x, y):
        """Accuracy classification score.

        Returns the mean accuracy on the given test data.


        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            The training input samples.
        y : ds-array, shape (n_samples, 1)
            The true labels.

        Returns
        -------
        score : float (as future object)
            Fraction of correctly classified samples.

        """
        assert self.trees is not None, 'The random forest is not fitted.'

        # The hard- and soft-vote paths share the same partition loop; they
        # differ only in the per-tree prediction method and the reduction
        # task, so select those once instead of duplicating the loop.
        if self.hard_vote:
            predict_attr = 'predict'
            vote_score = _hard_vote_score
        else:
            predict_attr = 'predict_proba'
            vote_score = _soft_vote_score

        partial_scores = []
        for x_row, y_row in _paired_partition(x, y):
            tree_predictions = [
                getattr(tree, predict_attr)(x_row) for tree in self.trees
            ]
            partial_scores.append(
                vote_score(y_row._blocks, self.classes, *tree_predictions))

        return _merge_scores(*partial_scores)
Ejemplo n.º 5
0
    def _w_step(self, x, y):
        """Recompute ``self._w`` from the current ``self._z`` and ``self._u``.

        Iterates the paired row-blocks of (x, y) in lockstep with the
        row-blocks of ``self._u`` and launches one ``_update_w`` task per
        horizontal block, then assembles the results into a new ds-array.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Input samples (paired row-block-wise with ``y``).
        y : ds-array, shape=(n_samples, 1)
            Targets/labels for ``x``.
        """
        w_blocks = []

        for xy_hblock, u_hblock in zip(_paired_partition(x, y),
                                       self._u._iterator()):
            x_hblock, y_hblock = xy_hblock
            # Placeholder objects, one per column-block of x; presumably
            # _update_w (a COMPSs task) fills them in place via its output
            # parameter -- TODO confirm against _update_w's task decorator.
            w_hblock = [object() for _ in range(x._n_blocks[1])]
            x_blocks = x_hblock._blocks
            y_blocks = y_hblock._blocks
            u_blocks = u_hblock._blocks

            _update_w(x_blocks, y_blocks, self._z, u_blocks, self.rho,
                      self.loss_fn, w_hblock)
            w_blocks.append(w_hblock)

        # The new w inherits u's regular block shape and overall shape, and
        # x's sparsity flag.
        r_shape = self._u._reg_shape
        self._w = Array(w_blocks, r_shape, r_shape, self._u.shape, x._sparse)
Ejemplo n.º 6
0
    def _do_iteration(self, x, y, ids_list):
        """Run one cascade iteration: train per-partition models, then
        reduce their support vectors ``self._arity`` at a time until a
        single final model remains.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Input samples (partitioned pairwise with ``y``).
        y : ds-array, shape=(n_samples, 1)
            Labels for ``x``.
        ids_list : iterable
            One block of sample ids per row-partition of (x, y).
        """
        q = []  # queue of (support vectors, their labels, their ids) futures
        pars = self._clf_params
        arity = self._arity

        # first level
        for partition, id_bk in zip(_paired_partition(x, y), ids_list):
            x_data = partition[0]._blocks
            y_data = partition[1]._blocks
            ids = [id_bk]

            # From the second iteration on, feed the previous iteration's
            # support vectors back into every first-level training task.
            if self._svs is not None:
                x_data.append(self._svs)
                y_data.append([self._sv_labels])
                ids.append([self._sv_ids])

            _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
            sv, sv_labels, sv_ids, self._clf = _tmp
            q.append((sv, sv_labels, sv_ids))

        # reduction
        while len(q) > arity:
            # Take the next `arity` partial results and train on their
            # combined support vectors.
            data = q[:arity]
            del q[:arity]

            x_data = [tup[0] for tup in data]
            y_data = [[tup[1]] for tup in data]
            ids = [[tup[2]] for tup in data]

            _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
            sv, sv_labels, sv_ids, self._clf = _tmp
            q.append((sv, sv_labels, sv_ids))

            # delete partial results
            for partial in data:
                compss_delete_object(partial)

        # last layer: at most `arity` entries remain; merge them into the
        # final model and keep its support vectors for the next iteration.
        x_data = [tup[0] for tup in q]
        y_data = [[tup[1]] for tup in q]
        ids = [[tup[2]] for tup in q]

        _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
        self._svs, self._sv_labels, self._sv_ids, self._clf = _tmp

        self.iterations += 1
Ejemplo n.º 7
0
    def score(self, x, y, collect=False):
        """R2 regression score.

        Returns the coefficient of determination $R^2$ of the prediction.
        The coefficient $R^2$ is defined as $(1-u/v)$, where $u$
        is the residual sum of squares `((y_true - y_pred) ** 2).sum()` and
        $v$ is the total sum of squares
        `((y_true - y_true.mean()) ** 2).sum()`.
        The best possible score is 1.0 and it can be negative
        if the model is arbitrarily worse.
        A constant model that always predicts the expected value of y,
        disregarding the input features, would get a $R^2$ score of 0.0.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            The training input samples.
        y : ds-array, shape (n_samples, 1)
            The true values.
        collect : bool, optional (default=False)
            When True, a synchronized result is returned.


        Returns
        -------
        score : float (as future object)
            Coefficient of determination $R^2$.
        """
        assert self.trees is not None, "The random forest is not fitted."

        # One partial R2 task per paired row-block, fed with every tree's
        # prediction for that block.
        partials = []
        for x_part, y_part in _paired_partition(x, y):
            predictions = [tree.predict(x_part) for tree in self.trees]
            partials.append(_regression_score(y_part._blocks, *predictions))

        result = _merge_regression_scores(*partials)

        if collect:
            # Block until the distributed runtime produces the value.
            return compss_wait_on(result)
        return result