Beispiel #1
0
    def fit(self, X, y):
        """Build the classifier on the training set (X, y).

        The inputs are validated with ``check_X_y`` (univariate data only,
        coerced to pandas), labels are encoded when no encoder has been
        supplied, a distance measure is resolved if none is set, and finally
        the exemplars are picked.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The training input samples.  If a Pandas data frame is passed,
            column 0 is extracted.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
            This estimator, with ``X``, ``y``, ``classes_``, ``X_exemplar``
            and ``y_exemplar`` set.
        """
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)

        self.X = positive_dataframe_indices(X)
        self.random_state = check_random_state(self.random_state)

        # Labels are only encoded when this object creates the encoder itself;
        # a pre-set encoder means ``y`` is stored exactly as it was given.
        encoder = self.label_encoder
        if encoder is None:
            self.label_encoder = encoder = LabelEncoder()
            y = encoder.fit_transform(y)
        self.y = y
        self.classes_ = encoder.classes_

        # Resolve the distance measure lazily: first the getter (if absent),
        # then the measure itself. Both callables receive this instance.
        needs_measure = self.distance_measure is None
        if needs_measure and self.get_distance_measure is None:
            self.get_distance_measure = self.setup_distance_measure(self)
        if needs_measure:
            self.distance_measure = self.get_distance_measure(self)

        exemplars = self.pick_exemplars(self)
        self.X_exemplar, self.y_exemplar = exemplars
        self._is_fitted = True
        return self
Beispiel #2
0
    def fit(self, X, y):
        """Build the classifier on the training set (X, y).

        A stump is found for the data at this node and, while ``depth`` is
        below ``max_depth``, a child ``ProximityTree`` is fitted recursively
        on every branch that ``is_leaf`` does not report as a leaf.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The training input samples.  If a Pandas data frame is passed,
            column 0 is extracted.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
            This estimator, with ``stump`` and ``branches`` set. Entries of
            ``branches`` stay ``None`` for branches that were not expanded.
        """
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)
        self.X = positive_dataframe_indices(X)
        self.random_state = check_random_state(self.random_state)
        if self.find_stump is None:
            self.find_stump = best_of_n_stumps(self.n_stump_evaluations)
        # Labels are only encoded when this node creates the encoder itself;
        # child nodes are handed the parent's encoder and already-encoded labels.
        if self.label_encoder is None:
            self.label_encoder = LabelEncoder()
            y = self.label_encoder.fit_transform(y)
        self.y = y
        self.classes_ = self.label_encoder.classes_
        # Resolve the distance measure lazily: first the getter (if absent),
        # then the measure itself. Both callables receive this instance.
        if self.distance_measure is None:
            if self.get_distance_measure is None:
                self.get_distance_measure = self.setup_distance_measure(self)
            self.distance_measure = self.get_distance_measure(self)
        self.stump = self.find_stump(self)
        n_branches = len(self.stump.y_exemplar)
        self.branches = [None] * n_branches
        if self.depth < self.max_depth:
            for index in range(n_branches):
                sub_y = self.stump.y_branches[index]
                if self.is_leaf(sub_y):
                    continue
                sub_tree = ProximityTree(
                    random_state=self.random_state,
                    get_exemplars=self.get_exemplars,
                    distance_measure=self.distance_measure,
                    setup_distance_measure=self.setup_distance_measure,
                    get_distance_measure=self.get_distance_measure,
                    get_gain=self.get_gain,
                    is_leaf=self.is_leaf,
                    verbosity=self.verbosity,
                    max_depth=self.max_depth,
                    n_jobs=self.n_jobs,
                )
                sub_tree.label_encoder = self.label_encoder
                sub_tree.depth = self.depth + 1
                # Hand the stump-search configuration down as well. Without
                # this the child would rebuild ``find_stump`` from its own
                # ``n_stump_evaluations`` and ignore the setting used here.
                sub_tree.n_stump_evaluations = self.n_stump_evaluations
                sub_tree.find_stump = self.find_stump
                self.branches[index] = sub_tree
                sub_X = self.stump.X_branches[index]
                sub_tree.fit(sub_X, sub_y)
        self._is_fitted = True
        return self
    def fit(self, X, y):
        """Build the classifier on the training set (X, y).

        Fits ``n_estimators`` trees through ``_fit_tree``, using joblib when
        ``n_jobs`` is greater than 1 or negative and a plain loop otherwise.

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The training input samples.  If a Pandas data frame is passed,
            column 0 is extracted.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
            This estimator, with ``trees`` holding the results of
            ``_fit_tree`` in index order.
        """
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)
        self.X = positive_dataframe_indices(X)
        self.random_state = check_random_state(self.random_state)
        # Labels are only encoded when this object creates the encoder itself;
        # a pre-set encoder means ``y`` is used exactly as it was given.
        if self.label_encoder is None:
            self.label_encoder = LabelEncoder()
            y = self.label_encoder.fit_transform(y)
        self.y = y
        self.classes_ = self.label_encoder.classes_
        # Resolve the distance measure lazily: first the getter (if absent),
        # then the measure itself. Both callables receive this instance.
        if self.distance_measure is None:
            if self.get_distance_measure is None:
                self.get_distance_measure = self.setup_distance_measure_getter(self)
            self.distance_measure = self.get_distance_measure(self)

        # Draw one value per tree up front, in index order, from a wide range.
        # Drawing from [0, n_estimators) gave many trees the same value (and
        # always 0 for a single tree), so they were not independently seeded.
        # NOTE(review): the fourth ``_fit_tree`` argument is assumed to be a
        # per-tree random seed - confirm against ``_fit_tree``.
        max_seed = 2 ** 31 - 1  # exclusive bound; fits a signed 32-bit int
        seeds = [
            self.random_state.randint(0, max_seed)
            for _ in range(self.n_estimators)
        ]

        if self.n_jobs > 1 or self.n_jobs < 0:
            parallel = Parallel(self.n_jobs)
            self.trees = parallel(
                delayed(self._fit_tree)(X, y, index, seed)
                for index, seed in enumerate(seeds)
            )
        else:
            self.trees = [
                self._fit_tree(X, y, index, seed)
                for index, seed in enumerate(seeds)
            ]
        self._is_fitted = True
        return self