def fit(self, X, y):
    """
    Build the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples. Passed through ``check_X_y`` with
        ``enforce_univariate=True`` and ``coerce_to_pandas=True``.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)

    # Label encoding: an encoder is only created (and y only transformed)
    # when none has been attached to this instance beforehand; otherwise y
    # is stored exactly as received.
    encoder = self.label_encoder
    if encoder is None:
        encoder = LabelEncoder()
        self.label_encoder = encoder
        y = encoder.fit_transform(y)
    self.y = y
    self.classes_ = encoder.classes_

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        getter = self.get_distance_measure
        if getter is None:
            getter = self.setup_distance_measure(self)
            self.get_distance_measure = getter
        self.distance_measure = getter(self)

    exemplars = self.pick_exemplars(self)
    self.X_exemplar, self.y_exemplar = exemplars
    self._is_fitted = True
    return self
def fit(self, X, y, input_checks=True):
    """
    Build the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples.
    y : array-like, shape = [n_instances]
        The class labels.
    input_checks : boolean
        Whether to run ``validate_X_y`` on X and y before fitting.

    Returns
    -------
    self : object
    """
    if input_checks:
        validate_X_y(X, y)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)

    # A fresh encoder is fitted on every call; the encoded labels are what
    # gets stored on the instance.
    encoder = LabelEncoder()
    self.label_encoder = encoder
    encoder.fit(y)
    self.classes_ = encoder.classes_
    self.y = encoder.transform(y)

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        getter = self.get_distance_measure
        if getter is None:
            getter = self.setup_distance_measure(self)
            self.get_distance_measure = getter
        self.distance_measure = getter(self)

    exemplars = self.pick_exemplars(self)
    self.X_exemplar, self.y_exemplar = exemplars
    return self
def fit(self, X, y, input_checks=True):
    """
    Build the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples.
    y : array-like, shape = [n_instances]
        The class labels.
    input_checks : boolean
        Whether to run ``validate_X_y`` on X and y before fitting.

    Returns
    -------
    self : object
    """
    if input_checks:
        validate_X_y(X, y)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)

    # A fresh encoder is fitted on every call; the encoded labels are kept
    # on the instance, while the trees below receive the raw X and y.
    encoder = LabelEncoder()
    self.label_encoder = encoder
    encoder.fit(y)
    self.classes_ = encoder.classes_
    self.y = encoder.transform(y)

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        getter = self.get_distance_measure
        if getter is None:
            getter = self.setup_distance_measure_getter(self)
            self.get_distance_measure = getter
        self.distance_measure = getter(self)

    # NOTE(review): the per-tree integer comes from randint(0, n_trees), so
    # repeated values are likely with many trees. If _fit_tree uses it as a
    # random seed this could yield duplicate trees -- confirm.
    if self.n_jobs < 0 or self.n_jobs > 1:
        # Tasks are produced lazily so the random draws happen as the
        # parallel runner consumes them, in index order.
        tasks = (
            delayed(self._fit_tree)(
                X, y, index, self.random_state.randint(0, self.n_trees)
            )
            for index in range(self.n_trees)
        )
        runner = Parallel(self.n_jobs)
        self.trees = runner(tasks)
    else:
        fitted = []
        for index in range(self.n_trees):
            tree = self._fit_tree(
                X, y, index, self.random_state.randint(0, self.n_trees)
            )
            fitted.append(tree)
        self.trees = fitted
    return self
def fit(self, X, y):
    """
    Build the classifier on the training set (X, y).

    A stump is found for this node, then one sub-tree is recursively fitted
    per non-leaf branch while ``depth < max_depth``.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples. Passed through ``check_X_y`` with
        ``enforce_univariate=True`` and ``coerce_to_pandas=True``.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)
    if self.find_stump is None:
        self.find_stump = best_of_n_stumps(self.n_stump_evaluations)

    # Label encoding: only the node without a pre-attached encoder fits one
    # and transforms y. Sub-trees get the parent's encoder (see below) and
    # store the branch labels exactly as received.
    if self.label_encoder is None:
        self.label_encoder = LabelEncoder()
        y = self.label_encoder.fit_transform(y)
    self.y = y
    self.classes_ = self.label_encoder.classes_

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        if self.get_distance_measure is None:
            self.get_distance_measure = self.setup_distance_measure(self)
        self.distance_measure = self.get_distance_measure(self)

    self.stump = self.find_stump(self)
    n_branches = len(self.stump.y_exemplar)
    # Branches that are leaves (or beyond max_depth) stay None.
    self.branches = [None] * n_branches
    if self.depth < self.max_depth:
        for index in range(n_branches):
            sub_y = self.stump.y_branches[index]
            if self.is_leaf(sub_y):
                continue
            sub_tree = ProximityTree(
                random_state=self.random_state,
                get_exemplars=self.get_exemplars,
                distance_measure=self.distance_measure,
                setup_distance_measure=self.setup_distance_measure,
                get_distance_measure=self.get_distance_measure,
                get_gain=self.get_gain,
                is_leaf=self.is_leaf,
                verbosity=self.verbosity,
                max_depth=self.max_depth,
                n_jobs=self.n_jobs,
            )
            sub_tree.label_encoder = self.label_encoder
            # Propagate the stump-search configuration. Without this the
            # sub-tree is left with whatever the constructor gives it, so a
            # custom find_stump / n_stump_evaluations would only apply to
            # the root node.
            sub_tree.n_stump_evaluations = self.n_stump_evaluations
            sub_tree.find_stump = self.find_stump
            sub_tree.depth = self.depth + 1
            self.branches[index] = sub_tree
            sub_X = self.stump.X_branches[index]
            sub_tree.fit(sub_X, sub_y)
    self._is_fitted = True
    return self
def fit(self, X, y, input_checks=True):
    """
    Build the classifier on the training set (X, y).

    A stump is found for this node, then one sub-tree is recursively fitted
    per non-leaf branch while ``depth < max_depth``.

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples.
    y : array-like, shape = [n_instances]
        The class labels.
    input_checks : boolean
        Whether to run ``validate_X_y`` on X and y before fitting.

    Returns
    -------
    self : object
    """
    if input_checks:
        validate_X_y(X, y)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)

    # A fresh encoder is fitted on every call; the encoded labels are what
    # gets stored on the instance.
    self.label_encoder = LabelEncoder()
    self.label_encoder.fit(y)
    self.classes_ = self.label_encoder.classes_
    self.y = self.label_encoder.transform(y)

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        if self.get_distance_measure is None:
            self.get_distance_measure = self.setup_distance_measure(self)
        self.distance_measure = self.get_distance_measure(self)

    self.stump = self.find_stump(self)
    n_branches = len(self.stump.y_exemplar)
    # Branches that are leaves (or beyond max_depth) stay None.
    self.branches = [None] * n_branches
    if self.depth < self.max_depth:
        for index in range(n_branches):
            sub_y = self.stump.y_branches[index]
            if self.is_leaf(sub_y):
                continue
            sub_tree = ProximityTree(
                random_state=self.random_state,
                get_exemplars=self.get_exemplars,
                distance_measure=self.distance_measure,
                setup_distance_measure=self.setup_distance_measure,
                get_distance_measure=self.get_distance_measure,
                get_gain=self.get_gain,
                is_leaf=self.is_leaf,
                verbosity=self.verbosity,
                max_depth=self.max_depth,
                n_jobs=self.n_jobs,
            )
            # Propagate the stump finder. Without this the sub-tree is left
            # with whatever the constructor gives it, so a custom find_stump
            # would only apply to the root node.
            sub_tree.find_stump = self.find_stump
            sub_tree.depth = self.depth + 1
            self.branches[index] = sub_tree
            sub_X = self.stump.X_branches[index]
            sub_tree.fit(sub_X, sub_y)
    return self
def fit(self, X, y):
    """
    Build the classifier on the training set (X, y).

    Parameters
    ----------
    X : array-like or sparse matrix of shape = [n_instances, n_columns]
        The training input samples. Passed through ``check_X_y`` with
        ``enforce_univariate=True`` and ``coerce_to_pandas=True``.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object
    """
    X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_pandas=True)
    self.X = dataset_properties.positive_dataframe_indices(X)
    self.random_state = check_random_state(self.random_state)

    # Label encoding: an encoder is only created (and y only transformed)
    # when none has been attached to this instance beforehand; otherwise y
    # is stored exactly as received.
    encoder = self.label_encoder
    if encoder is None:
        encoder = LabelEncoder()
        self.label_encoder = encoder
        y = encoder.fit_transform(y)
    self.y = y
    self.classes_ = encoder.classes_

    # Resolve the distance measure lazily: build the getter first if it is
    # missing, then ask the getter for the measure.
    if self.distance_measure is None:
        getter = self.get_distance_measure
        if getter is None:
            getter = self.setup_distance_measure_getter(self)
            self.get_distance_measure = getter
        self.distance_measure = getter(self)

    # NOTE(review): the per-tree integer comes from
    # randint(0, n_estimators), so repeated values are likely with many
    # trees. If _fit_tree uses it as a random seed this could yield
    # duplicate trees -- confirm.
    if self.n_jobs < 0 or self.n_jobs > 1:
        # Tasks are produced lazily so the random draws happen as the
        # parallel runner consumes them, in index order.
        tasks = (
            delayed(self._fit_tree)(
                X, y, index, self.random_state.randint(0, self.n_estimators)
            )
            for index in range(self.n_estimators)
        )
        runner = Parallel(self.n_jobs)
        self.trees = runner(tasks)
    else:
        fitted = []
        for index in range(self.n_estimators):
            tree = self._fit_tree(
                X, y, index, self.random_state.randint(0, self.n_estimators)
            )
            fitted.append(tree)
        self.trees = fitted
    self._is_fitted = True
    return self