Esempio n. 1
0
    def fit(self, X, y):
        """Fit every tree in ``self.trees`` on its own data and feature subset.

        For each of the ``self.n_estimators`` trees, one data subset is taken
        from ``get_random_subsets`` and ``self.max_features`` column indices
        are drawn at random; the tree is then fitted on those columns only.

        Parameters
        ----------
        X : array-like with at least two dimensions
            Training samples. The size of the second axis is used as the
            number of features.
        y : array-like
            Targets, handed to ``get_random_subsets`` together with ``X``.

        Side effects
        ------------
        * ``self.max_features`` is set to ``int(sqrt(n_features))`` when it
          is falsy (``None`` or ``0``).
        * ``self.feature_indices`` is rebuilt from scratch, holding one index
          array per tree in the same order as ``self.trees``.
        * Progress messages are printed when ``self.debug`` is truthy.
        """
        n_features = np.shape(X)[1]
        # If max_features has not been defined => select it as
        # sqrt(n_features)
        if not self.max_features:
            self.max_features = int(math.sqrt(n_features))

        if self.debug:
            print("Training (%s estimators):" % (self.n_estimators))
        # Choose one random subset of the data for each tree
        subsets = get_random_subsets(X, y, self.n_estimators)

        # Start from an empty list: appending to the indices of a previous
        # fit() would leave stale entries at the front, so position i would
        # no longer describe the columns tree i was just trained on.
        self.feature_indices = []

        for i in self.bar(range(self.n_estimators)):
            X_subset, y_subset = subsets[i]
            # Feature bagging (select random subsets of the features).
            # NOTE(review): replace=True means the same column can be drawn
            # more than once for a tree - confirm this is intended.
            idx = np.random.choice(range(n_features),
                                   size=self.max_features,
                                   replace=True)
            # Save the indices of the features for prediction
            self.feature_indices.append(idx)
            # Choose the features corresponding to the indices
            X_subset = X_subset[:, idx]
            # Fit the tree to the data
            self.trees[i].fit(X_subset, y_subset)

            if self.debug:
                # Tree i has just been fitted, so i + 1 trees are done; the
                # float literal keeps this a true division on Python 2 too.
                progress = 100.0 * (i + 1) / self.n_estimators
                print("Progress: %.2f%%" % progress)
    def fit(self, X, y):
        """Fit all trees of the ensemble inside a pyRAPL measurement.

        The whole training loop runs within a ``pyRAPL.Measurement`` labelled
        ``'Fit_3'`` that reports to the module-level ``csv_output``; once the
        measurement block has exited, ``csv_output.save()`` is called.

        Parameters
        ----------
        X : array-like with at least two dimensions
            Training samples. The size of the second axis is used as the
            number of features.
        y : array-like
            Targets, handed to ``get_random_subsets`` together with ``X``.

        Side effects
        ------------
        * ``self.max_features`` is set to ``int(sqrt(n_features))`` when it
          is falsy.
        * Each tree in ``self.trees`` gets a ``feature_indices`` attribute
          holding the column indices it was trained on.
        """
        with pyRAPL.Measurement('Fit_3', output=csv_output):
            feature_count = np.shape(X)[1]
            # Fall back to sqrt(number of features) when no limit was given.
            if not self.max_features:
                self.max_features = int(math.sqrt(feature_count))

            # One random subset of the data per tree.
            data_subsets = get_random_subsets(X, y, self.n_estimators)

            for tree_no in self.progressbar(range(self.n_estimators)):
                X_sample, y_sample = data_subsets[tree_no]
                # Feature bagging: draw the columns this tree will see.
                chosen_columns = np.random.choice(
                    range(feature_count),
                    size=self.max_features,
                    replace=True,
                )
                tree = self.trees[tree_no]
                # Remember the columns on the tree itself for prediction.
                tree.feature_indices = chosen_columns
                # Train the tree on the selected columns only.
                tree.fit(X_sample[:, chosen_columns], y_sample)
        csv_output.save()