Code example #1
 def _fit_leaf(self, X, Y, fit_keywords):
     if self.verbose:
         print("Fitting leaf")
     # copy the leaf template so each leaf gets its own estimator
     model = deepcopy(self.leaf_model)
     # draw randomized hyperparameters from the supplied generator functions
     for field, gen in self.randomize_leaf_params.items():
         setattr(model, field, gen())
     model.fit(X, Y, **fit_keywords)
     # clear sklearn's leftover fields so the pickled model stays small
     clear_sklearn_fields(model)
     return model
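The randomize_leaf_params mapping is not defined in these excerpts; from the loop above it is expected to map attribute names to zero-argument callables whose results get assigned onto the copied leaf model. A purely hypothetical configuration (the estimator and the parameter range are illustrative, not taken from the source) might look like:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    # hypothetical setup: each value is a zero-argument callable whose result
    # is assigned onto the copied leaf model via setattr
    leaf_model = LogisticRegression()
    randomize_leaf_params = {
        'C': lambda: 10 ** np.random.uniform(-3, 3),  # random regularization strength
    }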
Code example #2
 def fit(self, X, Y, **fit_keywords):
     self.models = {}
     if self.verbose:
         print("Clustering X")
     # fit the clustering model and keep its labels so we can train one regressor per cluster
     self.clusters.fit(X)
     labels = self.clusters.labels_
     # clear this field so that it doesn't get serialized later
     self.clusters.labels_ = None
     for label in np.unique(labels):
         if self.verbose:
             print("Fitting model for cluster", label)
         model = deepcopy(self.base_model)
         # select only the rows assigned to this cluster
         mask = (labels == label)
         X_slice = X[mask, :]
         Y_slice = Y[mask]
         model.fit(X_slice, Y_slice, **fit_keywords)
         # clear sklearn's leftover junk to make pickled strings smaller
         clear_sklearn_fields(model)
         self.models[label] = model
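For context, here is a stand-alone sketch of the same idea: cluster the inputs, then fit one regressor per cluster. Everything in it (the data, the estimator choices, the variable names) is illustrative and not taken from the source:

    import numpy as np
    from copy import deepcopy
    from sklearn.cluster import KMeans
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 3))
    Y = X @ np.array([1.0, -2.0, 0.5]) + rng.normal(scale=0.1, size=200)

    # cluster the inputs, then train one regressor on each cluster's rows
    clusters = KMeans(n_clusters=4, n_init=10).fit(X)
    labels = clusters.labels_

    models = {}
    for label in np.unique(labels):
        mask = (labels == label)
        model = deepcopy(LinearRegression())
        model.fit(X[mask, :], Y[mask])
        models[label] = model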
Code example #3
    def fit(self, X, Y, **fit_keywords):
        n_samples, n_features = X.shape

        if self.verbose:
            print("Depth", self.depth, ": Fitting model for", n_samples, "vectors")

        # stop splitting once we reach the maximum depth or run out of samples
        if self.depth >= self.max_depth or n_samples <= self.min_leaf_size:
            self.model = self._fit_leaf(X, Y, fit_keywords)
        else:
            # if we've been given a limit on the number of features,
            # train the current node's model on a random subspace of that size
            if self.num_features_per_node:
                feature_indices = np.random.permutation(n_features)
                self.subspace = feature_indices[:self.num_features_per_node]
                X_reduced = X[:, self.subspace]
            else:
                X_reduced = X

            # copy the split classifier, randomize its parameters, and fit it at this node
            self.model = deepcopy(self.split_classifier)
            for field, gen in self.randomize_split_params.items():
                setattr(self.model, field, gen())
            self.model.fit(X_reduced, Y, **fit_keywords)
            clear_sklearn_fields(self.model)
            pred = self.model.predict(X_reduced)

            # recurse into one child per predicted class
            for c in self.classes:
                mask = (pred == c)
                count = np.sum(mask)
                if count == 0:
                    # no samples routed to this class: predict it with a constant leaf
                    self.children[c] = ConstantLeaf(int(c))
                else:
                    X_slice = X[mask, :]
                    Y_slice = Y[mask]
                    self.children[c] = self._fit_child(X_slice, Y_slice,
                                                       fit_keywords)
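ConstantLeaf is not shown in these excerpts; from the way it is used above (constructed with a class label when no training samples reach a branch), a minimal sketch consistent with that usage might look like this:

    import numpy as np

    class ConstantLeaf(object):
        """Hypothetical fallback child: always predicts the class it was built with."""
        def __init__(self, value):
            self.value = value

        def predict(self, X):
            # one copy of the stored constant per input row
            return np.repeat(self.value, X.shape[0])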
Code example #4
 def fit(self, X, Y, **fit_keywords):
     assert self.base_model is not None
     assert self.bagging_percent is not None
     assert self.bagging_replacement is not None
     assert self.num_models is not None
     assert self.verbose is not None

     self.need_to_fit = False
     self.models = []

     X = np.atleast_2d(X)
     Y = np.atleast_1d(Y)

     n_rows, total_features = X.shape
     bagsize = int(math.ceil(self.bagging_percent * n_rows))

     # additive (boosting-style) ensembles keep unit weights; otherwise average the models
     if self.additive:
         self.weights = np.ones(self.num_models, dtype='float')
     else:
         self.weights = np.ones(self.num_models, dtype='float') / self.num_models

     # each derived class needs to implement this
     self._init_fit(X, Y)

     # optionally restrict each base model to a random subset of the features
     if self.feature_subset_percent < 1:
         n_features = int(math.ceil(self.feature_subset_percent * total_features))
         self.feature_subsets = []
     else:
         n_features = total_features
         self.feature_subsets = None

     for i in range(self.num_models):
         if self.verbose:
             print("Training iteration", i)

         # draw a bootstrap sample, with or without replacement
         if self.bagging_replacement:
             indices = np.random.randint(0, n_rows, bagsize)
         else:
             p = np.random.permutation(n_rows)
             indices = p[:bagsize]

         data_subset = X[indices, :]
         if n_features < total_features:
             feature_indices = np.random.permutation(total_features)[:n_features]
             self.feature_subsets.append(feature_indices)
             data_subset = data_subset[:, feature_indices]

         label_subset = Y[indices]
         model = deepcopy(self.base_model)
         # randomize parameters using the given generator functions
         for param_name, fn in self.randomize_params.items():
             setattr(model, param_name, fn())
         model.fit(data_subset, label_subset, **fit_keywords)

         self.models.append(model)
         self._created_model(X, Y, indices, i, model)

         # for additive ensembles, fit each new model to the residual of the previous ones
         if self.additive:
             if n_features < total_features:
                 Y -= model.predict(X[:, feature_indices])
             else:
                 Y -= model.predict(X)

         clear_sklearn_fields(model)

     # stacking works by treating the outputs of each base classifier as the
     # inputs to an additional meta-classifier
     if self.stacking_model:
         transformed_data = self.transform(X)
         self.stacking_model.fit(transformed_data, Y)
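As a stand-alone illustration of the bagging-plus-stacking pattern used above (bootstrap samples for the base models, then a meta-classifier trained on their predictions), here is a minimal sketch; the dataset, estimators, and ensemble size are assumptions for demonstration, not the library's API:

    import numpy as np
    from copy import deepcopy
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.tree import DecisionTreeClassifier

    X, Y = make_classification(n_samples=300, n_features=10, random_state=0)
    n_rows = X.shape[0]
    bagsize = int(0.7 * n_rows)

    # bagging: train each base model on a bootstrap sample drawn with replacement
    base_models = []
    for i in range(10):
        indices = np.random.randint(0, n_rows, bagsize)
        model = deepcopy(DecisionTreeClassifier(max_depth=3))
        model.fit(X[indices, :], Y[indices])
        base_models.append(model)

    # stacking: the base models' predictions become the features of a meta-classifier
    transformed = np.column_stack([m.predict(X) for m in base_models])
    stacking_model = LogisticRegression().fit(transformed, Y)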