def fit(self, X, y=None, max_depth=10, min_samples_leaf=1, sample_weight=False, random_state=None):
    """Train the decision-tree entity classifier.

    Args:
        X: Training samples handed to the feature extractor.
        y: Ignored; kept for scikit-learn API compatibility.
        max_depth (int): Maximum depth of the tree.
        min_samples_leaf (int): Minimum samples required at a leaf node.
        sample_weight (bool): When True, derive per-sample weights from
            the label distribution via ``to_weights``.
        random_state: Seed forwarded to the underlying model.

    Returns:
        self, enabling fluent chaining.
    """
    log.info("Checking parameters...")
    self.config.set_parameters({
        "max_depth": max_depth,
        "min_samples_leaf": min_samples_leaf,
        "random_state": random_state
    })
    # Instantiate the underlying sklearn model from the stored configuration
    # so that any normalization done by the config layer is respected.
    self.tree = DecisionTreeClassifier(
        max_depth=self.config.get_parameter("max_depth"),
        min_samples_leaf=self.config.get_parameter("min_samples_leaf"),
        random_state=self.config.get_parameter("random_state"))
    log.info("Generating features for {} samples...".format(len(X)))
    # Features and labels are useful for training.
    feature_matrix, token_seqs, label_vector = self.feature_extractor.transform(
        X, entity_labels=self.entity_labels)
    log.info("Training Decision Tree...")
    if sample_weight:
        weight_vector = to_weights(label_vector)
    else:
        weight_vector = None
    self.tree.fit(feature_matrix, label_vector, sample_weight=weight_vector)
    return self
def fit(self, X, y=None, n_estimators=9, max_features="auto", min_samples_leaf=1, sample_weight=False, random_state=None):
    """Train the random-forest entity classifier.

    Args:
        X: Training samples handed to the feature extractor.
        y: Ignored; kept for scikit-learn API compatibility.
        n_estimators (int): Number of trees in the forest.
        max_features: Either an int, or one of the strings "auto", "log",
            "sqrt" selecting a feature-count heuristic.
        min_samples_leaf (int): Minimum samples required at a leaf node.
        sample_weight (bool): When True, derive per-sample weights from
            the label distribution via ``to_weights``.
        random_state: Seed forwarded to the underlying model.

    Returns:
        self, enabling fluent chaining.

    Raises:
        ValueError: If ``max_features`` is an unrecognized string.
        TypeError: If ``max_features`` is neither a str nor an int.
    """
    log.info("Generating features for {} samples...".format(len(X)))
    # Features and labels are useful for training.
    features, tokens, labels = self.feature_extractor.transform(X, entity_labels=self.entity_labels)
    log.info("Checking parameters...")
    # Use isinstance() rather than type() comparison (idiomatic, and
    # tolerant of subclasses).
    if isinstance(max_features, str):
        # Resolve the string shorthand to a concrete feature count.
        if max_features == "auto":
            max_features = self._get_max_features(features)
        elif max_features == "log":
            max_features = self._get_max_features(features, method=log2)
        elif max_features == "sqrt":
            max_features = self._get_max_features(features, method=sqrt)
        else:
            raise ValueError("Unknown method '{}' for feature selection in Random Forest".format(max_features))
    if not isinstance(max_features, int):
        raise TypeError("The parameter 'max_features' must be either a string or integer.")
    self.config.set_parameters({"n_estimators": n_estimators, "max_features": max_features, "random_state": random_state, "min_samples_leaf": min_samples_leaf})
    # Instantiate the underlying sklearn model from the stored configuration.
    self.rf = RandomForestClassifier(
        n_estimators=self.config.get_parameter("n_estimators"),
        max_features=self.config.get_parameter("max_features"),
        min_samples_leaf=self.config.get_parameter("min_samples_leaf"),
        random_state=self.config.get_parameter("random_state"))
    log.info("Training Random Forest...")
    weights = to_weights(labels) if sample_weight else None
    self.rf.fit(features, labels, sample_weight=weights)
    return self
def fit(self, X, y=None, n_estimators=9, max_features="auto", sample_weight=False, random_state=None):
    """Train the random-forest relation classifier.

    Args:
        X: Training samples handed to the feature extractor.
        y: Ignored; kept for scikit-learn API compatibility.
        n_estimators (int): Number of trees in the forest.
        max_features: Either an int, or one of the strings "auto", "log",
            "sqrt" selecting a feature-count heuristic.
        sample_weight (bool): When True, derive per-sample weights from
            the label distribution via ``to_weights``.
        random_state: Seed forwarded to the underlying model.

    Returns:
        self, enabling fluent chaining. Returns early (untrained) when
        the extractor yields no examples.

    Raises:
        ValueError: If ``max_features`` is an unrecognized string.
        TypeError: If ``max_features`` is neither a str nor an int.
    """
    # get features and labels
    features, labels = self.feature_extractor.transform(
        X, relation_labels=self.relation_labels)
    if is_empty(features):
        # Fixed typo: "quiting" -> "quitting".
        log.error("No examples to train, quitting...")
        return self
    log.info("Checking parameters...")
    # Use isinstance() rather than type() comparison (idiomatic, and
    # tolerant of subclasses).
    if isinstance(max_features, str):
        # Resolve the string shorthand to a concrete feature count.
        if max_features == "auto":
            max_features = self._get_max_features(features)
        elif max_features == "log":
            max_features = self._get_max_features(features, method=log2)
        elif max_features == "sqrt":
            max_features = self._get_max_features(features, method=sqrt)
        else:
            raise ValueError(
                "Unknown method '{}' for feature selection in Random Forest"
                .format(max_features))
    if not isinstance(max_features, int):
        raise TypeError(
            "The parameter 'max_features' must be either a string or integer."
        )
    self.config.set_parameters({
        "n_estimators": n_estimators,
        "max_features": max_features,
        "random_state": random_state
    })
    # Instantiate the underlying sklearn model from the stored configuration.
    self.rf = RandomForestClassifier(
        n_estimators=self.config.get_parameter("n_estimators"),
        max_features=self.config.get_parameter("max_features"),
        random_state=self.config.get_parameter("random_state"))
    log.info("Training Random Forest...")
    weights = to_weights(labels) if sample_weight else None
    self.rf.fit(features, labels, sample_weight=weights)
    return self
def fit(self, X, y=None, sample_weight=False):
    """Train the multinomial naive-Bayes relation classifier.

    Args:
        X: Training samples handed to the feature extractor.
        y: Ignored; kept for scikit-learn API compatibility.
        sample_weight (bool): When True, derive per-sample weights from
            the label distribution via ``to_weights``.

    Returns:
        self, enabling fluent chaining. Returns early (untrained) when
        the extractor yields no examples.
    """
    log.info("Checking parameters...")
    self.config.validate()
    # create a model
    self.nb = MultinomialNB()
    # get features and labels
    features, labels = self.feature_extractor.transform(
        X, relation_labels=self.relation_labels)
    if is_empty(features):
        # Fixed typo: "quiting" -> "quitting".
        log.error("No examples to train, quitting...")
        return self
    log.info("Training Naive Bayes...")
    weights = to_weights(labels) if sample_weight else None
    self.nb.fit(features, labels, sample_weight=weights)
    return self
def fit(self, X, y=None, max_iterations=100, C=1, sample_weight=False):
    """Train the calibrated linear-SVM relation classifier.

    Args:
        X: Training samples handed to the feature extractor.
        y: Ignored; kept for scikit-learn API compatibility.
        max_iterations (int): Iteration cap for the LinearSVC solver.
        C (float): Inverse regularization strength for LinearSVC.
        sample_weight (bool): When True, derive per-sample weights from
            the label distribution via ``to_weights``.

    Returns:
        self, enabling fluent chaining. Returns early (untrained) when
        the extractor yields no examples.
    """
    log.info("Checking parameters...")
    self.config.set_parameters({"max_iterations": max_iterations, "C": C})
    self.config.validate()
    # Wrap LinearSVC in CalibratedClassifierCV so the model exposes
    # probability estimates (LinearSVC alone has no predict_proba).
    self.svm = CalibratedClassifierCV(
        LinearSVC(max_iter=self.config.get_parameter("max_iterations"),
                  C=self.config.get_parameter("C")))
    # get features and labels
    features, labels = self.feature_extractor.transform(
        X, relation_labels=self.relation_labels)
    if is_empty(features):
        # Fixed typo: "quiting" -> "quitting".
        log.error("No examples to train, quitting...")
        return self
    log.info("Training SVM...")
    weights = to_weights(labels) if sample_weight else None
    self.svm.fit(features, labels, sample_weight=weights)
    return self