# Example #1
    def fit(self,
            X,
            y=None,
            max_depth=10,
            min_samples_leaf=1,
            sample_weight=False,
            random_state=None):
        """Fit a decision-tree classifier on token features extracted from X.

        Hyper-parameters are recorded in ``self.config`` first, so the stored
        configuration remains the single source of truth for the model.

        Args:
            X: training samples handed to the feature extractor.
            y: ignored; kept for scikit-learn API compatibility.
            max_depth: maximum depth of the tree.
            min_samples_leaf: minimum samples required at a leaf node.
            sample_weight: when truthy, weight samples via ``to_weights``.
            random_state: seed forwarded to the estimator.

        Returns:
            self, so calls can be chained.
        """
        log.info("Checking parameters...")
        hyper_params = {
            "max_depth": max_depth,
            "min_samples_leaf": min_samples_leaf,
            "random_state": random_state,
        }
        self.config.set_parameters(hyper_params)

        # Build the estimator from the stored configuration rather than the
        # raw arguments, so any config-side normalisation is respected.
        get_param = self.config.get_parameter
        self.tree = DecisionTreeClassifier(
            max_depth=get_param("max_depth"),
            min_samples_leaf=get_param("min_samples_leaf"),
            random_state=get_param("random_state"))

        log.info("Generating features for {} samples...".format(len(X)))
        # The extractor also returns tokens, which fitting does not need.
        features, _tokens, labels = self.feature_extractor.transform(
            X, entity_labels=self.entity_labels)

        log.info("Training Decision Tree...")
        class_weights = to_weights(labels) if sample_weight else None
        self.tree.fit(features, labels, sample_weight=class_weights)
        return self
# Example #2
    def fit(self, X, y=None, n_estimators=9, max_features="auto",
            min_samples_leaf=1, sample_weight=False, random_state=None):
        """Fit a random-forest entity classifier on features extracted from X.

        Args:
            X: training samples handed to the feature extractor.
            y: ignored; kept for scikit-learn API compatibility.
            n_estimators: number of trees in the forest.
            max_features: "auto", "log", "sqrt", or an explicit integer
                count of features to consider at each split.
            min_samples_leaf: minimum samples required at a leaf node.
            sample_weight: when truthy, weight samples via ``to_weights``.
            random_state: seed forwarded to the estimator.

        Returns:
            self, so calls can be chained.

        Raises:
            ValueError: if ``max_features`` is an unrecognised string.
            TypeError: if ``max_features`` is neither a string nor an int.
        """
        log.info("Generating features for {} samples...".format(len(X)))
        # Features and labels are useful for training.
        features, tokens, labels = self.feature_extractor.transform(X, entity_labels=self.entity_labels)

        log.info("Checking parameters...")
        # Resolve string selectors to an integer feature count.
        # isinstance() replaces type() == comparisons: idiomatic and
        # tolerant of str/int subclasses.
        if isinstance(max_features, str):
            if max_features == "auto":
                max_features = self._get_max_features(features)
            elif max_features == "log":
                max_features = self._get_max_features(features, method=log2)
            elif max_features == "sqrt":
                max_features = self._get_max_features(features, method=sqrt)
            else:
                raise ValueError("Unknown method '{}' for feature selection in Random Forest".format(max_features))
        # NOTE(review): assumes _get_max_features returns a plain int —
        # a numpy integer would fail this check; confirm against the helper.
        if not isinstance(max_features, int):
            raise TypeError("The parameter 'max_features' must be either a string or integer.")
        self.config.set_parameters({"n_estimators": n_estimators,
                                    "max_features": max_features,
                                    "random_state": random_state,
                                    "min_samples_leaf": min_samples_leaf})

        # create a model
        self.rf = RandomForestClassifier(
            n_estimators=self.config.get_parameter("n_estimators"),
            max_features=self.config.get_parameter("max_features"),
            min_samples_leaf=self.config.get_parameter("min_samples_leaf"),
            random_state=self.config.get_parameter("random_state"))

        log.info("Training Random Forest...")
        weights = to_weights(labels) if sample_weight else None
        self.rf.fit(features, labels, sample_weight=weights)
        return self
    def fit(self,
            X,
            y=None,
            n_estimators=9,
            max_features="auto",
            sample_weight=False,
            random_state=None):
        """Fit a random-forest relation classifier on features from X.

        Args:
            X: training samples handed to the feature extractor.
            y: ignored; kept for scikit-learn API compatibility.
            n_estimators: number of trees in the forest.
            max_features: "auto", "log", "sqrt", or an explicit integer
                count of features to consider at each split.
            sample_weight: when truthy, weight samples via ``to_weights``.
            random_state: seed forwarded to the estimator.

        Returns:
            self, so calls can be chained. Also returned early (untrained)
            when no features could be extracted.

        Raises:
            ValueError: if ``max_features`` is an unrecognised string.
            TypeError: if ``max_features`` is neither a string nor an int.
        """
        # get features and labels
        features, labels = self.feature_extractor.transform(
            X, relation_labels=self.relation_labels)
        if is_empty(features):
            # Fixed typo in log message ("quiting" -> "quitting").
            log.error("No examples to train, quitting...")
            return self

        log.info("Checking parameters...")
        # Resolve string selectors to an integer feature count.
        # isinstance() replaces type() == comparisons: idiomatic and
        # tolerant of str/int subclasses.
        if isinstance(max_features, str):
            if max_features == "auto":
                max_features = self._get_max_features(features)
            elif max_features == "log":
                max_features = self._get_max_features(features, method=log2)
            elif max_features == "sqrt":
                max_features = self._get_max_features(features, method=sqrt)
            else:
                raise ValueError(
                    "Unknown method '{}' for feature selection in Random Forest"
                    .format(max_features))
        # NOTE(review): assumes _get_max_features returns a plain int —
        # a numpy integer would fail this check; confirm against the helper.
        if not isinstance(max_features, int):
            raise TypeError(
                "The parameter 'max_features' must be either a string or integer."
            )

        self.config.set_parameters({
            "n_estimators": n_estimators,
            "max_features": max_features,
            "random_state": random_state
        })

        # create a model
        self.rf = RandomForestClassifier(
            n_estimators=self.config.get_parameter("n_estimators"),
            max_features=self.config.get_parameter("max_features"),
            random_state=self.config.get_parameter("random_state"))

        log.info("Training Random Forest...")
        weights = to_weights(labels) if sample_weight else None
        self.rf.fit(features, labels, sample_weight=weights)
        return self
# Example #4
    def fit(self, X, y=None, sample_weight=False):
        """Fit a multinomial Naive Bayes relation classifier on X.

        Args:
            X: training samples handed to the feature extractor.
            y: ignored; kept for scikit-learn API compatibility.
            sample_weight: when truthy, weight samples via ``to_weights``.

        Returns:
            self, so calls can be chained. Also returned early (untrained)
            when no features could be extracted.
        """
        log.info("Checking parameters...")
        self.config.validate()

        # create a model
        self.nb = MultinomialNB()

        # get features and labels
        features, labels = self.feature_extractor.transform(
            X, relation_labels=self.relation_labels)
        if is_empty(features):
            # Fixed typo in log message ("quiting" -> "quitting").
            log.error("No examples to train, quitting...")
            return self

        log.info("Training Naive Bayes...")
        weights = to_weights(labels) if sample_weight else None
        self.nb.fit(features, labels, sample_weight=weights)
        return self
    def fit(self, X, y=None, max_iterations=100, C=1, sample_weight=False):
        """Fit a calibrated linear-SVM relation classifier on X.

        The LinearSVC is wrapped in ``CalibratedClassifierCV`` so the model
        can produce probability estimates.

        Args:
            X: training samples handed to the feature extractor.
            y: ignored; kept for scikit-learn API compatibility.
            max_iterations: iteration cap passed to LinearSVC as ``max_iter``.
            C: regularisation strength passed to LinearSVC.
            sample_weight: when truthy, weight samples via ``to_weights``.

        Returns:
            self, so calls can be chained. Also returned early (untrained)
            when no features could be extracted.
        """
        log.info("Checking parameters...")
        self.config.set_parameters({"max_iterations": max_iterations, "C": C})
        self.config.validate()

        # create a model
        self.svm = CalibratedClassifierCV(
            LinearSVC(max_iter=self.config.get_parameter("max_iterations"),
                      C=self.config.get_parameter("C")))

        # get features and labels
        features, labels = self.feature_extractor.transform(
            X, relation_labels=self.relation_labels)
        if is_empty(features):
            # Fixed typo in log message ("quiting" -> "quitting").
            log.error("No examples to train, quitting...")
            return self

        log.info("Training SVM...")
        weights = to_weights(labels) if sample_weight else None
        self.svm.fit(features, labels, sample_weight=weights)
        return self