Ejemplo n.º 1
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=500,
                 max_leaf_nodes=16,
                 verbose=0,
                 model: RandomForestClassifier = None,
                 logger=None):
        """Build a random-forest classifier wrapper for *dataset*.

        Parameters
        ----------
        dataset : DataSet
            Dataset the classifier will be trained/evaluated on.
        n_estimators, max_leaf_nodes, verbose
            Kept for backward compatibility; the default classifier below
            uses hand-tuned hyperparameters instead of these values.
        model : RandomForestClassifier, optional
            Pre-built classifier to use; when None a default one with
            tuned hyperparameters is created.
        logger : optional
            Logger forwarded to the Classifier base initializer.
        """
        # Scoring metrics used during hyperparameter search.
        self.scores = [
            'recall_weighted', 'precision_micro', 'precision_weighted'
        ]
        # Grid searched over by the tuning machinery.
        self.tuned_parameters = {
            'criterion': ['gini', 'entropy'],
            'class_weight': ['balanced', None],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None],
            'bootstrap': [True, False]
        }
        if model is None:
            # Defaults reflect a previously tuned configuration.
            self.classifier = RandomForestClassifier(n_estimators=150,
                                                     max_depth=5,
                                                     criterion='gini',
                                                     class_weight=None,
                                                     bootstrap=True)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 2
0
    def __init__(self,
                 dataset: "DataSet",
                 verbose=0,
                 model: SGDClassifier = None,
                 logger=None):
        """Build an SGD classifier wrapper for *dataset*.

        Parameters
        ----------
        dataset : DataSet
            Dataset the classifier will be trained/evaluated on.
        verbose : int
            Kept for backward compatibility; not used here.
        model : SGDClassifier, optional
            Pre-built classifier to use; when None a default SGDClassifier
            is created.
        logger : optional
            Logger forwarded to the Classifier base initializer.
        """
        # Scoring metric used during hyperparameter search.
        self.scores = ['precision_weighted']
        # Grid for a full text pipeline (vectorizer + tf-idf + classifier).
        self.tuned_parameters_tf = {
            'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
            'tfidf__use_idf': [True, False],
            'tfidf__smooth_idf': [True, False],
            'tfidf__sublinear_tf': [True, False],
            'clf__loss': ['hinge', 'modified_huber'],
            'clf__penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'clf__class_weight': ['balanced', None],
            'clf__fit_intercept': [True, False]
        }
        # Grid for tuning the bare classifier.
        self.tuned_parameters = {
            'loss': ['hinge', 'modified_huber'],
            'penalty': ['none', 'l1', 'l2', 'elasticnet'],
            'class_weight': ['balanced', None],
            'fit_intercept': [True, False]
        }
        if model is None:
            self.classifier = SGDClassifier()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 3
0
    def __init__(self,
                 dataset,
                 estimators: "list[tuple[str, BaseEstimator]]",
                 logger=None,
                 voting='hard',
                 weights=None,
                 *args,
                 **kwargs):
        """Build a voting-ensemble classifier wrapper for *dataset*.

        Parameters
        ----------
        dataset
            Dataset the classifier will be trained/evaluated on.
        estimators : list of (str, BaseEstimator)
            Named base estimators combined by the VotingClassifier.
        logger : optional
            Logger forwarded to the Classifier base initializer.
        voting : str, {'hard', 'soft'} (default='hard')
            If 'hard', uses predicted class labels for majority rule voting.
            Else if 'soft', predicts the class label based on the argmax of
            the sums of the predicted probabilities, which is recommended
            for an ensemble of well-calibrated classifiers.
        weights : array-like, shape = [n_classifiers], optional (default=None)
            Sequence of weights (float or int) to weight the occurrences of
            predicted class labels (hard voting) or class probabilities
            before averaging (soft voting). Uses uniform weights if None.

        Notes
        -----
        *args and **kwargs are accepted for signature compatibility but are
        currently ignored.
        """
        self.classifier = VotingClassifier(estimators,
                                           voting=voting,
                                           weights=weights)

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger)
Ejemplo n.º 4
0
    def __init__(self, dataset: "DataSet", loss='squared_hinge', max_iter=1000, verbose=0, model=None,
                 logger: "Logger" = None):
        """Build an SVC classifier wrapper for *dataset*.

        Parameters
        ----------
        dataset : DataSet
            Dataset the classifier will be trained/evaluated on.
        loss, max_iter, verbose
            Kept for backward compatibility; not used by the default SVC.
        model : optional
            Pre-built classifier to use; when None a default SVC is created.
        logger : Logger, optional
            Logger forwarded to the Classifier base initializer.
        """
        # Scoring metric used during hyperparameter search.
        self.scores = ['precision_weighted']
        # Grid searched over by the tuning machinery.
        self.tuned_parameters = {'C': [1, 10, 100, 1000], 'gamma': [1e-3, 1e-4, 'auto'],
                                 'kernel': ['rbf', 'linear', 'poly'], 'class_weight': ['balanced', None],
                                 'degree': [3, 4, 5]}
        if model is None:
            self.classifier = SVC()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 5
0
    def __init__(self, dataset: "DataSet", verbose=0, model: MultinomialNB = None, logger=None):
        """Build a multinomial naive-Bayes wrapper for *dataset*.

        Parameters
        ----------
        dataset : DataSet
            Dataset the classifier will be trained/evaluated on.
        verbose : int
            Kept for backward compatibility; not used here.
        model : MultinomialNB, optional
            Pre-built classifier to use; when None a default MultinomialNB
            is created.
        logger : optional
            Logger forwarded to the Classifier base initializer.
        """
        # Scoring metric used during hyperparameter search.
        self.scores = ['precision_weighted']
        # Grid for a text pipeline (vectorizer + tf-idf + classifier).
        self.tuned_parameters = {'vect__ngram_range': [(1, 1), (1, 2), (1, 3), (1, 4)],
                                 'tfidf__use_idf': [True, False],
                                 'tfidf__smooth_idf': [True, False],
                                 'tfidf__sublinear_tf': [True, False],
                                 'clf__alpha': [1e-2, 1e-3, 1e-4],
                                 'clf__fit_prior': [True, False]}
        if model is None:
            self.classifier = MultinomialNB()
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
Ejemplo n.º 6
0
    def __init__(self,
                 dataset: "DataSet",
                 n_estimators=120,
                 verbose=0,
                 model=None,
                 logger: "Logger" = None):
        """Build a gradient-boosting classifier wrapper for *dataset*.

        Parameters
        ----------
        dataset : DataSet
            Dataset the classifier will be trained/evaluated on.
        n_estimators, verbose
            Kept for backward compatibility; the default classifier below
            uses hand-tuned hyperparameters instead of these values.
        model : optional
            Pre-built classifier to use; when None a default one with
            tuned hyperparameters is created.
        logger : Logger, optional
            Logger forwarded to the Classifier base initializer.
        """
        # Scoring metrics used during hyperparameter search.
        self.scores = ['recall_weighted', 'precision_weighted']
        # Grid searched over by the tuning machinery.
        self.tuned_parameters = {
            'loss': ['deviance'],
            'learning_rate': [0.3, 0.1, 0.03, 0.01, 0.003, 0.001],
            'n_estimators': [10, 30, 50, 100, 150, 200],
            'max_depth': [2, 3, 4, 5, 6, 7, None]
        }
        if model is None:
            # Defaults reflect a previously tuned configuration.
            self.classifier = GradientBoostingClassifier(n_estimators=100,
                                                         max_depth=5,
                                                         loss='deviance',
                                                         learning_rate=0.1)
        else:
            self.classifier = model

        SklearnClassifier.__init__(self, self.classifier)
        Classifier.__init__(self, dataset, logger=logger)
 # Thin delegation to the Classifier base initializer; no extra state.
 # NOTE(review): indentation looks mangled by scraping — confirm against the
 # original file before reuse.
 def __init__(self, path, train_df, test_df, features):
     Classifier.__init__(self, path, train_df, test_df, features)