Example #1
def accuracy(x, y):
    '''Returns the accuracy score of a decision tree evaluated on a held-out 25% test split'''
    # Split before fitting so the test set is never seen during training
    Xtrain, Xtest, Ytrain, Ytest = _tts(x, y, test_size=0.25)
    model = _DecisionTreeClassifier()
    final_model = model.fit(Xtrain, Ytrain)
    predictions = final_model.predict(Xtest)
    score = _accuracy_score(Ytest, predictions)
    return score
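The underscore-prefixed names (_DecisionTreeClassifier, _tts, _accuracy_score) are not defined in the snippet itself; a minimal preamble, assuming they are plain module-level aliases of the scikit-learn APIs, could look like this:

from sklearn.tree import DecisionTreeClassifier as _DecisionTreeClassifier
from sklearn.model_selection import train_test_split as _tts
from sklearn.metrics import accuracy_score as _accuracy_score

# Hypothetical usage on scikit-learn's bundled iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
print(accuracy(iris.data, iris.target))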
Example #2
    def __init__(self, num_labels: int = 3, *args, **kwargs):
        """
        Initialize DecisionTreeClassifier

        :param num_labels: number of polarities
        :param args: arbitrary arguments
        :param kwargs: arbitrary keyword arguments
        """
        super(DecisionTreeClassifier, self).__init__(num_labels=num_labels,
                                                     *args,
                                                     **kwargs)
        self.clf = _DecisionTreeClassifier(**kwargs)
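The method above is shown without its enclosing class or imports; a minimal, self-contained sketch of the same wrapper pattern, using a hypothetical stand-in for the project's base classifier, might look like this:

from sklearn.tree import DecisionTreeClassifier as _DecisionTreeClassifier

class Classifier:
    """Hypothetical stand-in for the project's base classifier."""
    def __init__(self, num_labels: int = 3, *args, **kwargs):
        self.num_labels = num_labels

class DecisionTreeClassifier(Classifier):
    def __init__(self, num_labels: int = 3, *args, **kwargs):
        super(DecisionTreeClassifier, self).__init__(num_labels=num_labels, *args, **kwargs)
        self.clf = _DecisionTreeClassifier(**kwargs)  # the wrapped scikit-learn estimator

clf = DecisionTreeClassifier(num_labels=2, max_depth=3)
clf.clf.fit([[0], [1], [2], [3]], [0, 0, 1, 1])
print(clf.clf.predict([[2.5]]))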
Example #3
    def __init__(self, options):
        self.handle_options(options)

        out_params = convert_params(
            options.get('params', {}),
            ints=[
                'random_state', 'max_depth', 'min_samples_split',
                'max_leaf_nodes'
            ],
            strs=['criterion', 'splitter', 'max_features', 'class_weight'],
        )

        # whitelist valid values for criterion, as the error raised by sklearn for an invalid value is uninformative
        if 'criterion' in out_params and out_params['criterion'] not in ('gini', 'entropy'):
            raise RuntimeError('Invalid value for option criterion: "%s"' %
                               out_params['criterion'])

        # whitelist valid values for splitter, as the error raised by sklearn for an invalid value is uninformative
        if 'splitter' in out_params and out_params['splitter'] not in ('best', 'random'):
            raise RuntimeError('Invalid value for option splitter: "%s"' %
                               out_params['splitter'])

        if 'max_depth' not in out_params:
            out_params.setdefault('max_leaf_nodes', 2000)

        # EAFP... convert max_features to int or float if it is a number.
        try:
            out_params['max_features'] = float(out_params['max_features'])
            max_features_int = int(out_params['max_features'])
            if out_params['max_features'] == max_features_int:
                out_params['max_features'] = max_features_int
        except (KeyError, ValueError, TypeError):
            # max_features is missing or a non-numeric value such as 'sqrt'; leave it unchanged
            pass

        if 'class_weight' in out_params:
            try:
                from ast import literal_eval
                out_params['class_weight'] = literal_eval(
                    out_params['class_weight'])
            except Exception:
                raise RuntimeError(
                    'Invalid value for option class_weight: "%s"' %
                    out_params['class_weight'])

        self.estimator = _DecisionTreeClassifier(**out_params)
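The max_features handling above quietly coerces numeric strings while leaving names such as 'sqrt' untouched; an illustrative stand-alone helper (hypothetical, not part of the original class) behaves the same way:

def coerce_max_features(value):
    # Numeric strings become float; whole-number floats collapse to int;
    # anything non-numeric (e.g. 'sqrt', 'log2', None) is returned unchanged.
    try:
        as_float = float(value)
    except (ValueError, TypeError):
        return value
    as_int = int(as_float)
    return as_int if as_float == as_int else as_float

print(coerce_max_features('3'))     # 3
print(coerce_max_features('0.5'))   # 0.5
print(coerce_max_features('sqrt'))  # sqrt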
Example #4
def run_cross_validation_on_trees(X, y, tree_depths, cv=5, scoring='accuracy'):
    '''For each depth in tree_depths, cross-validates a decision tree and records its training accuracy;
    returns (cv_scores_mean, cv_scores_std, accuracy_scores) as numpy arrays'''
    cv_scores_list = []
    cv_scores_std = []
    cv_scores_mean = []
    accuracy_scores = []
    for depth in tree_depths:
        tree_model = _DecisionTreeClassifier(max_depth=depth)
        cv_scores = _cross_val_score(tree_model, X, y, cv=cv, scoring=scoring)
        cv_scores_list.append(cv_scores)
        cv_scores_mean.append(cv_scores.mean())
        cv_scores_std.append(cv_scores.std())
        accuracy_scores.append(tree_model.fit(X, y).score(X, y))
    cv_scores_mean = _np.array(cv_scores_mean)
    cv_scores_std = _np.array(cv_scores_std)
    accuracy_scores = _np.array(accuracy_scores)
    return cv_scores_mean, cv_scores_std, accuracy_scores
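As with the earlier snippets, the aliases and the quick usage below are assumptions rather than part of the original listing; on the iris dataset the function could be driven like this:

from sklearn.tree import DecisionTreeClassifier as _DecisionTreeClassifier
from sklearn.model_selection import cross_val_score as _cross_val_score
import numpy as _np

# Hypothetical usage: compare cross-validated accuracy with training accuracy for depths 1..10
from sklearn.datasets import load_iris
iris = load_iris()
means, stds, train_scores = run_cross_validation_on_trees(iris.data, iris.target, range(1, 11))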
Example #5
def Alternate(df, depth=4):
    struct_data = df.copy()
    non_numeric_columns = list(struct_data.select_dtypes(exclude=[_np.number]).columns)
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    # Label-encode each non-numeric column individually instead of re-encoding the whole frame on every pass
    for col in non_numeric_columns:
        struct_data[col] = le.fit_transform(struct_data[col].astype(str))
    x = struct_data[
                ['Covid cases', 'age_median',
                 'family_size', 'income_household_median', 'education_college_or_above',
                 'labor_force_participation']]
    y = struct_data['Qty']
    model = _DecisionTreeClassifier(max_depth=depth)
    final_model = model.fit(x, y)
    return final_model
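The selected columns are specific to the original dataset; a self-contained sketch of the same encode-then-fit pattern on placeholder data (the column names here are invented for illustration):

import numpy as _np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier as _DecisionTreeClassifier

toy = pd.DataFrame({
    'region': ['north', 'south', 'south', 'east'],  # non-numeric column to encode
    'income': [52000, 48000, 61000, 45000],
    'Qty': [1, 0, 1, 0],
})
le = LabelEncoder()
for col in toy.select_dtypes(exclude=[_np.number]).columns:
    toy[col] = le.fit_transform(toy[col].astype(str))
fitted = _DecisionTreeClassifier(max_depth=4).fit(toy[['region', 'income']], toy['Qty'])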
Example #6
File: common.py  Project: zied-tayeb/gumpy
    def __init__(self, **kwargs):
        super(Tree, self).__init__()
        self.clf = _DecisionTreeClassifier(**kwargs)
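Here Tree is a thin wrapper in the same style as Example #2: keyword arguments passed to Tree(...) are forwarded unchanged to the wrapped scikit-learn estimator, which is stored in self.clf for later fit/predict calls; how the surrounding gumpy base class is used is not shown in the snippet.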
Example #7
def model_creation(x, y, depth):
    '''Builds a decision tree with the given max depth, fits it on the data, and returns the fitted model; a dot-file export is sketched below'''
    model = _DecisionTreeClassifier(max_depth=depth)
    final_model = model.fit(x, y)
    return final_model
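The dot-file export mentioned alongside this example is not part of the function body; a hedged sketch using scikit-learn's export_graphviz (the file name and toy data are assumptions):

from sklearn.tree import DecisionTreeClassifier as _DecisionTreeClassifier, export_graphviz
from sklearn.datasets import load_iris

iris = load_iris()
fitted = model_creation(iris.data, iris.target, depth=3)
# Write the fitted tree to a Graphviz dot file in the current working directory
export_graphviz(fitted, out_file='tree.dot', feature_names=iris.feature_names)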