def accuracy(x, y):
    """Return the accuracy of a decision tree on a held-out test split.

    :param x: feature matrix
    :param y: target labels
    :return: accuracy score on the 25% hold-out test partition

    Fix: the original fit the model on ALL of the data first and only then
    split, so the "test" rows had already been seen during training — a data
    leak that inflates the reported score.  Split before fitting instead.
    """
    Xtrain, Xtest, Ytrain, Ytest = _tts(x, y, test_size=0.25)
    # Train on the training partition only.
    model = _DecisionTreeClassifier()
    final_model = model.fit(Xtrain, Ytrain)
    predictions = final_model.predict(Xtest)
    score = _accuracy_score(Ytest, predictions)
    return score
def __init__(self, num_labels: int = 3, *args, **kwargs):
    """Construct the wrapper and its underlying sklearn estimator.

    :param num_labels: number of polarities (forwarded to the base class)
    :param args: arbitrary positional arguments for the base class
    :param kwargs: arbitrary keyword arguments, forwarded both to the base
        class and to the wrapped ``_DecisionTreeClassifier``
    """
    # Initialise the base class first, then build the sklearn estimator.
    super(DecisionTreeClassifier, self).__init__(
        num_labels=num_labels, *args, **kwargs
    )
    self.clf = _DecisionTreeClassifier(**kwargs)
def __init__(self, options):
    """Validate user-supplied options and build the sklearn estimator.

    :param options: mapping whose ``'params'`` entry holds raw user settings
    :raises RuntimeError: for an invalid criterion, splitter or class_weight

    Fixes: the original validated with ``assert`` (a no-op under ``python -O``)
    wrapped in ``try/except AssertionError``, and used a bare ``except:`` that
    swallows every exception including KeyboardInterrupt.
    """
    self.handle_options(options)
    out_params = convert_params(
        options.get('params', {}),
        ints=[
            'random_state', 'max_depth', 'min_samples_split', 'max_leaf_nodes'
        ],
        strs=['criterion', 'splitter', 'max_features', 'class_weight'],
    )

    # Whitelist valid values up front: the error sklearn raises for an
    # invalid criterion/splitter is uninformative.  Raise directly rather
    # than via assert, which is stripped when Python runs with -O.
    if 'criterion' in out_params and out_params['criterion'] not in ('gini', 'entropy'):
        raise RuntimeError('Invalid value for option criterion: "%s"' %
                           out_params['criterion'])
    if 'splitter' in out_params and out_params['splitter'] not in ('best', 'random'):
        raise RuntimeError('Invalid value for option splitter: "%s"' %
                           out_params['splitter'])

    # Cap tree growth by leaf count only when the user did not already cap
    # it by depth; setdefault keeps any explicit max_leaf_nodes value.
    if 'max_depth' not in out_params:
        out_params.setdefault('max_leaf_nodes', 2000)

    # EAFP: convert max_features to float, then to int when it is integral.
    # KeyError   -> option absent
    # ValueError -> non-numeric string (sklearn also accepts e.g. 'sqrt')
    # TypeError  -> non-convertible type
    try:
        out_params['max_features'] = float(out_params['max_features'])
        max_features_int = int(out_params['max_features'])
        if out_params['max_features'] == max_features_int:
            out_params['max_features'] = max_features_int
    except (KeyError, ValueError, TypeError):
        pass

    if 'class_weight' in out_params:
        # class_weight arrives as a string; parse it safely (no eval) into
        # the dict/list sklearn expects.
        try:
            from ast import literal_eval
            out_params['class_weight'] = literal_eval(
                out_params['class_weight'])
        except Exception:
            raise RuntimeError(
                'Invalid value for option class_weight: "%s"' %
                out_params['class_weight'])

    self.estimator = _DecisionTreeClassifier(**out_params)
def run_cross_validation_on_trees(X, y, tree_depths, cv=5, scoring='accuracy'):
    """Cross-validate decision trees over a range of depths.

    :param X: feature matrix
    :param y: target labels
    :param tree_depths: iterable of ``max_depth`` values to evaluate
    :param cv: number of cross-validation folds
    :param scoring: sklearn scoring metric name
    :return: tuple of numpy arrays ``(cv_scores_mean, cv_scores_std,
        accuracy_scores)``, one entry per depth

    Fix: the original also accumulated the raw per-fold score arrays in a
    ``cv_scores_list`` that was never used or returned — dead code, removed.
    """
    cv_scores_mean = []
    cv_scores_std = []
    accuracy_scores = []
    for depth in tree_depths:
        tree_model = _DecisionTreeClassifier(max_depth=depth)
        cv_scores = _cross_val_score(tree_model, X, y, cv=cv, scoring=scoring)
        cv_scores_mean.append(cv_scores.mean())
        cv_scores_std.append(cv_scores.std())
        # Training accuracy on the full data — optimistic by construction,
        # kept for comparison against the cross-validated scores.
        accuracy_scores.append(tree_model.fit(X, y).score(X, y))
    cv_scores_mean = _np.array(cv_scores_mean)
    cv_scores_std = _np.array(cv_scores_std)
    accuracy_scores = _np.array(accuracy_scores)
    return cv_scores_mean, cv_scores_std, accuracy_scores
def Alternate(df, depth=4):
    """Label-encode non-numeric columns and fit a depth-limited decision tree.

    :param df: input DataFrame; must contain the hard-coded feature columns
        and the ``'Qty'`` target column
    :param depth: maximum tree depth
    :return: the fitted ``_DecisionTreeClassifier``

    Fix: the original loop applied the label encoder to EVERY column of the
    frame (``df.apply(..., axis=0)``) on each iteration and assigned that
    whole result into a single column position via ``iloc[:, i]``, corrupting
    the data.  Each non-numeric column is now encoded individually.
    """
    struct_data = df.copy()
    non_numeric_columns = list(
        struct_data.select_dtypes(exclude=[_np.number]).columns)
    from sklearn.preprocessing import LabelEncoder
    le = LabelEncoder()
    for col in non_numeric_columns:
        # fit_transform per column: encodings are independent across columns.
        struct_data[col] = le.fit_transform(struct_data[col].astype(str))
    x = struct_data[
        ['Covid cases', 'age_median', 'family_size',
         'income_household_median', 'education_college_or_above',
         'labor_force_participation']]
    y = struct_data['Qty']
    model = _DecisionTreeClassifier(max_depth=depth)
    final_model = model.fit(x, y)
    return final_model
def __init__(self, **kwargs):
    """Initialise the base class, then build the wrapped sklearn tree.

    :param kwargs: keyword arguments forwarded unchanged to
        ``_DecisionTreeClassifier``
    """
    super(Tree, self).__init__()
    self.clf = _DecisionTreeClassifier(**kwargs)
def model_creation(x, y, depth):
    """Fit a depth-limited decision tree classifier on the given data.

    :param x: feature matrix
    :param y: target labels
    :param depth: maximum tree depth
    :return: the fitted model

    NOTE(review): the previous docstring also claimed a dot file is created
    in the root folder, but nothing here writes any file — confirm whether a
    caller does that separately.
    """
    classifier = _DecisionTreeClassifier(max_depth=depth)
    final_model = classifier.fit(x, y)
    return final_model