Ejemplo n.º 1
0
def balance_dataset(dataset, classe):
    """Split *dataset* into train/test parts and oversample the training part.

    Args:
        dataset: pandas DataFrame holding the features and the target column.
        classe: Name of the target (class label) column in ``dataset``.

    Returns:
        A 6-tuple ``(X_train, X_test, y_train, y_test, X_train_res,
        y_train_res)``: the plain 70/30 split followed by the
        SMOTE-resampled training features and labels.
    """
    y = dataset[classe]
    X = dataset.drop(columns=[classe])

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=0)

    sm = SMOTE(random_state=2)

    # `fit_sample` was deprecated in imbalanced-learn 0.4 and removed in 0.8;
    # prefer `fit_resample` and fall back only on very old installations.
    resample = getattr(sm, "fit_resample", None) or sm.fit_sample

    # Only the training portion is resampled; the test split is returned
    # untouched. `.values.ravel()` hands SMOTE a flat ndarray of labels
    # without relying on `Series.ravel()`, which pandas 2.2 deprecates.
    X_train_res, y_train_res = resample(X_train, y_train.values.ravel())

    return X_train, X_test, y_train, y_test, X_train_res, y_train_res
Ejemplo n.º 2
0
# Local import: `pd.concat` is needed below and no import block is visible in
# this part of the file.
import pandas as pd

# Tag every row with two indicator columns recording which of the three source
# tables it came from (rows from `green_data` get 0 in both).
green_data['hinselmann'] = 0
green_data['schiller'] = 0
hinselmann_data['hinselmann'] = 1
hinselmann_data['schiller'] = 0
schiller_data['hinselmann'] = 0
schiller_data['schiller'] = 1

# Stack the three tables into one. `DataFrame.append` was removed in
# pandas 2.0; `pd.concat` yields the same row-wise concatenation (original
# indexes kept) and works on older pandas versions as well.
super_table = pd.concat([green_data, hinselmann_data, schiller_data])

X, y = split_dataset(super_table, CLASS)
X = getKBest(X, y)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sm = SMOTE(random_state=2)

# `fit_sample` was deprecated in imbalanced-learn 0.4 and removed in 0.8;
# prefer `fit_resample` and fall back only on very old installations.
resample = getattr(sm, "fit_resample", None) or sm.fit_sample

# Oversample the training split only; the test split stays as drawn.
X_train_res, y_train_res = resample(X_train, y_train.ravel())

# Statistics for each base classifier, keyed by the classifier's class name.
results = {}

for clf in base_clfs:
    clf_name = type(clf).__name__
    stats = classifier_statistics(clf, X_train_res, X_test, y_train_res,
                                  y_test)
    results[clf_name] = stats

measures = {}