Code Example #1
def run_logreg():
    # CV
    print()
    recs, precs, accs = [], [], []
    for i in range(len(cv_splits)):
        print('CV Fold : ' + str(i + 1))
        cv_train, cv_test = train_test_split(cv_splits[i])
        cv_train_X, cv_train_Y = get_X_Y(cv_train)
        cv_test_X, cv_test_Y = get_X_Y(cv_test)
        mlr_fit = mlr.fit(cv_train_X, cv_train_Y)
        cv_pred = mlr.predict(cv_test_X)
        # The Coefficients
        print('Coefficients : \n', mlr.coef_)
        # Recall Score
        recall = r_s(cv_test_Y, cv_pred)
        print('Recall Score : \n', recall)
        # Precision Score
        precision = p_s(cv_test_Y, cv_pred)
        print('Precision Score : \n', precision)
        # Accuracy Score
        accuracy = a_s(cv_test_Y, cv_pred)
        print('Accuracy Score : \n', accuracy)
        # Confusion Matrix
        print('Confusion Matrix : \n', c_m(cv_test_Y, cv_pred))
        recs.append(recall)
        precs.append(precision)
        accs.append(accuracy)
        print()
    print('Average Recall Score : %f' % np.mean(recs))
    print('Average Precision Score : %f' % np.mean(precs))
    print('Average Accuracy Score : %f' % np.mean(accs))
    print()
    # Test
    test_X, test_Y = get_X_Y(data_test)
    test_pred = mlr.predict(test_X)
    # The Coefficients
    print('Test Coefficients : \n', mlr.coef_)
    # Recall Score
    recall = r_s(test_Y, test_pred)
    print('Recall Score : \n', recall)
    # Precision Score
    precision = p_s(test_Y, test_pred)
    print('Precision Score : \n', precision)
    # Accuracy Score
    accuracy = a_s(test_Y, test_pred)
    print('Accuracy Score : \n', accuracy)
    # Confusion Matrix
    print('Confusion Matrix : \n', c_m(test_Y, test_pred))
    print()
    return None
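
Note on shared context: these snippets come from one project and lean on short aliases and helpers that are defined outside the excerpts. Below is a minimal sketch of the assumed setup; the helper body and the mlr/cv_splits definitions are assumptions inferred from usage, not the original code.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import recall_score as r_s
from sklearn.metrics import precision_score as p_s
from sklearn.metrics import accuracy_score as a_s
from sklearn.metrics import confusion_matrix as c_m

# hypothetical helper: split a DataFrame into a feature matrix and a label
# vector (here assuming the label is the last column)
def get_X_Y(df):
    return df.iloc[:, :-1].values, df.iloc[:, -1].values

# hypothetical: a single logistic-regression instance refitted on each fold
mlr = LogisticRegression(max_iter=1000)
# hypothetical: cv_splits as equal chunks of the cross-validation DataFrame,
# e.g. cv_splits = np.array_split(data_cv, 5)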
Code Example #2
def sgs():
    # gaussian/rbf
    print()
    tg = time.time()
    cs = [0.1, 0.5, 1.0, 2.0, 5.0]
    sigmas = [0.1, 0.5, 1.0, 2.0, 4.0]
    hyperparams = {'C': cs, 'gamma': sigmas}
    # C and gamma candidates are supplied by GridSearchCV below, so the
    # estimator itself takes only fixed settings (passing the lists directly
    # to SVC would be an error)
    rbf_svc = SVC(kernel='rbf', cache_size=4096)

    rbf_svc_clf = GridSearchCV(rbf_svc, hyperparams, cv=5)
    cv_train_X, cv_train_Y = get_X_Y(data_cv)
    rbf_svc_fit = rbf_svc_clf.fit(cv_train_X, cv_train_Y)
    rbf_svc_res = rbf_svc_clf.cv_results_
    rbf_svc_params = rbf_svc_clf.best_params_
    rbf_svc_score = rbf_svc_clf.best_score_

    test_X, test_Y = get_X_Y(data_test)
    test_pred = rbf_svc_clf.predict(test_X)
    # The Best Estimator
    print('Test Estimator : \n', rbf_svc_clf.best_estimator_)
    # Recall Score
    recall = r_s(test_Y, test_pred)
    print('Recall Score : \n', recall)
    # Precision Score
    precision = p_s(test_Y, test_pred)
    print('Precision Score : \n', precision)
    # Accuracy Score
    accuracy = a_s(test_Y, test_pred)
    print('Accuracy Score : \n', accuracy)
    # Confusion Matrix
    print('Confusion Matrix : \n', c_m(test_Y, test_pred))
    tg = time.time() - tg
    print('Time Secs : %f' % tg)
    return None
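
The grid-search results computed above (rbf_svc_res, rbf_svc_params, rbf_svc_score) are stored but never printed. A quick way to inspect them, assuming pandas is available:

import pandas as pd

res_df = pd.DataFrame(rbf_svc_clf.cv_results_)
# mean cross-validated accuracy for every (C, gamma) combination
print(res_df[['param_C', 'param_gamma', 'mean_test_score']])
print('Best Params : ', rbf_svc_clf.best_params_)
print('Best CV Score : %f' % rbf_svc_clf.best_score_)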
Code Example #3
def sls():
    # linear
    print()
    tl = time.time()
    cs = [0.1, 0.5, 1.0, 2.0, 5.0]
    # the C candidates are supplied by GridSearchCV below
    lin_svc = SVC(kernel='linear', cache_size=4096)
    hyperparams = {'C': cs}

    lin_svc_clf = GridSearchCV(lin_svc, hyperparams, cv=5)
    cv_train_X, cv_train_Y = get_X_Y(data_cv)
    lin_svc_fit = lin_svc_clf.fit(cv_train_X, cv_train_Y)
    lin_svc_res = lin_svc_clf.cv_results_
    lin_svc_params = lin_svc_clf.best_params_
    lin_svc_score = lin_svc_clf.best_score_

    test_X, test_Y = get_X_Y(data_test)
    test_pred = lin_svc_clf.predict(test_X)
    # The Best Estimator
    print('Test Estimator : \n', lin_svc_clf.best_estimator_)
    # Recall Score
    recall = r_s(test_Y, test_pred)
    print('Recall Score : \n', recall)
    # Precision Score
    precision = p_s(test_Y, test_pred)
    print('Precision Score : \n', precision)
    # Accuracy Score
    accuracy = a_s(test_Y, test_pred)
    print('Accuracy Score : \n', accuracy)
    # Confusion Matrix
    print('Confusion Matrix : \n', c_m(test_Y, test_pred))
    tl = time.time() - tl
    print('Time Secs : %f' % tl)
    return None
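
A design note, not from the original: for a linear kernel, scikit-learn's LinearSVC (liblinear-based) is usually much faster than SVC(kernel='linear') as the sample count grows, and it slots into the same grid search. A hedged sketch:

from sklearn.svm import LinearSVC

# hypothetical drop-in replacement for the grid search above
lin_fast_clf = GridSearchCV(LinearSVC(), {'C': [0.1, 0.5, 1.0, 2.0, 5.0]}, cv=5)
lin_fast_clf.fit(cv_train_X, cv_train_Y)
print('Best C :', lin_fast_clf.best_params_)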
Code Example #4
def kp():
    # polynomial
    print()
    tp = time.time()
    recs, precs, accs = [], [], []

    alphas = [1.0]
    degs = [2.0, 3.0]  # M
    hyperparams = {'alpha': alphas, 'degree': degs}
    # alpha and degree candidates are supplied by GridSearchCV below, so
    # only the fixed kernel parameters are set on the estimator
    poly_krr = KernelRidge(kernel='poly', gamma=1, coef0=1)

    poly_krr_clf = GridSearchCV(poly_krr, hyperparams, cv=5)
    for batch in data_batches:
        batch_train, batch_test = train_test_split(batch)

        cv_train_X, cv_train_Y = get_X_Y(batch_train)
        poly_krr_fit = poly_krr_clf.fit(cv_train_X, cv_train_Y)
        poly_krr_res = poly_krr_clf.cv_results_
        poly_krr_params = poly_krr_clf.best_params_
        poly_krr_score = poly_krr_clf.best_score_

        test_X, test_Y = get_X_Y(batch_test)
        test_pred = poly_krr_clf.predict(test_X)
        # KernelRidge is a regressor: round its continuous predictions to
        # class labels so the classification metrics below do not error
        test_pred = np.round(test_pred)
        # The Best Estimator
        print('Test Estimator : \n', poly_krr_clf.best_estimator_)
        # Recall Score
        recall = r_s(test_Y, test_pred)
        print('Recall Score : \n', recall)
        # Precision Score
        precision = p_s(test_Y, test_pred)
        print('Precision Score : \n', precision)
        # Accuracy Score
        accuracy = a_s(test_Y, test_pred)
        print('Accuracy Score : \n', accuracy)
        # Confusion Matrix
        print('Confusion Matrix : \n', c_m(test_Y, test_pred))
        recs.append(recall)
        precs.append(precision)
        accs.append(accuracy)
        print()
    print('Average Test Recall Score : %f' % np.mean(recs))
    print('Average Test Precision Score : %f' % np.mean(precs))
    print('Average Test Accuracy Score : %f' % np.mean(accs))

    tp = time.time() - tp
    print('Time Secs : %f' % tp)
    return None
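
One caveat on using KernelRidge as a classifier: it only accepts numeric targets, so if the labels are strings they have to be encoded before fitting. A hedged sketch with sklearn's LabelEncoder (the variable names mirror the function above):

from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
cv_train_Y_num = le.fit_transform(cv_train_Y)  # e.g. 'yes'/'no' -> 1/0
poly_krr_clf.fit(cv_train_X, cv_train_Y_num)
test_pred = np.round(poly_krr_clf.predict(test_X)).astype(int)
# clip in case rounding falls outside the known label range
test_pred = np.clip(test_pred, 0, len(le.classes_) - 1)
pred_labels = le.inverse_transform(test_pred)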
Code Example #5
def kg():
    # gaussian/rbf
    print()
    tg = time.time()
    recs, precs, accs = [], [], []

    alphas = [1.0]
    sigmas = [0.1, 0.5, 1.0, 2.0, 4.0]
    hyperparams = {'alpha': alphas, 'gamma': sigmas}
    # alpha and gamma candidates come from GridSearchCV below
    rbf_krr = KernelRidge(kernel='rbf')

    rbf_krr_clf = GridSearchCV(rbf_krr, hyperparams, cv=5)
    for batch in data_batches:
        batch_train, batch_test = train_test_split(batch)

        cv_train_X, cv_train_Y = get_X_Y(batch_train)
        rbf_krr_fit = rbf_krr_clf.fit(cv_train_X, cv_train_Y)
        rbf_krr_res = rbf_krr_clf.cv_results_
        rbf_krr_params = rbf_krr_clf.best_params_
        rbf_krr_score = rbf_krr_clf.best_score_

        test_X, test_Y = get_X_Y(batch_test)
        test_pred = rbf_krr_clf.predict(test_X)
        # round the regressor's continuous output to class labels
        # (see the note after Code Example #4)
        test_pred = np.round(test_pred)
        # The Best Estimator
        print('Test Estimator : \n', rbf_krr_clf.best_estimator_)
        # Recall Score
        recall = r_s(test_Y, test_pred)
        print('Recall Score : \n', recall)
        # Precision Score
        precision = p_s(test_Y, test_pred)
        print('Precision Score : \n', precision)
        # Accuracy Score
        accuracy = a_s(test_Y, test_pred)
        print('Accuracy Score : \n', accuracy)
        # Confusion Matrix
        print('Confusion Matrix : \n', c_m(test_Y, test_pred))
        recs.append(recall)
        precs.append(precision)
        accs.append(accuracy)
        print()
    print('Average Test Recall Score : %f' % np.mean(recs))
    print('Average Test Precision Score : %f' % np.mean(precs))
    print('Average Test Accuracy Score : %f' % np.mean(accs))

    tg = time.time() - tg
    print('Time Secs : %f' % tg)
    return None
Code Example #6
def kl():
    # linear
    print()
    tl = time.time()
    recs, precs, accs = [], [], []

    alphas = [1.0]
    # the alpha candidates come from GridSearchCV below
    lin_krr = KernelRidge(kernel='linear')
    hyperparams = {'alpha': alphas}

    lin_krr_clf = GridSearchCV(lin_krr, hyperparams, cv=5)
    for batch in data_batches:
        batch_train, batch_test = train_test_split(batch)

        cv_train_X, cv_train_Y = get_X_Y(batch_train)
        lin_krr_fit = lin_krr_clf.fit(cv_train_X, cv_train_Y)
        lin_krr_res = lin_krr_clf.cv_results_
        lin_krr_params = lin_krr_clf.best_params_
        lin_krr_score = lin_krr_clf.best_score_

        test_X, test_Y = get_X_Y(batch_test)
        test_pred = lin_krr_clf.predict(test_X)
        # round the regressor's continuous output to class labels
        # (see the note after Code Example #4)
        test_pred = np.round(test_pred)
        # The Best Estimator
        print('Test Estimator : \n', lin_krr_clf.best_estimator_)
        # Recall Score
        recall = r_s(test_Y, test_pred)
        print('Recall Score : \n', recall)
        # Precision Score
        precision = p_s(test_Y, test_pred)
        print('Precision Score : \n', precision)
        # Accuracy Score
        accuracy = a_s(test_Y, test_pred)
        print('Accuracy Score : \n', accuracy)
        # Confusion Matrix
        print('Confusion Matrix : \n', c_m(test_Y, test_pred))
        recs.append(recall)
        precs.append(precision)
        accs.append(accuracy)
        print()
    print('Average Test Recall Score : %f' % np.mean(recs))
    print('Average Test Precision Score : %f' % np.mean(precs))
    print('Average Test Accuracy Score : %f' % np.mean(accs))

    tl = time.time() - tl
    print('Time Secs : %f' % tl)
    return None
Code Example #7
def sps():
    # polynomial
    print()
    tp = time.time()
    cs = [0.1, 0.5, 1.0, 2.0, 5.0]
    degs = [2.0, 3.0]  # M
    hyperparams = {'C': cs, 'degree': degs}
    # C and degree candidates are supplied by GridSearchCV below; only the
    # fixed kernel parameters are set on the estimator
    poly_svc = SVC(kernel='poly', gamma=1, coef0=1, cache_size=4096)

    poly_svc_clf = GridSearchCV(poly_svc, hyperparams, cv=5)
    cv_train_X, cv_train_Y = get_X_Y(data_cv)
    poly_svc_fit = poly_svc_clf.fit(cv_train_X, cv_train_Y)
    poly_svc_res = poly_svc_clf.cv_results_
    poly_svc_params = poly_svc_clf.best_params_
    poly_svc_score = poly_svc_clf.best_score_

    test_X, test_Y = get_X_Y(data_test)
    test_pred = poly_svc_clf.predict(test_X)
    # The Best Estimator
    print('Test Estimator : \n', poly_svc_clf.best_estimator_)
    # Recall Score
    recall = r_s(test_Y, test_pred)
    print('Recall Score : \n', recall)
    # Precision Score
    precision = p_s(test_Y, test_pred)
    print('Precision Score : \n', precision)
    # Accuracy Score
    accuracy = a_s(test_Y, test_pred)
    print('Accuracy Score : \n', accuracy)
    # Confusion Matrix
    print('Confusion Matrix : \n', c_m(test_Y, test_pred))
    tp = time.time() - tp
    print('Time Secs : %f' % tp)
    return None
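
In the grid above, gamma and coef0 are pinned to 1 and only C and degree are searched. If those kernel settings matter for the data, they can be searched as well; a sketch with hypothetical candidate values:

hyperparams = {'C': cs,
               'degree': degs,
               'gamma': [0.1, 1.0],   # hypothetical candidates
               'coef0': [0.0, 1.0]}   # hypothetical candidates
poly_svc_clf = GridSearchCV(SVC(kernel='poly', cache_size=4096),
                            hyperparams, cv=5)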
Code Example #8
# NOTE: the opening of this snippet was cut off; the loop header below is a
# reconstruction of the standard spot-check pattern the surviving lines belong
# to, assuming `from sklearn import model_selection` and that `models`,
# `seed`, `scoring`, `results`, and `names` are defined earlier in the file
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(model, X_train, Y_train,
                                                 cv=kfold,
                                                 scoring=scoring)
    results.append(cv_results)
    names.append(name)
    # the data output is: name, mean & std
    model_res = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(model_res)

# LDA performed best
print('\n')
print('running LDA model i.e. best performing model on validation data....')
print('\n')
lda = LDA()
# first fit the lda instance against the entire training dataset
lda.fit(X_train, Y_train)
# use the trained model to make predictions against the validation
# feature matrix X_validation
predictions = lda.predict(X_validation)
# determine the accuracy of the model by scoring against the validation
# results vector Y_validation
print('accuracy_score:', a_s(Y_validation, predictions))
print('\n')
print('confusion_matrix')
print('\n')
# generate confusion matrix
print(c_m(Y_validation, predictions))
print('\n')
print('classification_report')
print('\n')
# generate classification report
print(c_r(Y_validation, predictions))
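
The reconstructed loop above assumes `models`, `results`, `names`, and `scoring` were set up earlier in the file. A typical setup for this spot-check pattern (the particular estimators are assumptions, chosen because LDA appears in the snippet):

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

models = [('LR', LogisticRegression(solver='liblinear')),
          ('LDA', LDA()),
          ('KNN', KNeighborsClassifier()),
          ('CART', DecisionTreeClassifier()),
          ('NB', GaussianNB()),
          ('SVM', SVC(gamma='auto'))]
results, names = [], []
scoring = 'accuracy'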
Code Example #9
import seaborn as sns  # needed below for sns.load_dataset
from sklearn.model_selection import train_test_split as t_t_s
from sklearn.naive_bayes import GaussianNB as GNB
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture as GM
from sklearn.metrics import accuracy_score as a_s

df = sns.load_dataset('iris')
x = df.drop('species', axis=1)
y = df['species']

xtr, xte, ytr, yte = t_t_s(x, y, test_size=0.25, random_state=0)
print(xtr.shape, yte.shape)  # (112, 4) (38,)
model = GNB()
model.fit(xtr, ytr)
ypred = model.predict(xte)
print("分类准确率:{0:.2%}".format(a_s(yte, ypred)))

# dimensionality reduction
pca = PCA(n_components=2)
new_x = pca.fit_transform(x)
xtr_new, xte_new, ytr_new, yte_new = t_t_s(new_x,
                                           y,
                                           test_size=0.25,
                                           random_state=0)
print(xtr_new.shape, yte_new.shape)  # (112, 2) (38,)
model1 = GNB()
model1.fit(xtr_new, ytr_new)
ypred1 = model1.predict(xte_new)
print("PCA后分类准确率:{0:.2%}".format(a_s(yte_new, ypred1)))
df['PCA1'] = new_x[:, 0]
df['PCA2'] = new_x[:, 1]
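
The PCA1/PCA2 columns are presumably added so the reduced data can be plotted; a sketch of the usual scatter plot (the styling choices are assumptions):

import matplotlib.pyplot as plt

# 2-D projection of iris, coloured by species
sns.lmplot(x='PCA1', y='PCA2', hue='species', data=df, fit_reg=False)
plt.show()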
Code Example #10
# Assumed imports for this snippet (the aliases are inferred from usage):
from time import time as t
from sklearn.ensemble import RandomForestClassifier as rfc
from sklearn.metrics import accuracy_score as a_s

acctest = 0.8
N = 50000
good = 0
treeid = 0

for i in range(N):
    tryclf = rfc(criterion='entropy', max_leaf_nodes=10, min_samples_split=4)
    tprep = t()
    tryclf.fit(features_train, labels_train)
    tfit = t()
    pred = tryclf.predict(features_test)
    tpred = t()
    acc = a_s(labels_test, pred)
    tacc = t()

    if acc > acctest:
        print(f"The accuracy of the random forest {i} is {acc*100:.2f}%")
        #good = input("is this good enough (1=yes/else no)\n\n>>> ")
        if good == 1:
            acctest = acc
            clf = tryclf
            treeid = i
            print(f"Tree finished at accuracy {acctest*100:.2f}%")
        else:
            treeid = i
            acctest = acc
            clf = tryclf
    # stop searching once a run has been accepted (with the interactive
    # prompt above commented out, good stays 0 and all N forests are tried)
    if good == 1:
        break
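
A note on the loop above: every iteration trains an identically configured forest, so the accuracy varies only through the forest's internal bootstrap randomness. A reproducible variant of the same search, assuming the goal is simply to keep the best-scoring forest, pins random_state:

best_acc = 0.8
for i in range(N):
    clf_i = rfc(criterion='entropy', max_leaf_nodes=10,
                min_samples_split=4, random_state=i)
    clf_i.fit(features_train, labels_train)
    acc = a_s(labels_test, clf_i.predict(features_test))
    if acc > best_acc:
        best_acc, treeid, clf = acc, i, clf_i  # run i can be rebuilt later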
Code Example #11
    model_res = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(model_res)

# LR performed best
print('\n')
print('running LR model i.e. best performing model on validation data....')
print('\n')
lr = LR()
# first fit the lr instance against the entire training dataset
lr.fit(X_train, Y_train)
# use the trained model to make predictions against the validation
# feature matrix X_validation
predictions = lr.predict(X_validation)
# determine the accuracy of the model by scoring against the validation
# results vector Y_validation
print('accuracy_score:', a_s(Y_validation, predictions))
print('\n')
print('confusion_matrix')
print('\n')
# generate confusion matrix
print(c_m(Y_validation, predictions))
print('\n')
print('classification_report')
print('\n')
# generate classification report
print(c_r(Y_validation, predictions))

# load test dataset
testfile = './woe-test.csv'
dataset = pandas.read_csv(testfile, header=0)
Code Example #12
File: woe.py Project: oakinogundeji/ml_projects
# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
print('\n making predictions...')
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

# evaluate predictions
accuracy = a_s(Y_test, predictions)
print("WoE Init Accuracy: %.2f%%" % (accuracy * 100.0))

# load validation dataset
validationfile = './woe-test.csv'
dataset = pd.read_csv(validationfile, header=0)

dataset = dataset.drop(['fil', 'status_log'], axis=1)

Y_ext = dataset.filter(['status'], axis=1)
print('\n Y_ext.head(5)')
print(Y_ext.head(5))

print('\n Y_ext.values')
print(Y_ext.values)
Code Example #13
tl = time.time()
test_pred = lin_krr_clf.predict(test_X)
tl = time.time() - tl
print('Time Taken To Test : %f Secs.' % tl)
# Round Test Predictions to Avoid Multiclass Continuous Targets Error
test_pred = np.round(test_pred)
# The Best Estimator
print('Test Estimator : \n', lin_krr_clf.best_estimator_)
# Recall Score
recall = r_s(test_Y, test_pred, average='micro')
print('Recall Score : \n', recall)
# Precision Score
precision = p_s(test_Y, test_pred, average='micro')
print('Precision Score : \n', precision)
# Accuracy Score
accuracy = a_s(test_Y, test_pred)
print('Accuracy Score : \n', accuracy)
# Confusion Matrix
print('Confusion Matrix : \n', c_m(test_Y, test_pred))
print()

# Polynomial Kernel Ridge Regression
print()
# Functions and Parameters
alphas = [1.0]
degs = [2.0, 3.0, 4.0]  # M
hyperparams = {'alpha': alphas, 'degree': degs}
# alpha and degree candidates are supplied by GridSearchCV below
poly_krr = KernelRidge(kernel='poly', gamma=1, coef0=1)
# Polynomial KRR Initializer
poly_krr_clf = GridSearchCV(poly_krr, hyperparams, cv=5)
Code Example #14
test_set_size = test_dataset_nomissing.shape[0]
print('\n test_set_size...')
print(test_set_size)
X_train, X_test, Y_train, Y_test = t_t_s(X,
                                         Y,
                                         test_size=test_set_size,
                                         random_state=seed,
                                         shuffle=False)

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
print('\n making predictions...')
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
train_predictions = model.score(X_train, Y_train)

# determine the accuracy of the model by scoring against the test
# results vector Y_test
print('\n xgb_test_raw_accuracy_score:', a_s(Y_test, predictions))
print('\n xgb_train_raw_accuracy_score:', train_predictions)
Code Example #15
# (cut off: assumed to be the same train/test split as in Code Example #14,
# here on standardized features; the name `standardizedX` is an assumption)
X_train, X_test, Y_train, Y_test = t_t_s(standardizedX,
                                         Y,
                                         test_size=test_set_size,
                                         random_state=seed,
                                         shuffle=False)

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
print('\n making predictions...')
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
train_predictions = model.score(X_train, Y_train)

# determine the accuracy of the model by scoring against the test
# results vector Y_test
print('\n xgb_test_standardized_accuracy_score:', a_s(Y_test, predictions))
print('\n xgb_train_standardized_accuracy_score:', train_predictions)

# determine the classification report of the model
print('\n xg_classification report:')
print('\n')
print(c_r(Y_test, predictions, digits=3))
Code Example #16
File: raw.py Project: oakinogundeji/ml_projects
X_train, X_test, Y_train, Y_test = t_t_s(X, Y, test_size=test_set_size, random_state=seed, shuffle=False)

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\ntraining the model...')
model.fit(X_train, Y_train)

# view trained model
print('\nmodel...')
print(model)

# make predictions for test data
print('\nmaking predictions...')
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]

# evaluate predictions
accuracy = a_s(Y_test, predictions)
print("Base Accuracy: %.2f%%" % (accuracy * 100.0))

# we will attempt to improve the performance by using an XGBoost-specific DMatrix
xgbdmat = DMat(X_train, Y_train)
# Cheat a bit by using some parameters we can see from the trained model
our_params = {'eta': 0.1, 'seed': 0, 'subsample': 1, 'colsample_bytree': 1,
              'objective': 'binary:logistic', 'max_depth': 3,
              'min_child_weight': 1}

cv_xgb = XGBCV(params=our_params, dtrain=xgbdmat, num_boost_round=3000,
               nfold=5, metrics=['error'], early_stopping_rounds=100)
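
Assuming XGBCV aliases xgboost's xgb.cv (which returns a per-round pandas DataFrame when given metrics=['error']), the usual follow-up is to read off the best boosting round and train a final booster with it; a sketch, with `import xgboost as xgb` assumed:

# cv_xgb is indexed by boosting round: take the round with the lowest mean
# test error as the final num_boost_round
best_rounds = int(cv_xgb['test-error-mean'].idxmin()) + 1
print('best num_boost_round:', best_rounds)
final_bst = xgb.train(our_params, xgbdmat, num_boost_round=best_rounds)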
Code Example #17
test_set_size = test_dataset_nomissing.shape[0]
print('\n test_set_size...')
print(test_set_size)
X_train, X_test, Y_train, Y_test = t_t_s(rescaledX,
                                         Y,
                                         test_size=test_set_size,
                                         random_state=seed,
                                         shuffle=False)

# instantiate XGBC class using defaults
model = XGBC()

# fit model to training datasets
print('\n training the model...')
model.fit(X_train, Y_train)

# view trained model
print('\n model...')
print(model)

# make predictions for test data
print('\n making predictions...')
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
train_predictions = model.score(X_train, Y_train)

# determine the accuracy of the model by scoring against the test
# results vector Y_test
print('\n xgb_test_normalized_accuracy_score:', a_s(Y_test, predictions))
print('\n xgb_train_normalized_accuracy_score:', train_predictions)
Code Example #18
## NOTE: the class body above this point was cut off -- only the last line of
## its predict logic survived. Below is a minimal 1-nearest-neighbour
## reconstruction consistent with that line (an assumption, not the original),
## plus the imports the rest of the snippet needs.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score as a_s


class KNN:
    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def _predict_one(self, x):
        # squared Euclidean distance to every training point
        dists = np.sum((self.X_train - x) ** 2, axis=1)
        best_index = np.argmin(dists)
        return self.y_train[best_index]

    def predict(self, X_test):
        return np.array([self._predict_one(x) for x in X_test])


## Reading the dataframe
iris = pd.read_csv("iris.csv")

## creating feature and target variable for the model
X = iris.drop('species', axis=1).values
y = iris['species'].values

## splitting the dataset into Training and Testing Dataset
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42,
                                                    stratify=y)

## making an instance of the KNN class: knn
knn = KNN()

##fitting the dataset into knn classifier
knn.fit(X_train, y_train)

## predicting the species for the test dataset
prediction = knn.predict(X_test)

## calculating the accuracy for the given model (y_true first, then y_pred)
print(a_s(y_test, prediction))

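As a sanity check on the hand-rolled classifier, scikit-learn's own nearest-neighbour implementation can be run on the same split; with n_neighbors=1 it mirrors the 1-NN behaviour reconstructed above:

from sklearn.neighbors import KNeighborsClassifier

sk_knn = KNeighborsClassifier(n_neighbors=1)
sk_knn.fit(X_train, y_train)
# should closely match the custom KNN's accuracy
print(a_s(y_test, sk_knn.predict(X_test)))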