Example #1
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score as AS, confusion_matrix


# Parameter selection and prediction for classification models.
def Classification_grid_search_method(pipeline, parameters, X_training_set,
                                      Y_training_set, X_test_set, Y_test_set):
    training_model = GridSearchCV(pipeline,
                                  parameters,
                                  cv=10,
                                  scoring='accuracy')
    trained_model = training_model.fit(X_training_set, Y_training_set)
    Y_pred = trained_model.predict(X_test_set)
    test_Accuracy_Score = AS(Y_test_set, Y_pred)
    train_Accuracy_Score = AS(Y_training_set,
                              trained_model.predict(X_training_set))
    Confusion_Matrix = confusion_matrix(Y_test_set, Y_pred)
    return (Y_pred, train_Accuracy_Score, test_Accuracy_Score,
            Confusion_Matrix, trained_model.best_estimator_)
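# A minimal usage sketch with an illustrative pipeline, parameter grid and toy
# data; none of these names (demo_pipeline, demo_parameters, the iris split)
# come from the original example.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

X_iris, y_iris = load_iris(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X_iris, y_iris, random_state=0)

demo_pipeline = Pipeline([('scale', StandardScaler()), ('clf', SVC())])
demo_parameters = {'clf__C': [0.1, 1, 10]}
(Y_pred, train_acc, test_acc,
 conf_mat, best_estimator) = Classification_grid_search_method(
     demo_pipeline, demo_parameters, X_tr, y_tr, X_te, y_te)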
Example #2
from sklearn.model_selection import train_test_split
from sklearn.metrics import (mean_squared_error as MSE, r2_score as R2,
                             mean_absolute_error as MAE,
                             accuracy_score as AS, confusion_matrix as CM)


def RunModel(model, data, columns, Predict):
    X = data[columns]
    Y = data[Predict]

    # `train` and `test` (the split fractions) are expected to be defined at
    # module level before RunModel is called.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        Y,
                                                        train_size=train,
                                                        test_size=test,
                                                        random_state=42)

    Model = model
    Model.fit(X_train, y_train)

    prediction = Model.predict(X_test)
    mse = MSE(y_test, prediction)
    r2 = R2(y_test, prediction)
    mae = MAE(y_test, prediction)
    acc = AS(y_test, prediction)
    con_met = CM(y_test, prediction)
    return mse, r2, mae, acc, con_met
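# A hypothetical call, shown with the iris data purely for illustration; the
# function mixes regression and classification metrics, so it needs numeric
# class labels.
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

train, test = 0.8, 0.2          # module-level split fractions read by RunModel
iris = load_iris(as_frame=True)

mse, r2, mae, acc, con_met = RunModel(DecisionTreeClassifier(random_state=0),
                                      data=iris.frame,
                                      columns=list(iris.feature_names),
                                      Predict='target')
print(acc, con_met, sep='\n')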
X = labels_X[rank1Features]
X_train, X_test, y_train, y_test = TTS(X,
                                       class_y,
                                       test_size=0.10,
                                       shuffle=True,
                                       random_state=2000)

Model = RFC(n_estimators=1000,
            max_depth=30,
            random_state=100,
            max_leaf_nodes=1000)

Model.fit(X_train, y_train)
prediction = Model.predict(X_test)

print("ACCURACY is : {:.2f}".format(AS(y_test, prediction) * 100))

### Precision, Recall, F1

print('\n')
print("Precision, Recall, F1")
print('\n')
CR = classification_report(y_test, prediction)
print(CR)
print('\n')

### ROC CURVE

fpr, tpr, thresholds = roc_curve(y_test, prediction)

roc_auc = auc(fpr, tpr)
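# The fragment stops after computing the AUC. A plotting sketch for the fpr/tpr
# values above, assuming matplotlib and a binary target (roc_curve is fed hard
# 0/1 predictions here; scores from Model.predict_proba(X_test)[:, 1] would
# give a smoother, more informative curve).
import matplotlib.pyplot as plt

plt.plot(fpr, tpr, label='ROC (AUC = {:.3f})'.format(roc_auc))
plt.plot([0, 1], [0, 1], linestyle='--', label='chance')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()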
#p = Perceptron(random_state=42,
#              max_iter=10)
#p.fit(X, y)

mlpc = MLPClassifier(hidden_layer_sizes=(15, 15), solver='lbfgs')
mlpc.fit(X_train, Y_train)

Accuracy = []              # assumed initialisation; not shown in the fragment
shape = X_Test.shape[0]    # number of test rows, as in the later examples
for i in range(shape):
    # print(i)
    # Per-row prediction together with the probability of the predicted class.
    Y_pred = mlpc.predict(X_Test.loc[[i]])
    Y_pred = int(Y_pred[0])
    pred = mlpc.predict_proba(X_Test.loc[[i]])
    pred = pred[0]
    print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])
    # mlpc_result = mlpc.predict(X_test)
    # Whole-test-set accuracy, recomputed on every iteration as in the original.
    Y_pred = mlpc.predict(X_Test)
    print(" Accuracy is : ", AS(Y_Test, Y_pred) * 100)
    Accuracy.append(AS(Y_Test, Y_pred) * 100)

print(max(Accuracy))
df = pd.DataFrame()
df['Accuracy'] = Accuracy
conf_matrix = confusion_matrix(Y_Test, Y_pred)
# accuracy = accuracy_score(Y_Test, Y_pred)
print(conf_matrix)



#param_grid = {'a': [1, 9], 'b': [True, False]}

#scores_10 = cross_val_score(estimator = mlpc,X = X_train, y = Y_train, cv = 10)
#scores1_10 = cross_val_score(estimator = mlpc,X = X_test, y = Y_test, cv = 10)
Example #5
target = colsname[9]

X = fraudcheck2[predictors]
Y = fraudcheck2[target]

rfc = RFC(n_jobs=2, oob_score=True, n_estimators=1000, criterion="entropy")

np.shape(fraudcheck2)

rfc.fit(X, Y)  # Fitting RandomForestClassifier model from sklearn.ensemble
print('Out-of-bag score:', rfc.oob_score_)

fraudcheck2['rfc_pred'] = rfc.predict(X)
print('Model accuracy score: {0:0.4f}'.format(
    AS(fraudcheck2['TaxInc'], fraudcheck2['rfc_pred'])))
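# Note: the accuracy above is computed on the same rows the forest was fit on
# (an in-sample figure); the out-of-bag score (rfc.oob_score_) is usually the
# more realistic estimate of generalisation.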

##################Company Data##################

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.metrics import accuracy_score as AS

cdata = pd.read_csv("C:\\Users\\Home\\Downloads\\Company_data.csv")
cdata.columns
cdata
cdata.describe()

a = list(cdata['Sales'])
plt.boxplot(a)
plt.show()

        # Fragment: this code sits inside loops over max_depth and max_leaf_nodes
        # (the outer loops and the clf_gini tree construction are not shown).
        X_Test = X_feature1
        Y_Test = y3
        shape = X_Test.shape[0]
        for i in range(shape):
            # print(i)
            Y_pred = clf_gini.predict(X_Test.loc[[i]])
            Y_pred = int(Y_pred[0])
            pred = clf_gini.predict_proba(X_Test.loc[[i]])
            pred = pred[0]
            print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])
            # print('Y Test : ', Y_Test)
            # print('Y Pred : ', Y_pred)
            Y_pred = clf_gini.predict(X_Test)
            # print(" Accuracy is : ", AS(Y_Test, Y_pred) * 100, ' ', max_depth, ' ', max_leaf_nodes)
            Accuracy.append(AS(Y_Test, Y_pred) * 100)
            for_depth.append(max_depth)
            for_leaf.append(max_leaf_nodes)

print(max(Accuracy))
df = pd.DataFrame()
df['Accuracy'] = Accuracy
df['max_depth'] = for_depth
df['max_leaf_nodes'] = for_leaf
df = df.sort_values("Accuracy", ascending=False).head(10)
print(df)

#""" Taking top 10 values of max_depth and max_leaf_nodes and testing"""
#df1 = pd.DataFrame()
#Accuracy     = []
#Random_State = []
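# The sweep over max_depth and max_leaf_nodes above can also be expressed with
# GridSearchCV, as in Example #1. A sketch only: it assumes clf_gini is a
# DecisionTreeClassifier(criterion='gini'), that X_Train/Y_Train are the
# training split it was fitted on (not shown in the fragment), and that the
# parameter ranges below are illustrative.
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier

param_grid = {'max_depth': [5, 10, 20, 30],
              'max_leaf_nodes': [50, 100, 500, 1000]}
grid = GridSearchCV(DecisionTreeClassifier(criterion='gini'),
                    param_grid,
                    cv=10,
                    scoring='accuracy')
grid.fit(X_Train, Y_Train)
print(grid.best_params_, grid.best_score_)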
Example #7
            # Fragment: runs inside loops over max_depth, max_leaf_nodes and k
            # (the outer loops and the clf_gini tree construction are not shown).
            clf_gini.fit(X_Train, Y_Train)
            X_Test = X_feature1
            Y_Test = y3
            shape = X_Test.shape[0]
            for i in range(shape):
                # print(i)
                Y_pred = clf_gini.predict(X_Test.loc[[i]])
                Y_pred = int(Y_pred[0])
                pred = clf_gini.predict_proba(X_Test.loc[[i]])
                pred = pred[0]
                print(Y_pred, ' ', Y_Test[i], ' ', pred[Y_pred])
                ###        print('Y Test : ', Y_Test)
                ###        print('Y Pred : ', Y_pred)
                Y_pred = clf_gini.predict(X_Test)
                print(" Accuracy is : ",
                      AS(Y_Test, Y_pred) * 100, ' ', max_depth, ' ',
                      max_leaf_nodes, ' ', k)
                ##            print('Y Pred : ', pred)
                Accuracy.append(AS(Y_Test, Y_pred) * 100)
                for_depth.append(max_depth)
                for_leaf.append(max_leaf_nodes)

print(max(Accuracy))
df = pd.DataFrame()
df['Accuracy'] = Accuracy
df['max_depth'] = for_depth
df['max_leaf_nodes'] = for_leaf
df = df.sort_values("Accuracy", ascending=False).head(10)
print(df)

#""" Taking top 10 values of max_depth and max_leaf_nodes and testing"""
Example #8
# Transforming the data into principal components; the first 10 principal
# components are used, as they explain 99 percent of the variance.
# U, s and V are assumed to come from an earlier SVD of the standardised
# training features (all columns except the last two), e.g.
#   U, s, V = np.linalg.svd(X_train_std[:, :-2], full_matrices=False)
X_train_std_transformed = np.concatenate(
    (U.dot(np.diag(s))[:, 0:10], np.array(X_train.iloc[:, -2:]).reshape(
        len(X_train), 2)),
    axis=1)
X_test_std_transformed = np.concatenate(
    (X_test_std[:, :-2].dot(np.transpose(V))[:, 0:10],
     np.array(X_test.iloc[:, -2:]).reshape(len(X_test), 2)),
    axis=1)
"""Using Classification techniques for wine quality analysis"""

clf = NB.GaussianNB()
clf.fit(X_train_std_transformed, Y_train)
Y_pred_GNB = clf.predict(X_test_std_transformed)
train_accuracy_score_GNB = AS(Y_train, clf.predict(X_train_std_transformed))
test_accuracy_score_GNB = AS(Y_test, Y_pred_GNB)
confusion_matrix_GNB = confusion_matrix(Y_test, Y_pred_GNB)
precision_score_GNB = precision_score(Y_test, Y_pred_GNB, average='weighted')
recall_score_GNB = recall_score(Y_test, Y_pred_GNB, average='weighted')
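# For completeness, the weighted F1 score can be computed the same way; the
# import line is an assumption (the example's own imports are not shown).
from sklearn.metrics import f1_score

f1_score_GNB = f1_score(Y_test, Y_pred_GNB, average='weighted')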


# Parameter selection and prediction for classification models.
def Classification_grid_search_method(pipeline, parameters, X_training_set,
                                      Y_training_set, X_test_set, Y_test_set):
    training_model = GridSearchCV(pipeline,
                                  parameters,
                                  cv=10,
                                  scoring='accuracy')
    trained_model = training_model.fit(X_training_set, Y_training_set)
    Y_pred = trained_model.predict(X_test_set)