from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

# train the Naive Bayes model on the features in count_train
nb_classifier = MultinomialNB()
nb_classifier.fit(count_train, y_train)

# predict on the held-out count features
pred = nb_classifier.predict(count_test)

# check accuracy
score = metrics.accuracy_score(y_test, pred)
print(score)

# confusion matrix
cm = metrics.confusion_matrix(y_test, pred, labels=['FAKE', 'REAL'])
print(cm)

## Naive Bayes classifier using features created by the TF-IDF vectorizer
nb_classifier2 = MultinomialNB()
nb_classifier2.fit(tfidf_train, y_train)
pred2 = nb_classifier2.predict(tfidf_test)

# check accuracy
score2 = metrics.accuracy_score(y_test, pred2)
print(score2)

# confusion matrix
cm2 = metrics.confusion_matrix(y_test, pred2, labels=['FAKE', 'REAL'])
print(cm2)
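# The block above assumes count_train/count_test and tfidf_train/tfidf_test
# already exist. A minimal sketch of how they could be built with scikit-learn's
# CountVectorizer and TfidfVectorizer; the variable names X_train/X_test and the
# stop_words/max_df settings are assumptions, not taken from the original code:
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

count_vectorizer = CountVectorizer(stop_words='english')
count_train = count_vectorizer.fit_transform(X_train)   # fit the vocabulary on training text only
count_test = count_vectorizer.transform(X_test)         # reuse the fitted vocabulary

tfidf_vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
tfidf_train = tfidf_vectorizer.fit_transform(X_train)
tfidf_test = tfidf_vectorizer.transform(X_test)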
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# mapping used to encode the EmploymentType column
employment = {'Salaried': 0, 'Self employed': 1, ' ': 1}

# importing the dataset
dataset = pd.read_csv('CreditCardData.csv')
dataset = dataset.dropna()

# creating a dummy variable for the EmploymentType column
dataset['EmploymentType'] = [employment[value] for value in dataset.EmploymentType]

loanDefaulter = dataset[['loandefault']]
factors = dataset[[
    'disbursedamount', 'assetcost', 'EmploymentType',
    'PRI.CURRENT.BALANCE', 'PRI.SANCTIONED.AMOUNT', 'PRIMARY.INSTAL.AMT'
]]

# creating training and testing sets and scaling the data
factorsTrain, factorsTest, loanTrain, loanTest = train_test_split(
    factors, loanDefaulter, test_size=0.2, shuffle=True)

scaler = StandardScaler()
factorsTrain = scaler.fit_transform(factorsTrain)
factorsTest = scaler.transform(factorsTest)  # reuse the scaling fitted on the training set

# creating and fitting the logistic regression model
LogisticRegressor = LogisticRegression()
LogisticRegressor.fit(factorsTrain, loanTrain.values.ravel())

predictedResult = LogisticRegressor.predict(factorsTest)
print('Confusion matrix:')
print(metrics.confusion_matrix(loanTest, predictedResult))
print('Accuracy:', metrics.accuracy_score(loanTest, predictedResult))
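# Loan-default data is usually imbalanced, so accuracy alone can be misleading.
# A minimal follow-up sketch (not part of the original script) that reports
# per-class precision/recall and ROC AUC for the model fitted above:
print(metrics.classification_report(loanTest, predictedResult))

# probability of the positive (defaulter) class for ROC AUC
defaultProb = LogisticRegressor.predict_proba(factorsTest)[:, 1]
print('ROC AUC:', metrics.roc_auc_score(loanTest, defaultProb))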
import pickle
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn import metrics

rf = RandomForestClassifier(n_estimators=100, max_depth=10,
                            min_samples_split=2, min_samples_leaf=1)
rf.fit(fps_train, act_train)

# write the model for this fold to disk
pickle.dump(rf, outfile, 2)
i += 1
outfile.close()
print("done")

# make predictions for the test compounds
test_SVMpredictions = rf.predict(fps_test)

# 5-fold cross-validation on the training data
scores = cross_val_score(rf, fps_train, act_train, cv=5)
# scores = cross_val_score(clf_RF, X_train, y_train, cv=cv_counter, scoring='accuracy')

# output confusion matrix and accuracy on the test set
print(metrics.confusion_matrix(act_test, test_SVMpredictions))
accuracy = rf.score(fps_test, act_test)
print(accuracy)

# class probabilities for each test molecule (column 1 = active class)
test_SVMprobabilities = rf.predict_proba(fps_test)[:, 1]

# compute the AUC metric for this CV fold
fpr, tpr, thresholds = metrics.roc_curve(act_test, test_SVMprobabilities)
roc_auc = metrics.auc(fpr, tpr)
print("AUC (fold %d/%d): %f" % (i + 1, n, roc_auc))
mean_auc += roc_auc
print("Mean AUC: %f" % (mean_auc / n))
plt.plot(fpr, tpr, label="Model#%d (AUC=%.2f)" % (model_id + 1, roc_auc))
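# The fold counter i, fold count n, mean_auc accumulator, open outfile handle
# and model_id used above are assumed to come from an enclosing cross-validation
# loop. A minimal sketch of that scaffolding, assuming StratifiedKFold splits of
# fps_array/acts_array; the file names and fold logic are illustrative, not
# taken from the original script:
from sklearn.model_selection import StratifiedKFold

n = 5
i = 0
mean_auc = 0.0
model_id = 0
skf = StratifiedKFold(n_splits=n, shuffle=True, random_state=42)
for train_idx, test_idx in skf.split(fps_array, acts_array):
    fps_train, fps_test = fps_array[train_idx], fps_array[test_idx]
    act_train, act_test = acts_array[train_idx], acts_array[test_idx]
    outfile = open('rf_model_fold%d.pkl' % i, 'wb')
    # ... the per-fold training/evaluation code shown above runs here,
    #     including the manual i += 1 and outfile.close() calls ...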
import numpy as np
from rdkit.Chem import AllChem

# build Morgan (radius 2) fingerprints and activity labels for the test molecules
test_fps = []
test_acts = []
for k, v in testmols_dic.items():
    test_fp = AllChem.GetMorganFingerprintAsBitVect(k, 2)
    test_fps.append(test_fp)
    test_acts.append(testmols_dic[k])
test_fps_array = np.array(test_fps)
test_acts_array = np.array(test_acts)

# make predictions for the test compounds
test_SVMpredictions = rf.predict(test_fps_array)

# 5-fold cross-validation on the training data
scores = cross_val_score(rf, fps_array, acts_array, cv=5)
# scores = cross_val_score(rf, X_train, y_train, cv=cv_counter, scoring='accuracy')

# output confusion matrix and accuracy on the test set
print(metrics.confusion_matrix(test_acts_array, test_SVMpredictions))
accuracy = rf.score(test_fps_array, test_acts_array)
print(accuracy)

# class probabilities for each test molecule (column 1 = active class)
test_SVMprobabilities = rf.predict_proba(test_fps_array)[:, 1]

# compute the AUC metric for this CV fold
fpr, tpr, thresholds = metrics.roc_curve(test_acts_array, test_SVMprobabilities)
roc_auc = metrics.auc(fpr, tpr)
print("AUC (fold %d/%d): %f" % (i + 1, n, roc_auc))
mean_auc += roc_auc
print("Mean AUC: %f" % (mean_auc / n))
plt.plot(fpr, tpr, label="Model#%d (AUC=%.2f)" % (model_id + 1, roc_auc))
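# The plt.plot call above only adds the ROC curve for the current fold; a
# minimal, assumed finalization of the figure (the diagonal reference line,
# axis labels, and legend are additions, not part of the original script):
import matplotlib.pyplot as plt

plt.plot([0, 1], [0, 1], linestyle='--', color='grey', label='Random')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curves per CV fold')
plt.legend(loc='lower right')
plt.show()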