# Script fragment: vectorise the SMS train/test splits, fit a Multinomial
# Naive Bayes model on raw counts, then start the TF-IDF variant.
# `sms_bow`, `sms_train`, `sms_test`, `all_sms_matrix`, `np` and
# `TfidfTransformer` are defined earlier in the file (outside this view).
all_sms_matrix.shape  # (5559, 6661) as recorded by the original author

# For training messages
train_sms_matrix = sms_bow.transform(sms_train.text)
train_sms_matrix.shape  # (3891, 6661)

# For testing messages
test_sms_matrix = sms_bow.transform(sms_test.text)
test_sms_matrix.shape  # (1668, 6661)

####### Without TFIDF matrices ########################
# Preparing a naive bayes model on training data set
from sklearn.naive_bayes import MultinomialNB as MB

# Multinomial Naive Bayes
classifier_mb = MB()
classifier_mb.fit(train_sms_matrix, sms_train.type)

# Accuracy = fraction of predictions equal to the true label.
train_pred_m = classifier_mb.predict(train_sms_matrix)
accuracy_train_m = np.mean(train_pred_m == sms_train.type)  # 98%

test_pred_m = classifier_mb.predict(test_sms_matrix)
accuracy_test_m = np.mean(test_pred_m == sms_test.type)  # 96%
print(accuracy_train_m, accuracy_test_m)

#########################################################
# Learning term weighting and normalizing on the TRAINING messages only.
# Fitting on `all_sms_matrix` would let document frequencies from the test
# messages influence the IDF weights (data leakage), which makes the test
# score optimistic. The fitted transformer is still applied to both splits.
tfidf_transformer = TfidfTransformer().fit(train_sms_matrix)

# Preparing TFIDF for train messages
train_tfidf = tfidf_transformer.transform(train_sms_matrix)
# (3891, 6661)  <- shape noted by the original author for the preceding matrix

# Script fragment: vectorise the held-out messages and fit Naive Bayes on raw
# counts. `email_bow`, `train_emails_matrix`, `x_test`, `y_train`, `y_test`,
# `np` and `pd` come from earlier in the file (outside this view).

## For test data
test_emails_matrix = email_bow.transform(x_test)
test_emails_matrix.shape  ## (1668, 6661)

## Building the model without doing the TFIDF ##
## Preparing the Naive Bayes model
from sklearn.naive_bayes import GaussianNB as GB, MultinomialNB as MB

## Multinomial naive Bayes: construct and fit in one step (fit returns self)
classifier_nb = MB().fit(train_emails_matrix, y_train)

## Training-set accuracy and confusion table (rows: predicted, cols: actual)
train_pred_nb = classifier_nb.predict(train_emails_matrix)
accuracy_nb = (train_pred_nb == y_train).mean()  ## 98.8%
pd.crosstab(train_pred_nb, y_train)

## Predicting on test data
test_pred_nb = classifier_nb.predict(test_emails_matrix)
accuracy_test_nb = (test_pred_nb == y_test).mean()  ## 96.82%
pd.crosstab(test_pred_nb, y_test)

## Building Gaussian model
classifier_gb = GB()
# Script fragment: TF-IDF weighting of the tweet count matrices followed by a
# Multinomial Naive Bayes fit and evaluation. `train_twitter_matrix`,
# `test_twitter_matrix`, `twitter_train`, `twitter_test`, `TfidfTransformer`,
# `np` and `pd` are defined earlier in the file (outside this view).

# Learn the IDF weights from the TRAINING tweets only. Fitting on the full
# corpus (`all_twitter_matrix`) would leak test-set document frequencies into
# the features and make the test accuracy optimistic.
tfidf_transformer = TfidfTransformer().fit(train_twitter_matrix)

# Preparing TFIDF for train tweets
train_tfidf = tfidf_transformer.transform(train_twitter_matrix)
train_tfidf.shape  # (row, column)

# Preparing TFIDF for test tweets
test_tfidf = tfidf_transformer.transform(test_twitter_matrix)
test_tfidf.shape  # (row, column)

# Preparing a naive bayes model on training data set
from sklearn.naive_bayes import MultinomialNB as MB

# Multinomial Naive Bayes
classifier_mb = MB()
classifier_mb.fit(train_tfidf, twitter_train.target)

# Evaluation on Test Data
test_pred_m = classifier_mb.predict(test_tfidf)
test_pred_m
accuracy_test_m = np.mean(test_pred_m == twitter_test.target)
accuracy_test_m

from sklearn.metrics import accuracy_score

# accuracy_score is documented as (y_true, y_pred); the value is the same
# either way, but the documented order keeps the call readable.
accuracy_score(twitter_test.target, test_pred_m)

# Confusion table: rows are predictions, columns are true targets.
pd.crosstab(test_pred_m, twitter_test.target)

# Training Data accuracy
train_pred_m = classifier_mb.predict(train_tfidf)
# Script fragment: integer-encode the string columns of the salary data, split
# features/target by position, and fit Multinomial and Gaussian Naive Bayes.
# `LabelEncoder`, `string_columns`, `salary_train`, `salary_test`, `np` and
# `pd` are defined earlier in the file (outside this view).
number = LabelEncoder()
for i in string_columns:
    # Fit ONE mapping per column on the values of both frames, then apply it
    # to each. Calling fit_transform separately on train and test can assign
    # different integers to the same category whenever the two frames do not
    # contain exactly the same set of values.
    number.fit(pd.concat([salary_train[i], salary_test[i]]))
    salary_train[i] = number.transform(salary_train[i])
    salary_test[i] = number.transform(salary_test[i])

# NOTE(review): iloc[:, 0:12] selects columns 0-11 while the target is column
# 13, so column 12 ends up in neither X nor y - confirm this is intended.
x_train = salary_train.iloc[:, 0:12]
y_train = salary_train.iloc[:, 13]
x_test = salary_test.iloc[:, 0:12]
y_test = salary_test.iloc[:, 13]

####### Importing the naive Bayes classifiers ######
from sklearn.naive_bayes import MultinomialNB as MB
from sklearn.naive_bayes import GaussianNB as GB

# Multinomial Naive Bayes: accuracy and confusion table on both splits
classifiers_mb = MB()
classifiers_mb.fit(x_train, y_train)
train_pred_mb = classifiers_mb.predict(x_train)
train_accu_mb = np.mean(train_pred_mb == y_train)  ## 77% (author's figure, before this change)
pd.crosstab(train_pred_mb, y_train)

test_pred_mb = classifiers_mb.predict(x_test)
test_accu_mb = np.mean(test_pred_mb == y_test)  ## 77% (author's figure, before this change)
pd.crosstab(test_pred_mb, y_test)

# Gaussian Naive Bayes on the same features
classifiers_gb = GB()
classifiers_gb.fit(x_train, y_train)
train_pred_gb = classifiers_gb.predict(x_train)
train_accu_gb = np.mean(train_pred_gb == y_train)  ## 80% (author's figure, before this change)
pd.crosstab(train_pred_gb, y_train)
# Script fragment: positional feature/target split, then Gaussian and
# Multinomial Naive Bayes fitted on train and evaluated on test.
# `final_train_df`, `final_test_df`, `GB`, `MB`, `confusion_matrix`,
# `classification_report`, `np` and `pd` come from earlier in the file.
# Each bare input() pauses the script until the user presses Enter.

# Training split: first 13 columns are features, column 13 is the target
train_X = final_train_df.iloc[:, :13]
train_y = final_train_df.iloc[:, 13]
print(train_X.head())
print(train_y.head())
input()

# Test split, same column layout
test_X = final_test_df.iloc[:, :13]
test_y = final_test_df.iloc[:, 13]
print(test_X.head())
print(test_y.head())
input()

# Naive Bayes models
ignb = GB()
imnb = MB()

# GaussianNB model: fit, then predict on the test features
ignb.fit(train_X, train_y)
pred_gnb = ignb.predict(test_X)

# Multinomial model: fit, then predict on the test features
imnb.fit(train_X, train_y)
pred_mnb = imnb.predict(test_X)

# Confusion matrix GaussianNB model
print(confusion_matrix(test_y, pred_gnb))
print(pd.crosstab(test_y.values.flatten(), pred_gnb))
print(classification_report(test_y, pred_gnb))  # classification report
print((pred_gnb == test_y.values.flatten()).mean())
#>> Accuracy = 0.7946879150066402 (value recorded by the original author)
input()

# Confusion matrix Multinomial model
print(confusion_matrix(test_y, pred_mnb))
# Script fragment (notebook export): TF-IDF features for the test tweets, then
# a Multinomial Naive Bayes fit and evaluation. `train_tfidf`,
# `tfidf_transformer`, `test_twitter_matrix`, `twitter_train`, `twitter_test`,
# `np` and `pd` are defined earlier in the file (outside this view).
train_tfidf.shape  # (row, column)

# TF-IDF features for the test split, using the already-fitted transformer
test_tfidf = tfidf_transformer.transform(test_twitter_matrix)
test_tfidf.shape  # (row, column)

# # Model Building

# In[30]:

# Multinomial Naive Bayes, constructed and fitted on the training features
from sklearn.naive_bayes import MultinomialNB as MB

classifier_mb = MB().fit(train_tfidf, twitter_train.target)

# Evaluation on test data: share of predictions equal to the true target
test_pred_m = classifier_mb.predict(test_tfidf)
test_pred_m
accuracy_test_m = (test_pred_m == twitter_test.target).mean()
accuracy_test_m

# Same figure via scikit-learn, plus a predicted-vs-actual table
from sklearn.metrics import accuracy_score

accuracy_score(test_pred_m, twitter_test.target)
pd.crosstab(test_pred_m, twitter_test.target)

# Training data predictions
train_pred_m = classifier_mb.predict(train_tfidf)