Exemple #1
0
# Project the train and test messages onto the same feature space as the
# full-corpus matrix, using the already-fitted `sms_bow` object.
# NOTE(review): `sms_bow` is defined outside this excerpt; presumably a fitted
# bag-of-words vectorizer -- confirm.
all_sms_matrix.shape  # (5559,6661)
# For training messages
train_sms_matrix = sms_bow.transform(sms_train.text)
train_sms_matrix.shape  # (3891,6661)

# For testing messages
test_sms_matrix = sms_bow.transform(sms_test.text)
test_sms_matrix.shape  # (1668,6661)

####### Without TFIDF matrices ########################
# Multinomial Naive Bayes trained directly on the term-count matrices.

from sklearn.naive_bayes import MultinomialNB as MB

# fit() returns the estimator itself, so construction and training are chained.
classifier_mb = MB().fit(train_sms_matrix, sms_train.type)

# Predict on both splits first ...
train_pred_m = classifier_mb.predict(train_sms_matrix)
test_pred_m = classifier_mb.predict(test_sms_matrix)

# ... then score each split as the share of predictions equal to the label.
accuracy_train_m = np.mean(train_pred_m == sms_train.type)  # 98%
accuracy_test_m = np.mean(test_pred_m == sms_test.type)  # 96%
print(accuracy_train_m, accuracy_test_m)

##########################################################

# Learn the TF-IDF term weighting and normalisation from the full message
# matrix.
# NOTE(review): `all_sms_matrix` appears to cover train and test rows
# together, so the IDF statistics also see the test messages -- confirm this
# is intended rather than fitting on `train_sms_matrix` only.
tfidf_transformer = TfidfTransformer().fit(all_sms_matrix)

# Apply the learned weighting to the training count matrix
train_tfidf = tfidf_transformer.transform(train_sms_matrix)
Exemple #2
0
#(3891, 6661)

## Test data: map the held-out texts onto the feature space of `email_bow`.
## NOTE(review): `email_bow` is defined outside this excerpt; presumably an
## already-fitted bag-of-words vectorizer -- confirm.
test_emails_matrix = email_bow.transform(x_test)
test_emails_matrix.shape
##(1668, 6661)

## Models trained on the count matrices, with no TF-IDF step ###

from sklearn.naive_bayes import GaussianNB as GB, MultinomialNB as MB

## Multinomial naive Bayes: construct and train in one chained call
## (fit() returns the estimator itself)
classifier_nb = MB().fit(train_emails_matrix, y_train)

## Training split: predictions, accuracy, predicted-vs-actual table
train_pred_nb = classifier_nb.predict(train_emails_matrix)
accuracy_nb = np.mean(train_pred_nb == y_train)  ## 98.8%
pd.crosstab(train_pred_nb, y_train)

## Test split: same three steps
test_pred_nb = classifier_nb.predict(test_emails_matrix)
accuracy_test_nb = np.mean(test_pred_nb == y_test)  ## 96.82%
pd.crosstab(test_pred_nb, y_test)

## Gaussian naive Bayes estimator (created here, not yet trained)
classifier_gb = GB()
Exemple #3
0
# Learn TF-IDF weights from the full document-term matrix.
# NOTE(review): `all_twitter_matrix` presumably contains the test rows too,
# so the IDF statistics are not train-only -- confirm this is intended.
tfidf_transformer = TfidfTransformer().fit(all_twitter_matrix)

# Apply the learned weighting to the training count matrix
train_tfidf = tfidf_transformer.transform(train_twitter_matrix)
train_tfidf.shape  # (rows, columns)

# Apply the same weighting to the test count matrix
test_tfidf = tfidf_transformer.transform(test_twitter_matrix)
test_tfidf.shape  # (rows, columns)

# Train a multinomial naive Bayes model on the TF-IDF training matrix and
# evaluate it on the test matrix.

from sklearn.naive_bayes import MultinomialNB as MB

# Multinomial Naive Bayes
classifier_mb = MB()
classifier_mb.fit(train_tfidf, twitter_train.target)

# Evaluation on Test Data
test_pred_m = classifier_mb.predict(test_tfidf)
test_pred_m
# Accuracy = share of predictions that equal the true label
accuracy_test_m = np.mean(test_pred_m == twitter_test.target)
accuracy_test_m

from sklearn.metrics import accuracy_score
# Arguments follow the documented (y_true, y_pred) order. Accuracy is
# symmetric, so the value matches accuracy_test_m above.
accuracy_score(twitter_test.target, test_pred_m)

# Table of counts: rows are predicted labels, columns are actual labels
pd.crosstab(test_pred_m, twitter_test.target)

# Training Data accuracy
train_pred_m = classifier_mb.predict(train_tfidf)
# Integer-encode the string columns of the salary frames, then fit and score
# multinomial and Gaussian naive Bayes models.
number = LabelEncoder()

# Learn each column's category -> integer mapping once, from the train and
# test values together, and apply that single mapping to both frames.
# Refitting the encoder on the test frame (fit_transform) can give the same
# category different codes in train and test when their category sets differ.
for i in string_columns:
    number.fit(pd.concat([salary_train[i], salary_test[i]]))
    salary_train[i] = number.transform(salary_train[i])
    salary_test[i] = number.transform(salary_test[i])

# Features are columns 0-11; the target is column 13.
# NOTE(review): column 12 is used neither as a feature nor as the target --
# confirm this is intentional and not an off-by-one in the slice.
x_train = salary_train.iloc[:, 0:12]
y_train = salary_train.iloc[:, 13]
x_test = salary_test.iloc[:, 0:12]
y_test = salary_test.iloc[:, 13]

####### Importing the naive Bayes classifiers ######
from sklearn.naive_bayes import MultinomialNB as MB
from sklearn.naive_bayes import GaussianNB as GB

# Multinomial model: training accuracy and predicted-vs-actual table.
# Percentages below are the figures noted by the original author; re-measure.
classifiers_mb = MB()
classifiers_mb.fit(x_train, y_train)
train_pred_mb = classifiers_mb.predict(x_train)
train_accu_mb = np.mean(train_pred_mb == y_train)  ## 77%
pd.crosstab(train_pred_mb, y_train)

# Multinomial model: test accuracy and predicted-vs-actual table
test_pred_mb = classifiers_mb.predict(x_test)
test_accu_mb = np.mean(test_pred_mb == y_test)  ## 77%
pd.crosstab(test_pred_mb, y_test)

# Gaussian model: training accuracy and predicted-vs-actual table
classifiers_gb = GB()
classifiers_gb.fit(x_train, y_train)
train_pred_gb = classifiers_gb.predict(x_train)
train_accu_gb = np.mean(train_pred_gb == y_train)  ## 80%
pd.crosstab(train_pred_gb, y_train)
# Training frame: the first 13 columns are the features, column 13 the label.
train_X, train_y = final_train_df.iloc[:, :13], final_train_df.iloc[:, 13]
print(train_X.head())
print(train_y.head())
input()  # block until Enter is pressed

# Test frame: same column split.
test_X, test_y = final_test_df.iloc[:, :13], final_test_df.iloc[:, 13]
print(test_X.head())
print(test_y.head())
input()

# One estimator of each kind (GB and MB are imported outside this excerpt)
ignb, imnb = GB(), MB()

# Train on the training split, then predict the test split
ignb.fit(train_X, train_y)  # GaussianNB model
pred_gnb = ignb.predict(test_X)
imnb.fit(train_X, train_y)  # Multinomial model
pred_mnb = imnb.predict(test_X)

# GaussianNB report: confusion matrix, actual-vs-predicted table,
# classification report, then plain accuracy
print(confusion_matrix(test_y, pred_gnb))
print(pd.crosstab(test_y.values.flatten(), pred_gnb))
print(classification_report(test_y, pred_gnb))
# Accuracy noted by the original author: 0.7946879150066402
print(np.mean(pred_gnb == test_y.values.flatten()))
input()

# Multinomial report: confusion matrix
print(confusion_matrix(test_y, pred_mnb))
train_tfidf.shape  # (rows, columns)

# Apply the already-fitted TF-IDF weighting to the test count matrix
test_tfidf = tfidf_transformer.transform(test_twitter_matrix)
test_tfidf.shape  # (rows, columns)

# # Model Building

# In[30]:

# Train a multinomial naive Bayes model on the TF-IDF training matrix and
# evaluate it on the test matrix.

from sklearn.naive_bayes import MultinomialNB as MB

# Multinomial Naive Bayes
classifier_mb = MB()
classifier_mb.fit(train_tfidf, twitter_train.target)

# Evaluation on Test Data
test_pred_m = classifier_mb.predict(test_tfidf)
test_pred_m
# Accuracy = share of predictions that equal the true label
accuracy_test_m = np.mean(test_pred_m == twitter_test.target)
accuracy_test_m

from sklearn.metrics import accuracy_score
# Arguments follow the documented (y_true, y_pred) order. Accuracy is
# symmetric, so the value matches accuracy_test_m above.
accuracy_score(twitter_test.target, test_pred_m)

# Table of counts: rows are predicted labels, columns are actual labels
pd.crosstab(test_pred_m, twitter_test.target)

# Training Data accuracy
train_pred_m = classifier_mb.predict(train_tfidf)