-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_naivebayes.py
34 lines (24 loc) · 1.14 KB
/
run_naivebayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
"""
Driver to run the Naive Bayes model analysis.
author: Luigi Patruno
"""
import numpy as np
import pandas as pd
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split

from get_data import data
from naivebayes import NaiveBayes
def main():
    """Train and score the Naive Bayes model over several train/test splits.

    For each training fraction in ``np.linspace(.5, .9, 5)`` the frame
    returned by ``data()`` is split with a fixed ``random_state``; a
    ``NaiveBayes`` model is then fitted on the training part twice (once per
    ``ignore_missing`` setting) and the test error (``1 - score``) is printed.
    """
    rare_country = 'Holand-Netherlands'

    df = data()
    for train_size in np.linspace(.5, .9, 5):
        train, test = train_test_split(df, train_size=train_size, random_state=42)

        # Since there is only 1 sample with native-country == Holand-Netherlands,
        # ensure that this sample is in the training set.
        is_rare = test['native-country'] == rare_country
        if is_rare.any():
            # DataFrame.append was removed in pandas 2.0; pd.concat performs the
            # same row-wise concatenation and keeps the original index labels.
            train = pd.concat([train, test[is_rare]])
            # Filter the test set once here rather than on every scoring call.
            test = test[~is_rare]

        for ignore_missing in [True, False]:
            nb = NaiveBayes(ignore_missing=ignore_missing)
            nb.learn_parameters(train)
            acc = nb.score(test)
            print('\nTrain size: {} Test error: {} Ignore features with missing values: {}'.format(train_size, (1-acc), ignore_missing))
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()