Ejemplo n.º 1
0
from naive_bayes_classifier import NaiveBayesClassifer
import sys

# Training entry point: hands the first command-line argument to
# model.train_all().
model = NaiveBayesClassifer()

# Check the argument count before indexing: with no argument, sys.argv[1]
# raises IndexError, which made the usage message below unreachable.
# The truthiness test is kept so an empty-string argument is still rejected.
if len(sys.argv) > 1 and sys.argv[1]:
  model.train_all(sys.argv[1])
  # Parenthesised single-argument print produces the same output on
  # Python 2 and Python 3.
  print('Done')
else:
  print('Must provide a path to input')
Ejemplo n.º 2
0
# Driver fragment: builds a vocabulary, normalizes the data splits, trains a
# Naive Bayes classifier and sweeps the `alpha` argument over the validation
# split, printing the accuracy for each value.
# NOTE(review): train_data, train_data_test, validation_data, test_data, np,
# PreprocessData and NaiveBayesClassifer are not defined in this fragment and
# must already be in scope.
preprocess = PreprocessData()
#vocab = preprocess.load_pickle_file('vocab')
# NOTE(review): the vocabulary is generated from train_data_test *before* that
# array is normalized further down -- confirm this ordering is intended.
vocab = preprocess.generate_vocabulary(train_data_test)
# Report how many distinct entries the vocabulary contains.
print(len(np.unique(vocab)))
print("Normalizing train data")
# NOTE(review): both calls below pass the same 'train_data_normalized' string;
# if normalize_text uses it as an output/cache name the second call would
# clobber the first -- verify against PreprocessData.normalize_text.
train_data = preprocess.normalize_text(train_data, 'train_data_normalized')
train_data_test = preprocess.normalize_text(train_data_test,
                                            'train_data_normalized')
# NOTE(review): the message says "test data", but the validation split is
# normalized first and the test split second.
print("Normalizing test data")
validation_data = preprocess.normalize_text(validation_data,
                                            'validation_data_normalized')
# The test split is the only one normalized with train=False.
test_data = preprocess.normalize_text(test_data,
                                      'test_data_normalized',
                                      train=False)
# The classifier is both constructed from and trained on train_data_test
# (not train_data).
naive_classifier = NaiveBayesClassifer(train_data_test, vocab)
naive_classifier.train(train_data_test)
# Candidate alpha values: 20 evenly spaced points from 0.09 to 1 (inclusive).
alphas = np.linspace(0.09, 1,
                     20)  # Best alpha submit 55.211% 0.09063157894736842
accuracies = []
for alpha in alphas:
    # All columns except the last are passed to the classifier; the last
    # column is compared against the predictions below (presumably it holds
    # the labels -- confirm against the data loader).
    # NOTE(review): the meaning of the positional True flag is not visible
    # here; see NaiveBayesClassifer.test_accuracy.
    preds = naive_classifier.test_accuracy(validation_data[:, :-1], True,
                                           alpha)
    # Fraction of predictions equal to the last column of validation_data.
    accuracy = np.mean(preds == validation_data[:, -1])
    accuracies.append(accuracy)
    print("accuracy {0} for {1}".format(accuracy, str(alpha)))
# np.argmax returns the index of the first maximum, so ties resolve to the
# smallest alpha.
print("Best alpha: ", alphas[np.argmax(accuracies)])
# Disabled alternative: rebuild the vocabulary and classifier from train_data.
#vocab = preprocess.generate_vocabulary(train_data)
#naive_classifier_test = NaiveBayesClassifer(train_data, vocab)
#naive_classifier_test.train(train_data)
predictions = naive_classifier.test_accuracy(test_data, False,
Ejemplo n.º 3
0
from naive_bayes_classifier import NaiveBayesClassifer
import sys

# Prediction entry point: constructs the classifier with load=True and hands
# the first command-line argument to model.predict_all().
model = NaiveBayesClassifer(load=True)

# Check the argument count before indexing: with no argument, sys.argv[1]
# raises IndexError, which made the usage message below unreachable.
# The truthiness test is kept so an empty-string argument is still rejected.
if len(sys.argv) > 1 and sys.argv[1]:
  model.predict_all(sys.argv[1])
  # Parenthesised single-argument print produces the same output on
  # Python 2 and Python 3.
  print('Done')
else:
  print('Must provide a path to input')