def init_classifier_models(classifier_data_url, test_data_url, fileName):
    """Load a pickled classifier, predict on an unlabeled test set, and save the result.

    Args:
        classifier_data_url: Path to the pickled classifier (.pkl) file.
        test_data_url: Path to the unlabeled test CSV.
        fileName: Base name of the prediction CSV written to 'output_csvs'.
    """
    # SECURITY NOTE(review): pickle.load executes arbitrary code on load —
    # only ever point this at model files produced by this project.
    with open(classifier_data_url, 'rb') as model_file:
        classifier = pickle.load(model_file)
    helper = service()
    test_features = helper.read_csv_data_without_labels(test_data_url)
    predictions = classifier.predict(test_features)
    helper.save_csv_result(predictions, fileName, 'output_csvs')
def tune_bernoulli_alpha():
    """Sweep BernoulliNB's smoothing parameter alpha on dataset 2 and print accuracies.

    Fits one BernoulliNB per candidate alpha on the training split and reports
    validation accuracy for each, so the best alpha can be picked by hand.

    NOTE(review): this function was originally named parameter_iteration_tunning,
    a name redefined twice more later in this file — Python keeps only the last
    definition, so this version was unreachable dead code. Renamed so it is
    actually callable; existing callers of the old name are unaffected (they
    still resolve to the surviving last definition).
    """
    _service = service()
    train_features, train_labels = _service.read_csv_data('dataset2/ds2/ds2Train.csv')
    validation_features, validation_labels = _service.read_csv_data('dataset2/ds2/ds2Val.csv')
    # Candidate smoothing values spanning four orders of magnitude.
    alphas = [0.01, 0.001, 0.1, 1, 10]
    for alpha in alphas:
        _clf = BernoulliNB(alpha=alpha)
        _clf.fit(train_features, train_labels)
        pred = _clf.predict(validation_features)
        print('BernoulliNB accuracy ' + str(alpha) + ' is',
              accuracy_score(validation_labels, pred))
def tune_random_forest_estimators():
    """Sweep RandomForestClassifier's n_estimators on dataset 1 and print accuracies.

    Fits one forest per candidate tree count on the training split and reports
    validation accuracy for each, so the best size can be picked by hand.

    NOTE(review): this function was originally named parameter_iteration_tunning,
    a name also used by other definitions in this file — Python keeps only the
    last definition, so this version was unreachable dead code. Renamed so it is
    actually callable; existing callers of the old name are unaffected (they
    still resolve to the surviving last definition).
    """
    _service = service()
    train_features, train_labels = _service.read_csv_data(
        'dataset1/ds1/ds1Train.csv')
    validation_features, validation_labels = _service.read_csv_data(
        'dataset1/ds1/ds1Val.csv')
    # Candidate forest sizes, from tiny to fairly large.
    tree_counts = [10, 100, 200, 300, 400, 500, 1000]
    for n_trees in tree_counts:
        _clf = RandomForestClassifier(n_estimators=n_trees)
        _clf.fit(train_features, train_labels)
        pred = _clf.predict(validation_features)
        print('RandomForest accuracy ' + str(n_trees) + ' is',
              accuracy_score(validation_labels, pred))
def load_naive_bayes_bernoulli():
    """Train BernoulliNB(alpha=0.001) on dataset 2 and evaluate on its validation split.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data('dataset2/ds2/ds2Train.csv')
    X_val, y_val = helper.read_csv_data('dataset2/ds2/ds2Val.csv')
    # alpha chosen from the earlier smoothing-parameter sweep.
    model = BernoulliNB(alpha=0.001)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'ds2Val-nb', 'output_csvs')
    helper.save_model_to_pkl(model, 'ds2Classifier-nb', 'dataset2/models/')
def parameter_iteration_tunning():
    """Sweep DecisionTreeClassifier's max_depth (1-29) on dataset 1 and print accuracies.

    Fits one tree per candidate depth on the training split and reports the
    validation accuracy for each, so the best depth can be picked by hand.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data(
        'dataset1/ds1/ds1Train.csv')
    X_val, y_val = helper.read_csv_data(
        'dataset1/ds1/ds1Val.csv')
    for depth in range(1, 30):
        model = DecisionTreeClassifier(max_depth=depth)
        model.fit(X_train, y_train)
        predictions = model.predict(X_val)
        print('Decision Tree accuracy ' + str(depth) + ' is',
              accuracy_score(y_val, predictions))
def load_naive_bayes_multinomial():
    """Train MultinomialNB(alpha=0.01) on dataset 1 and evaluate on its validation split.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data('dataset1/ds1/ds1Train.csv')
    X_val, y_val = helper.read_csv_data('dataset1/ds1/ds1Val.csv')
    # alpha=0.01 — light smoothing; presumably chosen from an earlier sweep.
    model = MultinomialNB(alpha=0.01)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'nb_multinomial_results', 'dataset1/results/')
    helper.save_model_to_pkl(model, 'nb_multinomial_results', 'dataset1/models/')
def load_random_forest():
    """Train a 10000-tree RandomForestClassifier on dataset 1 and evaluate on validation.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data(
        'dataset1/ds1/ds1Train.csv')
    X_val, y_val = helper.read_csv_data(
        'dataset1/ds1/ds1Val.csv')
    # NOTE(review): 10000 trees is very large — training will be slow; confirm
    # this count is intentional rather than a typo for 1000.
    model = RandomForestClassifier(n_estimators=10000)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'ds1Val-3', 'output_csvs')
    helper.save_model_to_pkl(model, 'ds1Classifier-3', 'dataset1/models/')
def load_decision_tree():
    """Train an entropy DecisionTreeClassifier (max_depth=36) on dataset 1 and evaluate.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data(
        'dataset1/ds1/ds1Train.csv')
    X_val, y_val = helper.read_csv_data(
        'dataset1/ds1/ds1Val.csv')
    # Entropy split criterion with a fixed depth cap of 36.
    model = DecisionTreeClassifier(criterion='entropy', max_depth=36)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'ds1Val-dt', 'output_csvs')
    helper.save_model_to_pkl(model, 'ds1Classifier-dt', 'dataset1/models/')
def load_kneighbors():
    """Train a 1-nearest-neighbor classifier on dataset 1 and evaluate on validation.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data(
        'dataset1/ds1/ds1Train.csv')
    X_val, y_val = helper.read_csv_data(
        'dataset1/ds1/ds1Val.csv')
    model = KNeighborsClassifier(n_neighbors=1)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'k_neighbors_results', 'dataset1/results/')
    helper.save_model_to_pkl(model, 'k_neighbors_results', 'dataset1/models/')
def load_naive_bayes_complement():
    """Train ComplementNB(alpha=1000) on dataset 2 and evaluate on its validation split.

    Prints a classification report, confusion matrix, and accuracy score, then
    persists the validation predictions as CSV and the fitted model as a pickle.
    """
    helper = service()
    X_train, y_train = helper.read_csv_data(
        'dataset2/ds2/ds2Train.csv')
    X_val, y_val = helper.read_csv_data(
        'dataset2/ds2/ds2Val.csv')
    # NOTE(review): alpha=1000 is unusually heavy smoothing — confirm this value
    # came from a deliberate tuning run.
    model = ComplementNB(alpha=1000)
    model = model.fit(X_train, y_train)
    y_pred = model.predict(X_val)
    print(classification_report(y_val, y_pred))
    print(confusion_matrix(y_val, y_pred))
    print("Accuracy Score:", accuracy_score(y_val, y_pred))
    helper.save_csv_result(y_pred, 'nb_complement_results', 'dataset2/results/')
    helper.save_model_to_pkl(model, 'nb_complement_results', 'dataset2/models/')