def test_save_predictions(self):
    """Train on the 'test_tr' fixture and check predictions reach disk.

    A classifier is trained on the quantized training data, then
    ``save_predictions`` is called once for every chunk yielded by
    ``load_testing_data``. The test passes when the output file exists
    afterwards. The file is removed again whether or not the test passes.
    """
    tr_path = os.path.join(DATA_DIR, 'test_tr')
    t_path = os.path.join(DATA_DIR, 'test_t')
    output_file = os.path.join(ROOT_DIR, 'outputs/rfc.test')

    rfc = RFC(n_estimators=100, criterion="entropy", n_jobs=-1)
    sampling_settings = {
        'bin_count': 16,
        'neg_samples': 7,
        'bin_samples': 20,
        'seed': 0,
        'nan_value': -1000000,
    }
    loading_tool = LoadingTool(sampling_settings)
    clas_tool = ClassificationTool(rfc)

    tr_data = loading_tool.load_training_data(tr_path)
    tr_data = loading_tool.quantize_data(tr_data)
    clas_tool.train_classifier(tr_data)
    # Drop the reference so the training set can be freed before the
    # testing chunks are loaded.
    tr_data = None

    # A leftover file from an earlier aborted run would make the isfile
    # check below pass even if nothing is written this time.
    if os.path.isfile(output_file):
        os.remove(output_file)

    try:
        for t_data in loading_tool.load_testing_data(t_path):
            t_data = loading_tool.quantize_data(t_data)
            clas_tool.save_predictions(t_data, output_file)
            t_data = None

        assert os.path.isfile(output_file)
    finally:
        # Clean up even when a later chunk raises after the file has
        # already been created, so no artifact leaks into other tests.
        if os.path.isfile(output_file):
            os.remove(output_file)
def test_train_classifier(self):
    """Fit the classifier on the 'test_tr' fixture and check its labels.

    After training on the quantized training data, the wrapped
    classifier's ``classes_`` must be exactly ``[0, 1, 2, 3]``.
    """
    expected_classes = [0, 1, 2, 3]
    training_path = os.path.join(DATA_DIR, 'test_tr')

    forest = RFC(n_estimators=100, criterion="entropy", n_jobs=-1)
    settings = {
        'bin_count': 16,
        'neg_samples': 7,
        'bin_samples': 20,
        'seed': 0,
        'nan_value': -1000000,
    }
    loader = LoadingTool(settings)
    classifier_tool = ClassificationTool(forest)

    # Load and quantize in one expression, then train on the result.
    training_set = loader.quantize_data(
        loader.load_training_data(training_path)
    )
    classifier_tool.train_classifier(training_set)
    # Release the training data before inspecting the fitted model.
    training_set = None

    fitted_classes = list(classifier_tool.classifier.classes_)
    assert fitted_classes == expected_classes