def lr_predict(train, test, preprocessing_type):
    """Train and evaluate a logistic-regression classifier.

    The class label is assumed to be the last column of both frames
    (split off by ``preprocessing.split_labels``). When
    ``preprocessing_type`` is ``'smote'`` the training data is
    resampled with SMOTE before fitting.
    """
    # Split off the class label (last column).
    features, labels = preprocessing.split_labels(train)
    if preprocessing_type == 'smote':
        features, labels = preprocessing.apply_smote(features, labels)

    model = linear_model.LogisticRegression()
    validation.cross_validate(model, features, labels)
    model.fit(features, labels)

    # Evaluate on the held-out test set.
    test_features, test_labels = preprocessing.split_labels(test)
    validation.test(model, test_features, test_labels)
def multiple_balanced_sets():
    """Train logistic-regression models on several balanced subsamples.

    Draws 5 balanced samples from the training data, cross-validates and
    fits one classifier per sample, and evaluates each one on the same
    held-out test set.
    """
    train, test = preprocessing.prepare_data()
    train_list = preprocessing.multiple_balanced_samples(train, 5)
    # BUG FIX: split the test set ONCE, before the loop. The original
    # re-ran `test, test_labels = preprocessing.split_labels(test)` inside
    # the loop, rebinding `test` to its feature-only half on the first
    # iteration, so iterations 2-5 split an already-split frame.
    test_features, test_labels = preprocessing.split_labels(test)
    for i in range(5):
        # separate class label (last column)
        sample, labels = preprocessing.split_labels(train_list[i])
        classifier = linear_model.LogisticRegression()
        validation.cross_validate_set(classifier, sample, labels)
        validation.cross_validate(classifier, sample, labels)
        classifier.fit(sample, labels)
        # evaluate on the shared held-out test set
        validation.test(classifier, test_features, test_labels)
def svm_predict(train, test, preprocessing_type):
    """Train and evaluate an RBF-kernel support-vector classifier.

    The class label is the last column of both frames. When
    ``preprocessing_type`` is ``'smote'`` the training data is
    resampled with SMOTE before fitting.
    """
    # Split off the class label (last column).
    features, labels = preprocessing.split_labels(train)
    if preprocessing_type == 'smote':
        features, labels = preprocessing.apply_smote(features, labels)

    # Classifier.
    # Class weight parameter: weights positive class more strongly than
    # negative class, e.g. class_weight={1: 2.61, 0: 0.383} (left off here).
    model = svm.SVC(kernel='rbf')
    validation.cross_validate(model, features, labels)
    model.fit(features, labels)

    # Evaluate on the held-out test set.
    test_features, test_labels = preprocessing.split_labels(test)
    validation.test(model, test_features, test_labels)
def rf_predict(train, test, preprocessing_type, results_file):
    """Train/evaluate a random forest and append one metrics row to a CSV.

    Args:
        train, test: frames whose last column is the class label.
        preprocessing_type: 'smote' to resample the training data.
        results_file: a csv.writer-like object; one row is appended:
            [config label, min_samples_split, n_estimators,
             6 validation metrics, 6 test metrics].
    """
    # separate class label
    train, labels = preprocessing.split_labels(train)
    if preprocessing_type == 'smote':
        train, labels = preprocessing.apply_smote(train, labels)

    # Named hyper-parameters so the constructor and the results row cannot
    # drift apart (the original hard-coded 40 and 15 in both places).
    min_samples_split = 40
    n_estimators = 15
    classifier = ensemble.RandomForestClassifier(
        class_weight={0: 0.75, 1: 1.5},
        min_samples_split=min_samples_split,
        n_estimators=n_estimators)
    # NOTE(review): fit() runs before cross-validation here, unlike the
    # other *_predict helpers where cross_validate runs first — presumably
    # harmless if cross_validate clones the estimator; confirm. Order kept.
    classifier.fit(train, labels)
    vresult = validation.cross_validate(classifier, train, labels)

    # test
    test, test_labels = preprocessing.split_labels(test)
    tresult = validation.test(classifier, test, test_labels)

    # save results: config columns, then validation metrics, then test
    # metrics (replaces 16 consecutive append() calls with one literal
    # plus extends over a shared key order).
    metric_keys = ('roc_auc', 'precision', 'recall', 'f1', 'fp', 'fn')
    results = ["low_skew (0=0.75, 1=1.5)", min_samples_split, n_estimators]
    results.extend(vresult[key] for key in metric_keys)
    results.extend(tresult[key] for key in metric_keys)
    results_file.writerow(results)
def basic():
    """Run the NN pipeline once on SMOTE-resampled training data,
    writing results to a CSV file."""
    train, test = preprocessing.prepare_data(True)
    # NOTE(review): the output filename says "nosmote" but SMOTE *is*
    # applied below — presumably a stale filename; confirm which
    # experiment this file is meant to record.
    with open('nn_resultsfeaturedrop_nosmote.csv', 'w') as out:
        writer = csv.writer(out)
        # add header
        # vary each parameter of random forest
        features, labels = preprocessing.split_labels(train)
        features, labels = preprocessing.apply_smote(features, labels)
        nn_predict(features, labels, test, writer, {})
def basic():
    """Run the NN pipeline twice — without and with SMOTE — after
    dropping the 'Amount' column, writing both results to one CSV."""
    train, test = preprocessing.prepare_data(True)
    # Drop the raw amount feature from both sets.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_results.csv', 'w') as out:
        writer = csv.writer(out)
        features, labels = preprocessing.split_labels(train)
        # First run: plain training data.
        nn_predict(features, labels, test, writer)
        # Second run: SMOTE-resampled training data.
        features, labels = preprocessing.apply_smote(features, labels)
        nn_predict(features, labels, test, writer)
def basic():
    """Sweep the second hidden-layer size (1..29) for the NN, no SMOTE,
    with the 'Amount' column dropped; one result row per setting."""
    train, test = preprocessing.prepare_data(True)
    # Drop the raw amount feature from both sets.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_resultslayer2_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # DEAD-CODE FIX: removed the guard `if num_layers_2 == 0 and
        # num_layers_3 != 0: continue` — the loop variable is always >= 1
        # and num_layers_3 was fixed at 0, so it never fired. Also removed
        # the unused num_layers_1 / num_layers_3 / alpha locals and the
        # pre-loop num_layers_2 = 0 that the loop immediately shadowed.
        for num_layers_2 in range(1, 30):
            # split_labels is re-run per iteration in case nn_predict
            # mutates its inputs — TODO confirm and hoist if it is pure.
            split_train, split_labels = preprocessing.split_labels(train)
            nn_predict(split_train, split_labels, test, writer,
                       {'layer_2': num_layers_2})
def basic():
    """Sweep the optimizer epsilon hyper-parameter for the NN, no SMOTE,
    with the 'Amount' column dropped; one result row per setting."""
    train, test = preprocessing.prepare_data(True)
    # Drop the raw amount feature from both sets.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)
    with open('nn_resultsepsilon_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # add header
        # DEAD-CODE FIX: removed the guard `if num_layers_2 == 0 and
        # num_layers_3 != 0: continue` — both were fixed at 0, so the
        # condition was always False. Also removed the unused
        # num_layers_1 / num_layers_2 / num_layers_3 / alpha locals.
        for epsilon in [1.e0, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 1.e-5]:
            # split_labels is re-run per iteration in case nn_predict
            # mutates its inputs — TODO confirm and hoist if it is pure.
            split_train, split_labels = preprocessing.split_labels(train)
            nn_predict(split_train, split_labels, test, writer,
                       {'epsilon': epsilon})