def lr_predict(train, test, preprocessing_type):
    """Train a logistic-regression model and evaluate it on the test set.

    Passing preprocessing_type == 'smote' oversamples the training data
    with SMOTE before fitting; any other value trains on the data as-is.
    """
    # Split off the class label (last column) from the feature columns.
    features, labels = preprocessing.split_labels(train)

    # Optional class-imbalance correction on the training set only.
    if preprocessing_type == 'smote':
        features, labels = preprocessing.apply_smote(features, labels)

    model = linear_model.LogisticRegression()

    validation.cross_validate(model, features, labels)
    model.fit(features, labels)

    # Evaluate the fitted model on the held-out test data.
    test_features, test_labels = preprocessing.split_labels(test)
    validation.test(model, test_features, test_labels)
def multiple_balanced_sets():
    """Cross-validate a logistic-regression model on each of 5 balanced
    training samples, then fit and test the model from the last sample."""
    train, test = preprocessing.prepare_data()
    samples = preprocessing.multiple_balanced_samples(train, 5)

    # Build and cross-validate a fresh model per balanced sample. The
    # loop deliberately leaves model/features/labels bound to the LAST
    # sample, which the code below reuses.
    for idx in range(5):
        # Separate the class label (last column) from the features.
        features, labels = preprocessing.split_labels(samples[idx])
        model = linear_model.LogisticRegression()
        validation.cross_validate_set(model, features, labels)

    validation.cross_validate(model, features, labels)
    model.fit(features, labels)

    # Evaluate on the held-out test data.
    test_features, test_labels = preprocessing.split_labels(test)
    validation.test(model, test_features, test_labels)
# Ejemplo n.º 3
# 0
def svm_predict(train, test, preprocessing_type):
    """Train an RBF-kernel SVM and evaluate it on the test set.

    Passing preprocessing_type == 'smote' oversamples the training data
    with SMOTE before fitting; any other value trains on the data as-is.
    """
    # Separate the class label (last column) from the feature columns.
    features, labels = preprocessing.split_labels(train)

    # Optional class-imbalance correction on the training set only.
    if preprocessing_type == 'smote':
        features, labels = preprocessing.apply_smote(features, labels)

    # RBF-kernel SVM. A class_weight such as {1: 2.61, 0: 0.383} could be
    # used to weight the positive class more strongly than the negative one.
    model = svm.SVC(kernel='rbf')

    validation.cross_validate(model, features, labels)
    model.fit(features, labels)

    # Evaluate the fitted model on the held-out test data.
    test_features, test_labels = preprocessing.split_labels(test)
    validation.test(model, test_features, test_labels)
# Ejemplo n.º 4
# 0
def rf_predict(train, test, preprocessing_type, results_file):
    """Train a random forest, evaluate it, and append one row of results.

    Passing preprocessing_type == 'smote' oversamples the training data
    with SMOTE before fitting. The row written to ``results_file`` (a
    csv.writer-like object) is: config description, min_samples_split,
    n_estimators, then the six validation metrics, then the same six
    test metrics.
    """
    # Separate the class label from the feature columns.
    features, labels = preprocessing.split_labels(train)

    if preprocessing_type == 'smote':
        features, labels = preprocessing.apply_smote(features, labels)

    # Name the hyper-parameters once so the constructor and the results
    # row below cannot drift apart (previously 40 and 15 were hard-coded
    # in both places).
    min_samples_split = 40
    n_estimators = 15
    classifier = ensemble.RandomForestClassifier(
        class_weight={0: 0.75, 1: 1.5},
        min_samples_split=min_samples_split,
        n_estimators=n_estimators)

    classifier.fit(features, labels)

    vresult = validation.cross_validate(classifier, features, labels)

    # Evaluate on the held-out test data.
    test_features, test_labels = preprocessing.split_labels(test)
    tresult = validation.test(classifier, test_features, test_labels)

    # One CSV row: configuration columns, validation metrics, test metrics.
    metrics = ('roc_auc', 'precision', 'recall', 'f1', 'fp', 'fn')
    results = ["low_skew (0=0.75, 1=1.5)", min_samples_split, n_estimators]
    results.extend(vresult[name] for name in metrics)
    results.extend(tresult[name] for name in metrics)
    results_file.writerow(results)
# Ejemplo n.º 5
# 0
def basic():
    """Run the neural-net experiment once on SMOTE-resampled training data.

    NOTE(review): several same-named ``basic`` definitions exist in this
    file; at import time only the last one survives.
    """
    train, test = preprocessing.prepare_data(True)

    # NOTE(review): the output filename says "nosmote" but SMOTE *is*
    # applied below — confirm which is intended.
    with open('nn_resultsfeaturedrop_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        features, labels = preprocessing.split_labels(train)
        features, labels = preprocessing.apply_smote(features, labels)
        nn_predict(features, labels, test, writer, {})
def basic():
    """Run nn_predict twice: once on the raw training data, once after SMOTE.

    NOTE(review): several same-named ``basic`` definitions exist in this
    file; at import time only the last one survives.
    """
    train, test = preprocessing.prepare_data(True)
    # Drop the 'Amount' column from both splits.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_results.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        # Baseline run on the unbalanced training data.
        features, labels = preprocessing.split_labels(train)
        nn_predict(features, labels, test, writer)

        # Second run after SMOTE oversampling.
        features, labels = preprocessing.apply_smote(features, labels)
        nn_predict(features, labels, test, writer)
def basic():
    """Sweep the second hidden layer's width from 1 to 29 (no SMOTE).

    Removed dead code from the original: ``num_layers_2`` iterates
    ``range(1, 30)`` so the old ``num_layers_2 == 0 and num_layers_3 != 0``
    guard could never fire, and ``num_layers_1``, ``num_layers_3`` and
    ``alpha`` were assigned but never used.

    NOTE(review): several same-named ``basic`` definitions exist in this
    file; at import time only the last one survives.
    """
    train, test = preprocessing.prepare_data(True)
    # Drop the 'Amount' column from both splits.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_resultslayer2_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        for num_layers_2 in range(1, 30):
            # Re-split each iteration, as the original did — presumably to
            # give nn_predict fresh inputs; TODO confirm whether needed.
            features, labels = preprocessing.split_labels(train)
            nn_predict(features, labels, test, writer,
                       {'layer_2': num_layers_2})
def basic():
    """Sweep the Adam epsilon hyper-parameter over 1e0 … 1e-5 (no SMOTE).

    Removed dead code from the original: with ``num_layers_2`` fixed at 0
    and ``num_layers_3`` fixed at 0, the guard
    ``num_layers_2 == 0 and num_layers_3 != 0`` could never fire, and
    ``num_layers_1`` and ``alpha`` were assigned but never used.

    NOTE(review): several same-named ``basic`` definitions exist in this
    file; at import time only the last one survives.
    """
    train, test = preprocessing.prepare_data(True)
    # Drop the 'Amount' column from both splits.
    train = train.drop('Amount', axis=1)
    test = test.drop('Amount', axis=1)

    with open('nn_resultsepsilon_nosmote.csv', 'w') as csvfile:
        writer = csv.writer(csvfile)
        for epsilon in [1.e0, 1.e-1, 1.e-2, 1.e-3, 1.e-4, 1.e-5]:
            # Re-split each iteration, as the original did — presumably to
            # give nn_predict fresh inputs; TODO confirm whether needed.
            features, labels = preprocessing.split_labels(train)
            nn_predict(features, labels, test, writer,
                       {'epsilon': epsilon})