# Example no. 1
# 0
    grid_result = grid.fit(x, y)
    print("Best: %f using %s" %
          (grid_result.best_score_, grid_result.best_params_))
    params = grid_result.best_params_
    return create_fit_model(x_train,
                            y_train,
                            look_back,
                            params=params,
                            times_to_repeat=1)


# Script entry point: selects and fits a model on the training sequences of
# the eighth scenario, then evaluates it on that scenario's test set.
if __name__ == "__main__":
    # NOTE: these assignments create module-level names.
    look_back = constants.LOOK_BACK
    print("Lookback using: ", look_back)
    x_train, y_train = sequences_crafting_for_classification.get_train_set(
        scenario=constants.EIGHTH_SCENARIO)

    # Oversampling of the real dataset (to counter the class imbalance) is
    # currently disabled:
    # if constants.DATASET_TYPE == constants.REAL_DATASET:
    #    x_train, y_train = resampling_dataset.oversample_set(x_train, y_train)

    x_test, y_test = sequences_crafting_for_classification.get_test_set(
        scenario=constants.EIGHTH_SCENARIO)

    # Alternative without the parameter search (disabled):
    # model, threshold = create_fit_model(x_train, y_train, look_back, times_to_repeat=1)
    model, threshold = model_selection(x_train, y_train, look_back)
    # Raw model outputs; evaluation.evaluate receives them together with the
    # decision threshold returned by model_selection.
    testPredict = model.predict(x_test)
    evaluation.evaluate(y_test, testPredict, threshold)
    # Optional follow-up steps, currently disabled:
    # explainability.explain_dataset(model, x_train, x_test, threshold, y_test)
    # runtime_testing_fraud_buster(model)
def repeat_experiment_n_times(lstm,
                              rf,
                              xg_reg,
                              scenario,
                              times_to_repeat=100,
                              adversarial_attack=False,
                              evasion_attack=False,
                              is_white_box_attack=True,
                              use_lstm_for_adversarial=False):
    """Evaluate the lstm/rf/xg_reg ensemble over repeated val/test splits.

    Every iteration loads the test set of ``scenario``, splits it into a
    validation and a test part (``val_size=0.25``), optionally alters the
    test part to simulate an attack, predicts it with
    ``predict_test_based_on_more_confident`` and records the metrics returned
    by ``evaluation.get_performance``. The averages over all iterations that
    completed are printed at the end.

    Args:
        lstm: Fitted sequence model, forwarded to the ensemble predictor.
        rf: Fitted model for single transactions, forwarded to the ensemble
            predictor.
        xg_reg: Fitted model for single transactions, forwarded to the
            ensemble predictor.
        scenario: Forwarded to
            ``sequences_crafting_for_classification.get_test_set``.
        times_to_repeat: Number of iterations to run.
        adversarial_attack: If true, the fraud samples of the test part are
            replaced by samples crafted with ``fgsm.craft_sample``
            (``epsilon=0.01``) against a model trained by the attacker.
        evasion_attack: If true, frauds that the attacker's oracle classifies
            as fraud are removed from the test part.
        is_white_box_attack: Selects the attacker's training set
            (``REAL_DATASET`` when true, ``OLD_DATASET`` otherwise) and the
            matching hyper-parameters.
        use_lstm_for_adversarial: If true the attacker trains an LSTM,
            otherwise a multilayer perceptron.

    Returns:
        None. All results are printed.
    """
    tn_s, tp_s, fp_s, fn_s, f1_s = [], [], [], [], []
    balanced_accuracies, precisions, recalls = [], [], []
    aucpr_s, roc_aucs = [], []

    decisions_by_lstm = 0
    decisions_by_rf = 0
    decisions_by_xgb = 0
    decisions_correct_from_lstm = 0
    decisions_correct_from_lstm_only = 0

    # Iterations that raise RuntimeError are skipped. Averages are taken over
    # the iterations that actually produced results (the previous `i -= 1`
    # inside the for loop had no effect on the iteration count).
    completed_runs = 0

    for i in range(times_to_repeat):
        print("Iteration", i)
        x_test_set, y_test_set = sequences_crafting_for_classification.get_test_set(
            scenario=scenario)

        x_val, y_val, x_test, y_test = evaluation.get_val_test_set(
            x_test_set, y_test_set, val_size=0.25)
        # The non-sequence models only receive the last time step of each
        # sequence.
        x_val_supervised = x_val[:, len(x_val[0]) - 1, :]
        x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        if adversarial_attack or evasion_attack:
            # The attacker's training set depends on the attack knowledge.
            if is_white_box_attack:
                print("Using as training set, the real one - whitebox attack")
                dataset_type = REAL_DATASET
            else:
                print("Using as training set, the old one - blackbox attack")
                dataset_type = OLD_DATASET

            x_train, y_train = sequences_crafting_for_classification.get_train_set(
                dataset_type=dataset_type)
            x_train_supervised = x_train[:, look_back, :]

            if adversarial_attack:
                print("Crafting an adversarial attack")
                fraud_idx = np.where(y_test == 1)
                if not use_lstm_for_adversarial:
                    print("The attacker will use a Multilayer perceptron")
                    # todo: hyper param tuning multilayer perceptron
                    adversarial_model = MultiLayerPerceptron.create_fit_model(
                        x_train_supervised, y_train)
                    frauds = x_test_supervised[fraud_idx]
                    adversarial_samples = fgsm.craft_sample(frauds,
                                                            adversarial_model,
                                                            epsilon=0.01)
                    # Only the last transaction of each fraudulent sequence
                    # is replaced by its adversarial counterpart.
                    x_test[fraud_idx,
                           len(x_test[0]) - 1] = adversarial_samples
                else:
                    print("The attacker will use a LSTM network")
                    # train the network using the right params
                    if is_white_box_attack:
                        if USING_AGGREGATED_FEATURES:
                            params = BEST_PARAMS_LSTM_REAL_DATASET_AGGREGATED
                        else:
                            params = BEST_PARAMS_LSTM_REAL_DATASET_NO_AGGREGATED
                    else:
                        if USING_AGGREGATED_FEATURES:
                            params = BEST_PARAMS_LSTM_OLD_DATASET_AGGREGATED
                        else:
                            params = BEST_PARAMS_LSTM_OLD_DATASET_NO_AGGREGATED
                    adversarial_model = LSTM_classifier.create_fit_model(
                        x_train, y_train, look_back, params=params)
                    frauds = x_test[fraud_idx]
                    adversarial_samples = fgsm.craft_sample(frauds,
                                                            adversarial_model,
                                                            epsilon=0.01)
                    # Whole fraudulent sequences are replaced here.
                    x_test[fraud_idx] = adversarial_samples
                x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

            if evasion_attack:
                print("Crafting an evasion attack")
                # train the oracle using the right params
                if is_white_box_attack:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_RF_REAL_DATASET_AGGREGATED
                    else:
                        params = BEST_PARAMS_RF_REAL_DATASET_NO_AGGREGATED
                else:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_RF_OLD_DATASET_AGGREGATED
                    else:
                        params = BEST_PARAMS_RF_OLD_DATASET_NO_AGGREGATED
                oracle = RF.create_model(x_train_supervised,
                                         y_train,
                                         params=params)

                # The oracle threshold is chosen on the validation part.
                y_val_pred_oracle = oracle.predict_proba(x_val_supervised)
                oracle_threshold = evaluation.find_best_threshold_fixed_fpr(
                    y_val, y_val_pred_oracle[:, 1])

                # If the oracle predicts a fraud as fraud -> discard it,
                # otherwise inject it in the real bank system. The attacker
                # must query its own oracle here, not the defender's `rf`.
                y_pred_oracle = oracle.predict_proba(x_test_supervised)
                y_pred_oracle = y_pred_oracle[:, 1].ravel()
                y_pred_oracle = np.array(
                    evaluation.adjusted_classes(y_pred_oracle,
                                                oracle_threshold))

                # Keep all genuine samples plus the frauds the oracle missed.
                kept = np.where(((y_test == 1) & (y_pred_oracle == 0))
                                | (y_test == 0))
                x_test = x_test[kept]
                y_test = y_test[kept]
                x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        try:
            # Other ensemble strategies that were tried here (voting, expon,
            # sum, more-confident + majority voting) are not used any more.
            (y_test_pred, by_lstm, by_rf, by_xgb, correct_from_lstm,
             correct_from_lstm_only) = predict_test_based_on_more_confident(
                 lstm, rf, xg_reg, x_val, x_val_supervised, y_val, x_test,
                 x_test_supervised, y_test)
            y_test_pred = np.array(y_test_pred)
            (confusion, f1, balanced_accuracy, precision, recall, aucpr,
             roc_auc) = evaluation.get_performance(y_test,
                                                   y_test_pred,
                                                   threshold=True)
        except RuntimeError as err:
            print("Iteration", i, "failed and is excluded from the averages:",
                  err)
            continue

        completed_runs += 1

        tn_s.append(confusion[0, 0])
        tp_s.append(confusion[1, 1])
        fp_s.append(confusion[0, 1])
        fn_s.append(confusion[1, 0])
        f1_s.append(f1)

        decisions_by_lstm += by_lstm
        decisions_by_rf += by_rf
        decisions_by_xgb += by_xgb
        decisions_correct_from_lstm += correct_from_lstm
        decisions_correct_from_lstm_only += correct_from_lstm_only

        balanced_accuracies.append(balanced_accuracy)
        precisions.append(precision)
        recalls.append(recall)
        aucpr_s.append(aucpr)
        roc_aucs.append(roc_auc)

    if completed_runs == 0:
        # Nothing to average: avoid a division by zero and NaN means.
        print("No iteration completed successfully, no results to report")
        return

    print("Num decisions taken from lstm: ",
          decisions_by_lstm / completed_runs)
    print("Num decisions taken by rf: ", decisions_by_rf / completed_runs)
    print("Num decisions taken by xgb: ", decisions_by_xgb / completed_runs)
    print("Num decisions taken by lstm correctly taken: ",
          decisions_correct_from_lstm / completed_runs)
    print("Num decisions taken by lstm correctly taken and not by others: ",
          decisions_correct_from_lstm_only / completed_runs)
    evaluation.print_results(
        np.array(tn_s).mean(),
        np.array(fp_s).mean(),
        np.array(fn_s).mean(),
        np.array(tp_s).mean(),
        np.array(f1_s).mean(),
        np.array(balanced_accuracies).mean(),
        np.array(precisions).mean(),
        np.array(recalls).mean(),
        np.array(aucpr_s).mean(),
        np.array(roc_aucs).mean())
def experiment_with_cdf(lstm,
                        scale_lstm,
                        loc_lstm,
                        mean_lstm,
                        std_lstm,
                        threshold_lstm,
                        rf,
                        scale_rf,
                        loc_rf,
                        mean_rf,
                        std_rf,
                        threshold_rf,
                        xg_reg,
                        scale_xgb,
                        loc_xgb,
                        mean_xgb,
                        std_xgb,
                        threshold_xgb,
                        scenario,
                        adversarial_attack=False,
                        evasion_attack=False,
                        is_white_box_attack=True,
                        use_lstm_for_adversarial=False):
    """Evaluate the ensemble once with ``predict_test_based_on_expon``.

    Loads the test set of ``scenario``, optionally alters it to simulate an
    attack, predicts it with ``predict_test_based_on_expon`` and prints the
    metrics returned by ``evaluation.get_performance``.

    Args:
        lstm, rf, xg_reg: Fitted models, forwarded to the ensemble predictor.
        scale_*, loc_*, mean_*, std_*, threshold_*: Per-model values that are
            forwarded unchanged to ``predict_test_based_on_expon``.
        scenario: Forwarded to
            ``sequences_crafting_for_classification.get_test_set``.
        adversarial_attack: If true, the fraud samples of the test set are
            replaced by samples crafted with ``fgsm.craft_sample`` against a
            model trained by the attacker.
        evasion_attack: If true, frauds that the attacker's oracle classifies
            as fraud are removed from the test set.
        is_white_box_attack: Selects the attacker's training set
            (``INJECTED_DATASET`` when true, ``OLD_DATASET`` otherwise) and
            the matching hyper-parameters.
        use_lstm_for_adversarial: If true the attacker trains an LSTM
            (``epsilon=0.1``), otherwise a multilayer perceptron
            (``epsilon=0.01``).

    Returns:
        None. All results are printed.
    """
    x_test, y_test = sequences_crafting_for_classification.get_test_set(
        scenario=scenario)
    # The non-sequence models only receive the last time step of each
    # sequence.
    x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

    if adversarial_attack or evasion_attack:
        # The attacker's training set depends on the attack knowledge.
        if is_white_box_attack:
            print("whitebox attack")
            dataset_type = INJECTED_DATASET
        else:
            print("blackbox attack")
            dataset_type = OLD_DATASET

        x_train, y_train = sequences_crafting_for_classification.get_train_set(
            dataset_type=dataset_type)
        x_train_supervised = x_train[:, look_back, :]

        if adversarial_attack:
            print("Crafting an adversarial attack")
            fraud_idx = np.where(y_test == 1)
            if not use_lstm_for_adversarial:
                print("The attacker will use a Multilayer perceptron")
                # todo: hyper param tuning multilayer perceptron
                adversarial_model = MultiLayerPerceptron.create_fit_model(
                    x_train_supervised, y_train)
                frauds = x_test_supervised[fraud_idx]
                adversarial_samples = fgsm.craft_sample(frauds,
                                                        adversarial_model,
                                                        epsilon=0.01)
                # Only the last transaction of each fraudulent sequence is
                # replaced by its adversarial counterpart.
                x_test[fraud_idx, len(x_test[0]) - 1] = adversarial_samples
            else:
                print("The attacker will use a LSTM network")
                # train the network using the right params
                if is_white_box_attack:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_LSTM_AGGREGATED
                    else:
                        params = BEST_PARAMS_LSTM_NO_AGGREGATED
                else:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_LSTM_OLD_DATASET_AGGREGATED
                    else:
                        params = BEST_PARAMS_LSTM_OLD_DATASET_NO_AGGREGATED
                adversarial_model, _ = LSTM_classifier.create_fit_model(
                    x_train, y_train, look_back, params=params)
                frauds = x_test[fraud_idx]
                adversarial_samples = fgsm.craft_sample(frauds,
                                                        adversarial_model,
                                                        epsilon=0.1)
                # Whole fraudulent sequences are replaced here.
                x_test[fraud_idx] = adversarial_samples
            x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        if evasion_attack:
            print("Crafting an evasion attack")
            # train the oracle using the right params
            if is_white_box_attack:
                if USING_AGGREGATED_FEATURES:
                    params = BEST_PARAMS_RF_AGGREGATED
                else:
                    params = BEST_PARAMS_RF_NO_AGGREGATED
            else:
                if USING_AGGREGATED_FEATURES:
                    params = BEST_PARAMS_RF_OLD_DATASET_AGGREGATED
                else:
                    params = BEST_PARAMS_RF_OLD_DATASET_NO_AGGREGATED
            oracle, oracle_threshold = RF.create_model(x_train_supervised,
                                                       y_train,
                                                       params=params)

            # If the oracle predicts a fraud as fraud -> discard it,
            # otherwise inject it in the real bank system.
            y_pred_oracle = oracle.predict_proba(x_test_supervised)
            y_pred_oracle = y_pred_oracle[:, 1].ravel()
            y_pred_oracle = np.array(
                evaluation.adjusted_classes(y_pred_oracle, oracle_threshold))

            # Keep all genuine samples plus the frauds the oracle missed.
            kept = np.where(((y_test == 1) & (y_pred_oracle == 0))
                            | (y_test == 0))
            x_test = x_test[kept]
            y_test = y_test[kept]
            x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

    (y_test_pred, thresholds, num_decisions_taken_by_lstm,
     num_decisions_taken_by_rf,
     num_decisions_taken_by_xgb) = predict_test_based_on_expon(
         lstm, scale_lstm, loc_lstm, mean_lstm, std_lstm, threshold_lstm, rf,
         scale_rf, loc_rf, mean_rf, std_rf, threshold_rf, xg_reg, scale_xgb,
         loc_xgb, mean_xgb, std_xgb, threshold_xgb, x_test, x_test_supervised,
         y_test)
    y_test_pred = np.array(y_test_pred)
    (confusion, f1, balanced_accuracy, precision, recall, aucpr, roc_auc,
     fpr_values, tpr_values, accuracy,
     matthews_coeff) = evaluation.get_performance(y_test, y_test_pred,
                                                  thresholds)
    tn = confusion[0, 0]
    tp = confusion[1, 1]
    fp = confusion[0, 1]
    fn = confusion[1, 0]

    print("Num decisions taken from lstm: ", num_decisions_taken_by_lstm)
    print("Num decisions taken by rf: ", num_decisions_taken_by_rf)
    print("Num decisions taken by xgb: ", num_decisions_taken_by_xgb)
    # A second print_results call over per-iteration lists (tn_s, fp_s, ...)
    # used to follow here; those lists are never built in this single-run
    # function, so the call was removed.
    evaluation.print_results(tn, fp, fn, tp, f1, balanced_accuracy, precision,
                             recall, aucpr, roc_auc, fpr_values, tpr_values,
                             accuracy, matthews_coeff)


# Module-level setup: loads the training set and fits the three models
# (lstm, rf, xg_reg). This runs as a side effect of executing the module.
# `look_back` is read as a global by the experiment functions in this file.
look_back = LOOK_BACK
print("Lookback using: ", look_back)
x_train, y_train = sequences_crafting_for_classification.get_train_set()

# If the dataset is the real one, oversample the training set to counter the
# class imbalance.
if DATASET_TYPE == REAL_DATASET:
    x_train, y_train = resampling_dataset.oversample_set(x_train, y_train)

# Training data for the non-sequence models (xgboost/rf): only the time step
# at index `look_back` of every sequence is used.
x_train_supervised = x_train[:, look_back, :]
y_train_supervised = y_train

print("Training models...")
# NOTE(review): other call sites in this file unpack a (model, threshold)
# pair from LSTM_classifier.create_fit_model / RF.create_model -- confirm
# which return shape applies to the version used here.
lstm = LSTM_classifier.create_fit_model(x_train, y_train, look_back)
rf = RF.create_model(x_train_supervised, y_train_supervised)
xg_reg = xgboost_classifier.create_model(x_train_supervised,
                                         y_train_supervised)
# Example no. 5
# 0
def experiment(lstm,
               threshold_lstm,
               xg_reg,
               threshold_xgb,
               rf,
               threshold_rf,
               scenario,
               adversarial_attack=False,
               evasion_attack=False,
               is_white_box_attack=True,
               use_lstm_for_adversarial=False):
    """Evaluate lstm, rf and xg_reg separately on the test set of a scenario.

    The test set is optionally altered to simulate an attack. Each model is
    then evaluated with ``evaluation.evaluate``. Without an attack, each
    model is additionally passed to ``explainability.explain_dataset``.
    Finally the fraud/genuine index statistics of the three models are
    printed.

    Args:
        lstm: Fitted sequence model (``predict`` is called on full sequences).
        threshold_lstm: Decision threshold for ``lstm``.
        xg_reg: Fitted model (``predict_proba`` on the last time step).
        threshold_xgb: Decision threshold for ``xg_reg``.
        rf: Fitted model (``predict_proba`` on the last time step).
        threshold_rf: Decision threshold for ``rf``.
        scenario: Forwarded to
            ``sequences_crafting_for_classification.get_test_set``.
        adversarial_attack: If true, the fraud samples of the test set are
            replaced by samples crafted with ``fgsm.craft_sample`` against a
            model trained by the attacker.
        evasion_attack: If true, frauds that the attacker's oracle classifies
            as fraud are removed from the test set.
        is_white_box_attack: Selects the attacker's training set
            (``INJECTED_DATASET`` when true, ``OLD_DATASET`` otherwise) and
            the matching hyper-parameters.
        use_lstm_for_adversarial: If true the attacker trains an LSTM
            (``epsilon=0.1``), otherwise a multilayer perceptron
            (``epsilon=0.01``).

    Returns:
        None. All results are printed.
    """
    x_test, y_test = sequences_crafting_for_classification.get_test_set(
        scenario=scenario)

    if adversarial_attack or evasion_attack:
        # The attacker's training set depends on the attack knowledge.
        if is_white_box_attack:
            print("Using as traing set, the real one - whitebox attack")
            dataset_type = INJECTED_DATASET
        else:
            print("Using as traing set, the old one - blackbox attack")
            dataset_type = OLD_DATASET

        x_train, y_train = sequences_crafting_for_classification.get_train_set(
            dataset_type=dataset_type)
        x_train_supervised = x_train[:, look_back, :]

        # The non-sequence models only receive the last time step of each
        # sequence.
        x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        if adversarial_attack:
            print("Crafting an adversarial attack")
            fraud_idx = np.where(y_test == 1)
            if not use_lstm_for_adversarial:
                print("The attacker will use a Multilayer perceptron")
                adversarial_model = MultiLayerPerceptron.create_fit_model(
                    x_train_supervised, y_train)
                frauds = x_test_supervised[fraud_idx]
                adversarial_samples = fgsm.craft_sample(frauds,
                                                        adversarial_model,
                                                        epsilon=0.01)
                # In lstm samples only the last transaction of the sequence
                # is replaced.
                x_test[fraud_idx, len(x_test[0]) - 1] = adversarial_samples
            else:
                print("The attacker will use a LSTM network")
                # train the network using the right params
                if is_white_box_attack:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_LSTM_REAL_DATASET_AGGREGATED
                    else:
                        params = BEST_PARAMS_LSTM_REAL_DATASET_NO_AGGREGATED
                else:
                    if USING_AGGREGATED_FEATURES:
                        params = BEST_PARAMS_LSTM_OLD_DATASET_AGGREGATED
                    else:
                        params = BEST_PARAMS_LSTM_OLD_DATASET_NO_AGGREGATED
                frauds = x_test[fraud_idx]
                adversarial_model, _ = LSTM_classifier.create_fit_model(
                    x_train, y_train, look_back, params=params)
                adversarial_samples = fgsm.craft_sample(frauds,
                                                        adversarial_model,
                                                        epsilon=0.1)
                # Whole fraudulent sequences are replaced here.
                x_test[fraud_idx] = adversarial_samples
            x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        if evasion_attack:
            print("Crafting an evasion attack")
            # train the oracle using the right params
            if is_white_box_attack:
                if USING_AGGREGATED_FEATURES:
                    params = BEST_PARAMS_RF_AGGREGATED
                else:
                    params = BEST_PARAMS_RF_NO_AGGREGATED
            else:
                if USING_AGGREGATED_FEATURES:
                    params = BEST_PARAMS_RF_OLD_DATASET_AGGREGATED
                else:
                    params = BEST_PARAMS_RF_OLD_DATASET_NO_AGGREGATED
            oracle, oracle_threshold = RF.create_model(x_train_supervised,
                                                       y_train,
                                                       params=params)

            # If the oracle predicts a fraud as fraud -> discard it,
            # otherwise inject it in the real bank system. The attacker must
            # query its own oracle here, not the defender's `rf`.
            y_pred_oracle = oracle.predict_proba(x_test_supervised)
            y_pred_oracle = y_pred_oracle[:, 1].ravel()
            y_pred_oracle = np.array(
                evaluation.adjusted_classes(y_pred_oracle, oracle_threshold))

            # Keep all genuine samples plus the frauds the oracle missed.
            kept = np.where(((y_test == 1) & (y_pred_oracle == 0))
                            | (y_test == 0))
            x_test = x_test[kept]
            y_test = y_test[kept]
            x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        # predicting the (attacked) test set
        y_pred_lstm = lstm.predict(x_test).ravel()
        y_pred_rf = rf.predict_proba(x_test_supervised)[:, 1].ravel()
        y_pred_xgb = xg_reg.predict_proba(x_test_supervised)[:, 1].ravel()

        print("LSTM")
        evaluation.evaluate(y_test, y_pred_lstm, threshold_lstm)

        print("RF")
        evaluation.evaluate(y_test, y_pred_rf, threshold_rf)

        print("Xgboost")
        evaluation.evaluate(y_test, y_pred_xgb, threshold_xgb)
    else:
        # No attack: evaluate on the untouched test set and explain each
        # model against the default training set.
        x_test_supervised = x_test[:, len(x_test[0]) - 1, :]

        x_train, y_train = sequences_crafting_for_classification.get_train_set(
        )
        x_train_supervised = x_train[:, look_back, :]

        print("LSTM")
        y_pred_lstm = lstm.predict(x_test)
        evaluation.evaluate(y_test, y_pred_lstm, threshold_lstm)
        explainability.explain_dataset(lstm, x_train, x_test, threshold_lstm,
                                       y_test)

        print("RF")
        y_pred_rf = rf.predict_proba(x_test_supervised)[:, 1]
        evaluation.evaluate(y_test, y_pred_rf, threshold_rf)
        explainability.explain_dataset(rf, x_train_supervised,
                                       x_test_supervised, threshold_rf, y_test)

        print("Xgboost")
        y_pred_xgb = xg_reg.predict_proba(x_test_supervised)[:, 1]
        evaluation.evaluate(y_test, y_pred_xgb, threshold_xgb)
        explainability.explain_dataset(xg_reg, x_train_supervised,
                                       x_test_supervised, threshold_xgb,
                                       y_test)

    # Turn the scores into class labels with each model's own threshold.
    y_pred_lstm = evaluation.adjusted_classes(y_pred_lstm, threshold_lstm)
    y_pred_rf = evaluation.adjusted_classes(y_pred_rf, threshold_rf)
    y_pred_xgb = evaluation.adjusted_classes(y_pred_xgb, threshold_xgb)

    lstm_fraud_indices = evaluation.get_fraud_indices(y_test, y_pred_lstm)
    rf_fraud_indices = evaluation.get_fraud_indices(y_test, y_pred_rf)
    xgboost_fraud_indices = evaluation.get_fraud_indices(y_test, y_pred_xgb)
    evaluation.print_frauds_stats(lstm_fraud_indices, rf_fraud_indices,
                                  xgboost_fraud_indices)

    lstm_genuine_indices = evaluation.get_genuine_indices(y_test, y_pred_lstm)
    rf_genuine_indices = evaluation.get_genuine_indices(y_test, y_pred_rf)
    xgboost_genuine_indices = evaluation.get_genuine_indices(
        y_test, y_pred_xgb)
    evaluation.print_genuine_stats(lstm_genuine_indices, rf_genuine_indices,
                                   xgboost_genuine_indices)