Python EmailDataset.EmailDataset 예제들, data_reader.dataset.EmailDataset.EmailDataset Python 예제들

예제 #1

0

파일 보기

def data():
    dataset = EmailDataset(
        path='./data_reader/data/test/100_instance_debug.csv', raw=False)
    # set a seed so we get the same output every time
    seed(1)
    training_data, testing_data = dataset.split({'train': 60, 'test': 40})
    return {'training_data': training_data, 'testing_data': testing_data}

예제 #2

0

파일 보기

def data():
    dataset = EmailDataset(
        path='./data_reader/data/test/100_instance_debug.csv', raw=False)
    training_, testing_ = dataset.split({'train': 60, 'test': 40})
    training_data = load_dataset(training_)
    testing_data = load_dataset(testing_)
    return {'training_data': training_data, 'testing_data': testing_data}

예제 #3

0

파일 보기

def main(argv):
    """
    driver class that performs demo of the library
    """

    # pre-process data and randomly partition
    dataset = EmailDataset(
        path='../../data_reader/data/test/100_instance_debug.csv', raw=False)
    training_, testing_ = dataset.split({'train': 60, 'test': 40})
    training_data = load_dataset(training_)
    testing_data = load_dataset(testing_)

    # initialize and train RobustLearner
    clf2 = learner.FeatureDeletion(training_data, {
        'hinge_loss_multiplier': 1,
        'max_feature_deletion': 30
    })
    clf2.train()

    # produce simple metrics
    y_predict = clf2.predict(testing_data[0])
    y_true = testing_data[0].label
    print(y_predict, y_true)

    score = metrics.accuracy_score([y_true], [y_predict])
    print("score = " + str(score))

    wgt = clf2.decision_function()[0].tolist()[0]
    print(wgt)
    yaxis = [i for i in range(clf2.num_features)]
    plt.plot(yaxis, wgt)
    plt.show()

예제 #4

0

파일 보기

파일: svm_restrained_test.py 프로젝트: xyvivian/adlib

def data():
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False,
                           raw=True)
    # set a seed so we get the same output every time
    seed(1)
    training_data, testing_data = dataset.split({'train': 60, 'test': 40})
    return {'training_data': training_data, 'testing_data': testing_data}

예제 #5

0

파일 보기

파일: cost_sensitive_test.py 프로젝트: vishalbelsare/adlib

def data():
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/full',
                           binary=False,
                           raw=True)
    training_, testing_ = dataset.split({'train': 60, 'test': 40})
    training_data = load_dataset(training_)
    testing_data = load_dataset(testing_)
    return {'training_data': training_data, 'testing_data': testing_data}

예제 #6

0

파일 보기

파일: trim_learner_test.py 프로젝트: vishalbelsare/adlib

def test_trim_learner():
    if len(sys.argv) == 2 and sys.argv[1] in ['label-flipping', 'k-insertion',
                                              'data-modification', 'dummy']:
        attacker_name = sys.argv[1]
    else:
        attacker_name = 'dummy'

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False, raw=True)

    tester = TestDataPoisoningLearner('trim', attacker_name, dataset)
    result = tester.test()
    report(result)

예제 #7

0

파일 보기

파일: dp_learner_test.py 프로젝트: vishalbelsare/adlib

def test_dp_learners():
    if len(sys.argv) == 2 and sys.argv[1] in [
            'label-flipping', 'k-insertion', 'data-modification', 'dummy'
    ]:
        attacker_name = sys.argv[1]
    else:
        attacker_name = 'dummy'

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False,
                           raw=True)

    learners = ['trim', 'atrim', 'irl', 'outlier-removal']
    tester = TestDataPoisoningLearner(learners, attacker_name, dataset)
    results = tester.test()

    for i, tup in enumerate(results):
        report(tup, learners[i].upper())

예제 #8

0

파일 보기

파일: label_flipping_test.py 프로젝트: xyvivian/adlib

def test_label_flipping():
    print('\n#################################################################')
    print('START label flipping attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True, raw=True)
    training_data = load_dataset(dataset)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Randomly cut dataset in approximately half
    rand_choices = np.random.binomial(1, 0.5, len(training_data))
    new_training_data = []
    predict_data = []
    for i in range(len(training_data)):
        if rand_choices[i] == 1:
            new_training_data.append(training_data[i])
        else:
            predict_data.append(training_data[i])
    training_data = new_training_data

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()
    orig_learner = deepcopy(learner)

    # Execute the attack
    cost = list(np.random.binomial(2, 0.5, len(training_data)))
    total_cost = 0.3 * len(training_data)  # flip around ~30% of the labels
    attacker = LabelFlipping(learner, cost, total_cost, verbose=True)
    attack_data = attacker.attack(training_data)

    flip_vector = []  # 0 -> flipped, 1 -> not flipped
    for i in range(len(attack_data)):
        if attack_data[i].get_label() != training_data[i].get_label():
            flip_vector.append(0)
        else:
            flip_vector.append(1)

    print('Flip vector with 0 -> flipped and 1 -> not flipped: \n',
          np.array(flip_vector), '\n')

    original_pred_labels = learner.predict(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(training_data)  # predict w/ orig label

    (orig_precent_correct,
     attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print('###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_precent_correct,
     attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print('###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    end = time.time()
    print('\nTotal time: ', round(begin - end, 2), 's', '\n', sep='')

    print('\nEND label flipping attack.')
    print('#################################################################\n')

예제 #9

0

파일 보기

def bad_dataset_params3():
    with pytest.raises(AttributeError) as error:
        dataset = EmailDataset(path='notarealpath.pkl', features=[1, 2, 3],
                               labels=[1])

예제 #10

0

파일 보기

def bad_dataset_params2():
    with pytest.raises(AttributeError) as error:
        dataset = EmailDataset(raw=True)

예제 #11

0

파일 보기

파일: good_word_400.py 프로젝트: xyvivian/adlib

    prec = metrics.precision_score(y_true, y_pred)
    rec = metrics.recall_score(y_true, y_pred)
    return "accuracy: {0} \n precision: {1} \n recall: {2}\n".format(
        acc, prec, rec)


def get_evasion_set(x_test, y_pred):
    # for x, y in zip(x_test, y_pred):
    #     print("true label: {0}, predicted label: {1}".format(x.label, y))
    ls = [x for x, y in zip(x_test, y_pred) if x.label == 1 and y == 1]
    print("{0} malicious instances are being detected initially")
    return ls, [x.label for x in ls]


dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                       binary=True,
                       raw=True)
training_, testing_ = dataset.split({'train': 60, 'test': 40})
training_data = load_dataset(training_)
testing_data = load_dataset(testing_)
test_true_label = [x.label for x in testing_data]

# test simple learner svm
learning_model = svm.SVC(probability=True, kernel='linear')
learner1 = SimpleLearner(learning_model, training_data)
learner1.train()

predictions = learner1.predict(testing_data)
print("======== initial prediction =========")
print(summary(predictions, test_true_label))

예제 #12

0

파일 보기

def test_iterative_retraining_learner():
    print()
    print(
        '###################################################################')
    print('START TRIM learner test.\n')

    begin = time.time()

    if len(sys.argv) == 2 and sys.argv[1] in [
            'label-flipping', 'k-insertion', 'data-modification'
    ]:
        attacker_name = sys.argv[1]
    else:
        attacker_name = 'label-flipping'

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True,
                           raw=True)

    training_data, testing_data = dataset.split({'train': 20, 'test': 80})
    training_data = load_dataset(training_data)
    testing_data = load_dataset(testing_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Setting the default learner
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    orig_learner = deepcopy(learner)

    # Execute the attack
    if attacker_name == 'label-flipping':
        cost = list(np.random.binomial(2, 0.5, len(training_data)))
        total_cost = 0.3 * len(training_data)  # flip around ~30% of the labels
        attacker = LabelFlipping(learner, cost, total_cost, verbose=True)
    elif attacker_name == 'k-insertion':
        number_to_add = int(0.25 * len(training_data))
        attacker = KInsertion(learner,
                              training_data[0],
                              number_to_add=number_to_add,
                              verbose=True)
    else:  # attacker_name == 'data-modification'
        lnr = orig_learner.model.learner
        eye = np.eye(training_data[0].get_feature_count(), dtype=int)
        orig_theta = lnr.decision_function(eye) - lnr.intercept_[0]
        target_theta = deepcopy(orig_theta)

        spam_instances = []
        for inst in training_data + testing_data:
            if inst.get_label() == 1:
                spam_instances.append(inst)

        spam_features, ham_features = get_spam_features(spam_instances)

        # Set features to recognize spam as ham
        for index in spam_features:
            target_theta[index] = -10

        for index in ham_features:
            target_theta[index] = 0.01

        print('Features selected: ', np.array(spam_features))
        print('Number of features: ', len(spam_features))

        attacker = DataModification(orig_learner, target_theta, verbose=True)

    print(
        '###################################################################')
    print('START', attacker_name, 'attack.\n')

    attack_data = attacker.attack(training_data)

    print('\nEND', attacker_name, 'attack.')
    print(
        '###################################################################')
    print()

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print(
        '###################################################################')
    print('START Iterative Retraining learner.\n')

    ###

    print('\nEND Iterative Retraining learner.')
    print(
        '###################################################################')
    print()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predict w/ orig label

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(testing_data)
    attack_pred_labels = learner.predict(testing_data)

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 testing_data)

    print(
        '###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with trim learner

    data = training_data + testing_data
    trim_pred_labels = trim_learner.predict(data)
    normal_pred_labels = learner.predict(data)

    (trim_percent_correct, normal_percent_correct,
     difference) = calculate_correct_percentages(trim_pred_labels,
                                                 normal_pred_labels, data)

    print(
        '###################################################################')
    print('Predictions using TRIM learner:')
    print('TRIM learner percentage: ', trim_percent_correct, '%')
    print('Simple learner correct percentage: ', normal_percent_correct, '%')
    print('Difference: ', difference, '%')

    end = time.time()
    print('\nTotal time: ', round(begin - end, 2), 's', '\n', sep='')

    print('\nEND TRIM learner test.')
    print(
        '###################################################################')
    print()

예제 #13

0

파일 보기

파일: real_input_cost_sensitive.py 프로젝트: xyvivian/adlib

from sklearn.naive_bayes import BernoulliNB
from adlib.learners import SimpleLearner
from data_reader.dataset import EmailDataset
from data_reader.operations import load_dataset
from adlib.adversaries import CostSensitive

# data operation
dataset = EmailDataset(path='.data_reader/data/raw/trec05p-1/full',
                       binary=False,
                       raw=True)
training_, testing_ = dataset.split({'train': 60, 'test': 40})
training_data = load_dataset(training_)
testing_data = load_dataset(testing_)

# learner
learning_model = BernoulliNB()
learner = SimpleLearner(learning_model, training_data)
learner.train()

# adversary
param = {}
param['Ua'] = [[0, 20], [0, 0]]
param['Vi'] = 0
param['Uc'] = [[1, -1], [-10, 1]]
param['scenario'] = None

adversary = CostSensitive()
adversary.set_params(param)
adversary.set_adversarial_params(learner, training_data)

# test attack

예제 #14

0

파일 보기

파일: k_insertion_test.py 프로젝트: AkliKais/adlib

def test_k_insertion():
    """
    Use as follows:
    python3 adlib/tests/adversaries/k_insertion_test.py #-TO-ADD #-ITERATIONS
    """

    print(
        '\n#################################################################')
    print('START k-insertion attack.\n')

    if len(sys.argv) > 2:
        number_to_add = int(sys.argv[1])
        num_iterations = int(sys.argv[2])
    else:
        number_to_add = 1
        num_iterations = 4

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True,
                           raw=True)
    training_data = load_dataset(dataset)

    # Randomly choose ~12% of dataset to decrease debugging time
    # 10% was too small for a realistic calculation.
    choices = np.random.binomial(1, 0.12, len(training_data))
    temp = []
    predict_data = []
    count = 0
    for i in range(len(training_data)):
        if choices[i] == 1:
            temp.append(training_data[i])
            count += 1
        else:
            predict_data.append(training_data[i])
    training_data = temp
    print('Training sample size: ', count, '/400\n', sep='')

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    before_attack_label = original_pred_labels[0]
    orig_learner = deepcopy(learner)

    # Do the attack
    attacker = KInsertion(learner,
                          training_data[0],
                          number_to_add=number_to_add,
                          num_iterations=num_iterations,
                          verbose=True)

    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print('Number of added instances: ', len(attack_data) - len(training_data))

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predict w/ orig label
    after_attack_label = attack_pred_labels[0]

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print(
        '###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    print(
        '###################################################################')
    print('Selected instance true label: ', training_data[0].get_label())
    print('Selected instance predicted label BEFORE attack: ',
          before_attack_label)
    print('Selected instance predicted label AFTER attack: ',
          after_attack_label)

    ############################################################################
    # Output loss calculations

    print(
        '###################################################################')
    print('poison_instance loss before attack: ',
          round(attacker.poison_loss_before, 4), '%')
    print('poison_instance loss after attack: ',
          round(attacker.poison_loss_after, 4), '%')
    print('poison_instance loss difference: ',
          round(attacker.poison_loss_after - attacker.poison_loss_before, 4),
          '%')

    print('\nEND k-insertion attack.')
    print(
        '#################################################################\n')

예제 #15

0

파일 보기

파일: data_modification_test.py 프로젝트: xyvivian/adlib

def test_data_modification():
    print()
    print(
        '###################################################################')
    print('START data modification attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False,
                           raw=True)

    training_data, predict_data = dataset.split({'train': 50, 'test': 50})
    training_data = load_dataset(training_data)
    predict_data = load_dataset(predict_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Setting the default learner
    # Test simple learner svm
    orig_learning_model = svm.SVC(probability=True, kernel='linear')
    orig_learner = SimpleLearner(orig_learning_model, training_data)
    orig_learner.train()

    ############################################################################
    # Calculate target theta, 1 -> spam, -1 -> ham; For the target theta
    # calculation, I am assuming I know which spam I want to be classified
    # as ham and the features I want to have a disproportionate effect on the
    # decision function calculation. For example, if feature #32 is something
    # that all of my spam has in common, I want to make the entry corresponding
    # to #32 (index 32 - 1 = 31) in target_theta to be disproportionately
    # negative so that when my spam is being classified, the 1 indicating that
    # feature #32 is present will be multiplied by a large negative number so as
    # to decrease the value of the decision function and hopefully make it
    # negative so as to classify my spam as ham.

    lnr = orig_learner.model.learner
    eye = np.eye(training_data[0].get_feature_count(), dtype=int)
    orig_theta = lnr.decision_function(eye) - lnr.intercept_[0]
    target_theta = deepcopy(orig_theta)

    spam_instances = []
    for inst in training_data + predict_data:
        if inst.get_label() == 1:
            spam_instances.append(inst)

    spam_features, ham_features = get_spam_features(spam_instances)

    # Set features to recognize spam as ham
    for index in spam_features:
        target_theta[index] = -10

    for index in ham_features:
        target_theta[index] = 0.01

    print('Features selected: ', np.array(spam_features))
    print('Number of features: ', len(spam_features))

    ############################################################################

    # Get original predictions
    original_pred_labels = orig_learner.predict(training_data)

    # Do the attack
    attacker = DataModification(orig_learner, target_theta, verbose=True)
    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predict w/ orig label

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_precent_correct, attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print(
        '###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    spam_pred_labels = learner.predict(spam_instances)
    spam_ham_count = sum(map(lambda x: 1 if x == -1 else 0, spam_pred_labels))
    print(
        '###################################################################')
    print('Number of spam instances in original training set that were \n',
          'classified as ham after the attack: ',
          spam_ham_count,
          '/',
          len(spam_instances),
          sep='')

    end = time.time()
    print('\nTotal time: ', round(begin - end, 2), 's', '\n', sep='')

    print('\nEND data modification attack.')
    print(
        '###################################################################')
    print()

예제 #16

0

파일 보기

def test_k_insertion():
    """
    Use as follows:
    python3 adlib/tests/adversaries/k_insertion_test.py NUMBER-TO-ADD
    """

    print()
    print('###################################################################')
    print('START k-insertion attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples(raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False, raw=True)
    training_data, predict_data = dataset.split({'train': 20, 'test': 80})
    training_data = load_dataset(training_data)
    predict_data = load_dataset(predict_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    if len(sys.argv) > 2:
        number_to_add = int(sys.argv[1])
    else:
        number_to_add = int(0.25 * len(training_data))

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    before_attack_label = original_pred_labels[0]
    orig_learner = deepcopy(learner)

    # Do the attack
    attacker = KInsertion(learner,
                          training_data[0],
                          number_to_add=number_to_add,
                          verbose=True)

    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print('Number of added instances: ', len(attack_data) - len(training_data))

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(training_data)  # predict w/ orig label
    after_attack_label = attack_pred_labels[0]

    (orig_precent_correct,
     attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print('###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_precent_correct,
     attack_precent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print('###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_precent_correct, '%')
    print('Attack correct percentage: ', attack_precent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    print('###################################################################')
    print('Selected instance true label: ', training_data[0].get_label())
    print('Selected instance predicted label BEFORE attack: ',
          before_attack_label)
    print('Selected instance predicted label AFTER attack: ',
          after_attack_label)

    ############################################################################
    # Output loss calculations

    print('###################################################################')
    print('poison_instance loss before attack: ',
          round(attacker.poison_loss_before, 4))
    print('poison_instance loss after attack: ',
          round(attacker.poison_loss_after, 4))
    print('poison_instance loss difference: ',
          round(attacker.poison_loss_after - attacker.poison_loss_before, 4))

    end = time.time()
    print('\nTotal time: ', round(end - begin, 2), 's', '\n', sep='')

    print('\nEND k-insertion attack.')
    print('###################################################################')
    print()