Example #1
    def _retrain(self):
        # Retrain the model with poisoned data
        learning_model = svm.SVC(probability=True, kernel='linear')
        self.attack_learner = SimpleLearner(learning_model,
                                            self.attack_instances)
        self.attack_learner.train()

        self.attack_training_pred_labels = self.attack_learner.predict(
            self.training_instances)
        self.attack_testing_pred_labels = self.attack_learner.predict(
            self.testing_instances)
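A natural follow-up, not shown above, would report how the poisoned model performs. Below is a hypothetical sketch (the method name is an assumption; get_label() is the accessor the other examples use) built on the attributes that _retrain stores:

    def _report_attack_accuracy(self):
        # Hypothetical helper: compare the poisoned model's stored
        # predictions against the true training labels.
        true_labels = [inst.get_label() for inst in self.training_instances]
        matches = sum(1 for pred, true in
                      zip(self.attack_training_pred_labels, true_labels)
                      if pred == true)
        print('Post-attack training accuracy: {:.2%}'.format(
            matches / len(true_labels)))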
Example #2
    def _setup(self):
        if self.verbose:
            print('Training sample size: ',
                  len(self.training_instances),
                  '/400\n',
                  sep='')

        # Setting the default learner
        learning_model = svm.SVC(probability=True, kernel='linear')
        self.learner = SimpleLearner(learning_model, self.training_instances)
        self.learner.train()

        self.training_pred_labels = self.learner.predict(
            self.training_instances)
        self.testing_pred_labels = self.learner.predict(self.testing_instances)
Example #3
# Imports assumed from the adlib project layout; the later examples rely on
# a similar set of imports, which the snippets omit.
import time
from copy import deepcopy

import numpy as np
from sklearn import svm

from adlib.learners import SimpleLearner
from adlib.adversaries.label_flipping import LabelFlipping
from data_reader.dataset import EmailDataset
from data_reader.operations import load_dataset


def test_label_flipping():
    print('\n#################################################################')
    print('START label flipping attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples (raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True, raw=True)
    training_data = load_dataset(dataset)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Randomly split the dataset approximately in half
    rand_choices = np.random.binomial(1, 0.5, len(training_data))
    new_training_data = []
    predict_data = []
    for i in range(len(training_data)):
        if rand_choices[i] == 1:
            new_training_data.append(training_data[i])
        else:
            predict_data.append(training_data[i])
    training_data = new_training_data

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()
    orig_learner = deepcopy(learner)

    # Execute the attack
    cost = list(np.random.binomial(2, 0.5, len(training_data)))
    total_cost = 0.3 * len(training_data)  # flip about 30% of the labels
    attacker = LabelFlipping(learner, cost, total_cost, verbose=True)
    attack_data = attacker.attack(training_data)

    flip_vector = []  # 0 -> flipped, 1 -> not flipped
    for i in range(len(attack_data)):
        if attack_data[i].get_label() != training_data[i].get_label():
            flip_vector.append(0)
        else:
            flip_vector.append(1)

    print('Flip vector with 0 -> flipped and 1 -> not flipped: \n',
          np.array(flip_vector), '\n')

    original_pred_labels = learner.predict(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(training_data)  # predictions on the clean training data

    (orig_percent_correct,
     attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print('###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_percent_correct,
     attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print('###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    end = time.time()
    print('\nTotal time: ', round(end - begin, 2), 's', '\n', sep='')

    print('\nEND label flipping attack.')
    print('#################################################################\n')
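Several of these examples call calculate_correct_percentages without defining it. Below is a minimal stand-in consistent with its call sites (three return values; instances expose get_label(); the two-decimal rounding is an assumption):

def calculate_correct_percentages(orig_pred_labels, attack_pred_labels, data):
    # Hypothetical sketch of the helper used throughout these examples:
    # percentage of correct predictions before and after the attack,
    # plus the difference between the two.
    true_labels = [inst.get_label() for inst in data]
    orig_correct = sum(1 for p, t in zip(orig_pred_labels, true_labels)
                       if p == t)
    attack_correct = sum(1 for p, t in zip(attack_pred_labels, true_labels)
                         if p == t)
    orig_percent = round(100 * orig_correct / len(data), 2)
    attack_percent = round(100 * attack_correct / len(data), 2)
    return orig_percent, attack_percent, round(orig_percent - attack_percent, 2)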
Example #4
def detect_malicious(x_test, y_pred):
    # Function header reconstructed; the original snippet begins
    # mid-function and this name is hypothetical.
    ls = [x for x, y in zip(x_test, y_pred) if x.label == 1 and y == 1]
    print("{0} malicious instances are detected initially".format(len(ls)))
    return ls, [x.label for x in ls]


dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                       binary=True,
                       raw=True)
training_, testing_ = dataset.split({'train': 60, 'test': 40})
training_data = load_dataset(training_)
testing_data = load_dataset(testing_)
test_true_label = [x.label for x in testing_data]

# test simple learner svm
learning_model = svm.SVC(probability=True, kernel='linear')
learner1 = SimpleLearner(learning_model, training_data)
learner1.train()

predictions = learner1.predict(testing_data)
print("======== initial prediction =========")
print(summary(predictions, test_true_label))

# Note: the attack data should not consist only of malicious instances
attacker = GoodWord(n=500)
attacker.set_adversarial_params(learner1, testing_data)
new_testing_data = attacker.attack(testing_data)

predictions2 = learner1.predict(new_testing_data)
print("========= post-attack prediction =========")
print("post attack preds" + str(predictions2))
print(type(predictions2).__name__)
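The summary helper used above is likewise not shown; a minimal stand-in consistent with its call site (predictions and true labels in, a printable report out):

def summary(y_pred, y_true):
    # Hypothetical sketch: report the fraction of predictions that
    # match the true labels.
    correct = sum(1 for p, t in zip(y_pred, y_true) if p == t)
    return 'accuracy: {:.2%}'.format(correct / len(y_true))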
Example #5
def NB_learner(data):
    # GaussianNB is assumed to be imported from sklearn.naive_bayes.
    learner_model = GaussianNB()
    learner = SimpleLearner(model=learner_model,
                            training_instances=data['training_data'])
    learner.train()
    return learner
Example #6
def simple_learner(data):
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, data['training_data'])
    learner.train()
    return learner
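Examples #5 and #6 are factory helpers that expect a dict with a 'training_data' key. A hypothetical call site, reusing the variable names from Example #4:

data = {'training_data': training_data}
nb = NB_learner(data)
svm_learner = simple_learner(data)
print(summary(svm_learner.predict(testing_data), test_true_label))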
Example #7
def test_iterative_retraining_learner():
    print()
    print(
        '###################################################################')
    print('START TRIM learner test.\n')

    begin = time.time()

    if len(sys.argv) == 2 and sys.argv[1] in [
            'label-flipping', 'k-insertion', 'data-modification'
    ]:
        attacker_name = sys.argv[1]
    else:
        attacker_name = 'label-flipping'

    # Data processing unit
    # The path is an index of 400 testing samples (raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True,
                           raw=True)

    training_data, testing_data = dataset.split({'train': 20, 'test': 80})
    training_data = load_dataset(training_data)
    testing_data = load_dataset(testing_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Setting the default learner
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    orig_learner = deepcopy(learner)

    # Execute the attack
    if attacker_name == 'label-flipping':
        cost = list(np.random.binomial(2, 0.5, len(training_data)))
        total_cost = 0.3 * len(training_data)  # flip about 30% of the labels
        attacker = LabelFlipping(learner, cost, total_cost, verbose=True)
    elif attacker_name == 'k-insertion':
        number_to_add = int(0.25 * len(training_data))
        attacker = KInsertion(learner,
                              training_data[0],
                              number_to_add=number_to_add,
                              verbose=True)
    else:  # attacker_name == 'data-modification'
        lnr = orig_learner.model.learner
        eye = np.eye(training_data[0].get_feature_count(), dtype=int)
        orig_theta = lnr.decision_function(eye) - lnr.intercept_[0]
        target_theta = deepcopy(orig_theta)

        spam_instances = []
        for inst in training_data + testing_data:
            if inst.get_label() == 1:
                spam_instances.append(inst)

        spam_features, ham_features = get_spam_features(spam_instances)

        # Set features to recognize spam as ham
        for index in spam_features:
            target_theta[index] = -10

        for index in ham_features:
            target_theta[index] = 0.01

        print('Features selected: ', np.array(spam_features))
        print('Number of features: ', len(spam_features))

        attacker = DataModification(orig_learner, target_theta, verbose=True)

    print(
        '###################################################################')
    print('START', attacker_name, 'attack.\n')

    attack_data = attacker.attack(training_data)

    print('\nEND', attacker_name, 'attack.')
    print(
        '###################################################################')
    print()

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print(
        '###################################################################')
    print('START Iterative Retraining learner.\n')

    # The iterative-retraining / TRIM step is elided in this snippet; the
    # trim_learner used below is assumed to have been trained here.

    print('\nEND Iterative Retraining learner.')
    print(
        '###################################################################')
    print()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predictions on the clean training data

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with testing data (the held-out remainder)

    original_pred_labels = orig_learner.predict(testing_data)
    attack_pred_labels = learner.predict(testing_data)

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 testing_data)

    print(
        '###################################################################')
    print('Predictions with testing dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with the TRIM learner

    data = training_data + testing_data
    trim_pred_labels = trim_learner.predict(data)
    normal_pred_labels = learner.predict(data)

    (trim_percent_correct, normal_percent_correct,
     difference) = calculate_correct_percentages(trim_pred_labels,
                                                 normal_pred_labels, data)

    print(
        '###################################################################')
    print('Predictions using TRIM learner:')
    print('TRIM learner correct percentage: ', trim_percent_correct, '%')
    print('Simple learner correct percentage: ', normal_percent_correct, '%')
    print('Difference: ', difference, '%')

    end = time.time()
    print('\nTotal time: ', round(end - begin, 2), 's', '\n', sep='')

    print('\nEND TRIM learner test.')
    print(
        '###################################################################')
    print()
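The retraining step in Example #7 is elided (see the placeholder comment above), and trim_learner is assumed to come from it. As a rough illustration of the TRIM idea (alternately fit a model and keep only the points it fits best), here is a sketch in plain scikit-learn, assuming numpy arrays X and labels y in {-1, +1}; it is not adlib's implementation:

import numpy as np
from sklearn import svm


def trim_fit(X, y, keep_frac=0.7, iterations=10):
    # TRIM-style loop: refit on the keep_frac of points with the smallest
    # hinge loss under the current model.
    n_keep = int(keep_frac * len(y))
    keep_idx = np.arange(n_keep)  # start from an arbitrary subset
    clf = svm.SVC(probability=True, kernel='linear')
    for _ in range(iterations):
        clf.fit(X[keep_idx], y[keep_idx])
        margins = clf.decision_function(X)
        losses = np.maximum(0, 1 - y * margins)  # hinge loss per point
        keep_idx = np.argsort(losses)[:n_keep]
    return clf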
Example #8
def test_k_insertion():
    """
    Use as follows:
    python3 adlib/tests/adversaries/k_insertion_test.py #-TO-ADD #-ITERATIONS
    """

    print(
        '\n#################################################################')
    print('START k-insertion attack.\n')

    if len(sys.argv) > 2:
        number_to_add = int(sys.argv[1])
        num_iterations = int(sys.argv[2])
    else:
        number_to_add = 1
        num_iterations = 4
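    # For example, `python3 adlib/tests/adversaries/k_insertion_test.py 4 10`
    # would set number_to_add=4 and num_iterations=10 (illustrative values).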

    # Data processing unit
    # The path is an index of 400 testing samples (raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=True,
                           raw=True)
    training_data = load_dataset(dataset)

    # Randomly choose ~12% of the dataset to reduce running time while
    # debugging; 10% proved too small for a realistic calculation.
    choices = np.random.binomial(1, 0.12, len(training_data))
    temp = []
    predict_data = []
    count = 0
    for i in range(len(training_data)):
        if choices[i] == 1:
            temp.append(training_data[i])
            count += 1
        else:
            predict_data.append(training_data[i])
    training_data = temp
    print('Training sample size: ', count, '/400\n', sep='')

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    before_attack_label = original_pred_labels[0]
    orig_learner = deepcopy(learner)

    # Do the attack
    attacker = KInsertion(learner,
                          training_data[0],
                          number_to_add=number_to_add,
                          num_iterations=num_iterations,
                          verbose=True)

    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print('Number of added instances: ', len(attack_data) - len(training_data))

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predictions on the clean training data
    after_attack_label = attack_pred_labels[0]

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (the held-out remainder)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print(
        '###################################################################')
    print('Predictions with held-out dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Report the selected instance's label before and after the attack

    print(
        '###################################################################')
    print('Selected instance true label: ', training_data[0].get_label())
    print('Selected instance predicted label BEFORE attack: ',
          before_attack_label)
    print('Selected instance predicted label AFTER attack: ',
          after_attack_label)

    ############################################################################
    # Output loss calculations

    print(
        '###################################################################')
    print('poison_instance loss before attack: ',
          round(attacker.poison_loss_before, 4))
    print('poison_instance loss after attack: ',
          round(attacker.poison_loss_after, 4))
    print('poison_instance loss difference: ',
          round(attacker.poison_loss_after - attacker.poison_loss_before, 4))

    print('\nEND k-insertion attack.')
    print(
        '#################################################################\n')
Example #9
def test_data_modification():
    print()
    print(
        '###################################################################')
    print('START data modification attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples (raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False,
                           raw=True)

    training_data, predict_data = dataset.split({'train': 50, 'test': 50})
    training_data = load_dataset(training_data)
    predict_data = load_dataset(predict_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    # Setting the default learner
    # Test simple learner svm
    orig_learning_model = svm.SVC(probability=True, kernel='linear')
    orig_learner = SimpleLearner(orig_learning_model, training_data)
    orig_learner.train()

    ############################################################################
    # Calculate target theta, 1 -> spam, -1 -> ham. For the target theta
    # calculation, I assume I know which spam I want classified as ham and
    # which features should have a disproportionate effect on the decision
    # function. For example, if feature #32 is something that all of my spam
    # has in common, I want the corresponding entry in target_theta (index
    # 32 - 1 = 31) to be disproportionately negative, so that the 1 indicating
    # that feature #32 is present is multiplied by a large negative number,
    # decreasing the value of the decision function and, ideally, making it
    # negative so that my spam is classified as ham.
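    #
    # Worked illustration (hypothetical numbers): if an instance has
    # feature #32 set and target_theta[31] == -10, that feature alone
    # contributes 1 * (-10) = -10 to the decision function, pulling the
    # score negative and hence toward the ham (-1) label.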

    lnr = orig_learner.model.learner
    eye = np.eye(training_data[0].get_feature_count(), dtype=int)
    orig_theta = lnr.decision_function(eye) - lnr.intercept_[0]
    target_theta = deepcopy(orig_theta)

    spam_instances = []
    for inst in training_data + predict_data:
        if inst.get_label() == 1:
            spam_instances.append(inst)

    spam_features, ham_features = get_spam_features(spam_instances)

    # Set features to recognize spam as ham
    for index in spam_features:
        target_theta[index] = -10

    for index in ham_features:
        target_theta[index] = 0.01

    print('Features selected: ', np.array(spam_features))
    print('Number of features: ', len(spam_features))

    ############################################################################

    # Get original predictions
    original_pred_labels = orig_learner.predict(training_data)

    # Do the attack
    attacker = DataModification(orig_learner, target_theta, verbose=True)
    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(
        training_data)  # predictions on the clean training data

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print(
        '###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (other half of dataset)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_percent_correct, attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print(
        '###################################################################')
    print('Predictions with other half of dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Count spam instances classified as ham after the attack

    spam_pred_labels = learner.predict(spam_instances)
    spam_ham_count = sum(map(lambda x: 1 if x == -1 else 0, spam_pred_labels))
    print(
        '###################################################################')
    print('Number of spam instances in the original dataset that were \n',
          'classified as ham after the attack: ',
          spam_ham_count,
          '/',
          len(spam_instances),
          sep='')

    end = time.time()
    print('\nTotal time: ', round(end - begin, 2), 's', '\n', sep='')

    print('\nEND data modification attack.')
    print(
        '###################################################################')
    print()
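get_spam_features, used by Examples #7 and #9, is also not shown. A rough sketch of the idea it must implement (select features that fire in most spam and features that fire in almost none), written here against a plain binary numpy matrix; the real helper works on adlib Instance objects, and the thresholds are assumptions:

import numpy as np


def get_spam_features(spam_matrix, spam_thresh=0.9, ham_thresh=0.05):
    # spam_matrix: (n_spam, n_features) binary array (an assumption).
    # Features present in at least spam_thresh of the spam instances are
    # returned as spam features; features present in at most ham_thresh
    # of them are treated as ham features.
    freq = spam_matrix.mean(axis=0)
    spam_features = np.where(freq >= spam_thresh)[0].tolist()
    ham_features = np.where(freq <= ham_thresh)[0].tolist()
    return spam_features, ham_features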
Example #10
def empty_learner():
    return SimpleLearner()
Example #11
def test_k_insertion():
    """
    Use as follows:
    python3 adlib/tests/adversaries/k_insertion_test.py NUMBER-TO-ADD
    """

    print()
    print('###################################################################')
    print('START k-insertion attack.\n')

    begin = time.time()

    # Data processing unit
    # The path is an index of 400 testing samples (raw email data).
    dataset = EmailDataset(path='./data_reader/data/raw/trec05p-1/test-400',
                           binary=False, raw=True)
    training_data, predict_data = dataset.split({'train': 20, 'test': 80})
    training_data = load_dataset(training_data)
    predict_data = load_dataset(predict_data)

    print('Training sample size: ', len(training_data), '/400\n', sep='')

    if len(sys.argv) > 1:
        number_to_add = int(sys.argv[1])
    else:
        number_to_add = int(0.25 * len(training_data))

    # Setting the default learner
    # Test simple learner svm
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, training_data)
    learner.train()

    original_pred_labels = learner.predict(training_data)
    before_attack_label = original_pred_labels[0]
    orig_learner = deepcopy(learner)

    # Do the attack
    attacker = KInsertion(learner,
                          training_data[0],
                          number_to_add=number_to_add,
                          verbose=True)

    attack_data = attacker.attack(training_data)

    # Retrain the model with poisoned data
    learning_model = svm.SVC(probability=True, kernel='linear')
    learner = SimpleLearner(learning_model, attack_data)
    learner.train()

    print('Number of added instances: ', len(attack_data) - len(training_data))

    ############################################################################
    # Calculate statistics with training data

    attack_pred_labels = learner.predict(training_data)  # predictions on the clean training data
    after_attack_label = attack_pred_labels[0]

    (orig_percent_correct,
     attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 training_data)

    print('###################################################################')
    print('Predictions with training dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Calculate statistics with predict data (the held-out remainder)

    original_pred_labels = orig_learner.predict(predict_data)
    attack_pred_labels = learner.predict(predict_data)

    (orig_percent_correct,
     attack_percent_correct,
     difference) = calculate_correct_percentages(original_pred_labels,
                                                 attack_pred_labels,
                                                 predict_data)

    print('###################################################################')
    print('Predictions with held-out dataset:')
    print('Original correct percentage: ', orig_percent_correct, '%')
    print('Attack correct percentage: ', attack_percent_correct, '%')
    print('Difference: ', difference, '%')

    ############################################################################
    # Report the selected instance's label before and after the attack

    print('###################################################################')
    print('Selected instance true label: ', training_data[0].get_label())
    print('Selected instance predicted label BEFORE attack: ',
          before_attack_label)
    print('Selected instance predicted label AFTER attack: ',
          after_attack_label)

    ############################################################################
    # Output loss calculations

    print('###################################################################')
    print('poison_instance loss before attack: ',
          round(attacker.poison_loss_before, 4))
    print('poison_instance loss after attack: ',
          round(attacker.poison_loss_after, 4))
    print('poison_instance loss difference: ',
          round(attacker.poison_loss_after - attacker.poison_loss_before, 4))

    end = time.time()
    print('\nTotal time: ', round(end - begin, 2), 's', '\n', sep='')

    print('\nEND k-insertion attack.')
    print('###################################################################')
    print()