Example #1
# The opening of this constructor call is missing from the excerpt;
# BinaryInceptionModel (from the influence-release codebase) is an assumed name.
inception_model = BinaryInceptionModel(
    max_lbfgs_iter=max_lbfgs_iter,
    num_classes=num_classes,
    batch_size=batch_size,
    data_sets=data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='%s_inception_onlytop' % dataset_name)

inception_model.train()

# Predicted change in the loss at the test point from removing each training
# example (one score per training point).
inception_predicted_loss_diffs = inception_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(inception_model.data_sets.train.labels)),
    force_refresh=True)
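# Usage sketch (assumed, not part of the original script): rank training points
# by the predicted effect of removing them on the test loss. The sign convention
# (positive = removal raises the test loss) is an assumption.
sorted_idx = np.argsort(inception_predicted_loss_diffs)
print('Most harmful to remove:', sorted_idx[-5:])
print('Most helpful to remove:', sorted_idx[:5])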

x_test = X_test[test_idx, :]
y_test = Y_test[test_idx]

distances = dataset.find_distances(x_test, X_train)
flipped_idx = Y_train != y_test
rbf_margins_test = rbf_model.sess.run(rbf_model.margin,
                                      feed_dict=rbf_model.all_test_feed_dict)
rbf_margins_train = rbf_model.sess.run(rbf_model.margin,
                                       feed_dict=rbf_model.all_train_feed_dict)
inception_Y_pred_correct = get_Y_pred_correct_inception(inception_model)

# Save everything computed above; the keyword arguments after test_idx were cut
# off in the excerpt and are reconstructed from the variables defined earlier.
np.savez('output/rbf_results',
         test_idx=test_idx,
         distances=distances,
         flipped_idx=flipped_idx,
         rbf_margins_test=rbf_margins_test,
         rbf_margins_train=rbf_margins_train,
         inception_Y_pred_correct=inception_Y_pred_correct,
         inception_predicted_loss_diffs=inception_predicted_loss_diffs)
Example #2
test_idx = 8
# Remove the most influential training points, retrain, and compare the actual
# loss changes with CG-based influence predictions.
actual_loss_diffs, predicted_loss_diffs_cg, indices_to_remove = experiments.test_retraining(
    tf_model,
    test_idx,
    iter_to_load=0,
    force_refresh=False,
    num_to_remove=500,
    remove_type='maxinf',
    random_seed=0)

# LiSSA
np.random.seed(17)
predicted_loss_diffs_lissa = tf_model.get_influence_on_test_loss(
    [test_idx],
    indices_to_remove,
    approx_type='lissa',
    approx_params={
        'scale': 25,
        'recursion_depth': 5000,
        'damping': 0,
        'batch_size': 1,
        'num_samples': 10
    },
    force_refresh=True)

np.savez('output/spam_logreg_lbfgs_retraining-500.npz',
         actual_loss_diffs=actual_loss_diffs,
         predicted_loss_diffs_cg=predicted_loss_diffs_cg,
         predicted_loss_diffs_lissa=predicted_loss_diffs_lissa,
         indices_to_remove=indices_to_remove)
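# Usage sketch (assumed, not part of the original script): quantify how well
# each approximation tracks actual retraining with Pearson correlation.
import scipy.stats

r_cg, _ = scipy.stats.pearsonr(actual_loss_diffs, predicted_loss_diffs_cg)
r_lissa, _ = scipy.stats.pearsonr(actual_loss_diffs, predicted_loss_diffs_lissa)
print('CG vs. retraining:    Pearson r = %.3f' % r_cg)
print('LiSSA vs. retraining: Pearson r = %.3f' % r_lissa)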
Example #3
    # plt.title("centroid 9")
    # plt.show()

    mask_neg = Y_test == -1
    # Map closest_7_idx, an index within the negative-class subset, back to its
    # position in the full test set.
    test_idx_7 = np.where(mask_neg)[0][closest_7_idx]
    num_train = len(tf_model.data_sets.train.labels)

    influences_7 = tf_model.get_influence_on_test_loss(
        [test_idx_7],
        np.arange(len(tf_model.data_sets.train.labels)),
        force_refresh=True) * num_train

    fittedKmeans_1 = KMeans(n_clusters=numClusters,
                            random_state=randomState).fit(class_wise_1)
    clusters_1 = fittedKmeans_1.predict(class_wise_1)
    centroids_1 = fittedKmeans_1.cluster_centers_
    #pcaVis(class_wise_1, clusters_1, centroids_1, "for fours")
    distanceScoresByCluster_1, minDist_1, maxDist_1 = getDistancesbyCluster(
        class_wise_1, clusters_1, centroids_1)
    closest_1_idx = getClosest(distanceScoresByCluster_1)[0]
    # plt.figure(2)
    # plt.imshow(class_wise_1[closest_1_idx, :].reshape(28,-1))
    # plt.title("centroid 4")
    # plt.show()
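    # The helpers getDistancesbyCluster and getClosest are not defined in this
    # excerpt. A minimal sketch of what they might compute, assuming each
    # example is scored by its distance to its assigned centroid:
    def getDistancesbyCluster(X, clusters, centroids):
        # Distance from every example to the centroid of its assigned cluster.
        dists = np.linalg.norm(X - centroids[clusters], axis=1)
        return dists, dists.min(), dists.max()

    def getClosest(distance_scores):
        # Example indices ordered from closest to farthest from their centroid.
        return np.argsort(distance_scores)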
Example #4
#         top_model.update_train_x(X_train)
#         top_model.train()

#     if attack_success:
#         break

### Try attacking all test examples
step_size = 0.005
test_indices = np.arange(num_test)
test_description = 'all_%s' % dataset_name

## Find which training indices to poison
# Use the top model to quickly generate the inverse HVP
with top_graph.as_default():
    top_model.get_influence_on_test_loss(test_indices, [0],
                                         test_description=test_description,
                                         force_refresh=True)
# Reuse the inverse-HVP cache computed with the cheap top model for the full
# model by copying it to the filename the full model expects.
copyfile(
    'output/%s-cg-normal_loss-test-%s.npz' %
    (top_model_name, test_description),
    'output/%s-cg-normal_loss-test-%s.npz' %
    (full_model_name, test_description))

with full_graph.as_default():
    grad_influence_wrt_input_val = full_model.get_grad_of_influence_wrt_input(
        np.arange(num_train),
        test_indices,
        force_refresh=False,
        test_description=test_description,
        loss_type='normal_loss')
    # The remaining arguments of this call were cut off in the excerpt; the
    # names below are assumptions about how the attacked points are chosen.
    indices_to_poison = select_examples_to_attack(
        full_model,
        num_to_poison,
        grad_influence_wrt_input_val,
        step_size=step_size)
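    # Sketch (assumed, not part of the excerpt): one iterative poisoning step.
    # Each selected training input is nudged along the sign of the influence
    # gradient to raise the test loss; the sign convention and the [0, 1]
    # clipping range are assumptions.
    X_train_poisoned = X_train.copy()
    X_train_poisoned[indices_to_poison] += step_size * np.sign(
        grad_influence_wrt_input_val[indices_to_poison])
    X_train_poisoned = np.clip(X_train_poisoned, 0.0, 1.0)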
Example #5
# The opening of this constructor call is missing from the excerpt;
# BinaryLogisticRegressionWithLBFGS (from influence-release) is an assumed name.
tf_model = BinaryLogisticRegressionWithLBFGS(
    data_sets=lr_data_sets,
    initial_learning_rate=initial_learning_rate,
    keep_probs=keep_probs,
    decay_epochs=decay_epochs,
    mini_batch=False,
    train_dir='output',
    log_dir='log',
    model_name='mnist-17_logreg')

tf_model.train()

test_idx = 30
num_train = len(tf_model.data_sets.train.labels)

# Influence approximates the effect of infinitesimally upweighting a training
# point; multiplying by num_train rescales it to a leave-one-out estimate.
influences = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=True) * num_train

influences_without_train_error = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=False,
    ignore_training_error=True) * num_train

influences_without_hessian = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=False,
    ignore_hessian=True) * num_train

influences_without_both = tf_model.get_influence_on_test_loss(
    [test_idx],
    np.arange(len(tf_model.data_sets.train.labels)),
    force_refresh=False,
    ignore_training_error=True,
    ignore_hessian=True) * num_train
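# Usage sketch (assumed, not part of the original script): measure how much
# each ablation distorts the influence estimates relative to the full version.
import scipy.stats

for name, vals in [('w/o train error', influences_without_train_error),
                   ('w/o Hessian', influences_without_hessian),
                   ('w/o both', influences_without_both)]:
    r, _ = scipy.stats.pearsonr(influences, vals)
    print('%s vs. full influence: Pearson r = %.3f' % (name, r))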