Example 1

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        env.train_op = optimizer.minimize(env.loss)

    env.saver = tf.train.Saver()

with tf.variable_scope('model', reuse=True):
    env.adv_eps = tf.placeholder(tf.float32, (), name='adv_eps')
    env.adv_epochs = tf.placeholder(tf.int32, (), name='adv_epochs')
    env.adv_y = tf.placeholder(tf.int32, (), name='adv_y')

    env.x_fgsm = fgm(model, env.x, epochs=env.adv_epochs, eps=env.adv_eps)
    env.x_deepfool = deepfool(model, env.x, epochs=env.adv_epochs, batch=True)
    env.x_jsma = jsma(model,
                      env.x,
                      env.adv_y,
                      eps=env.adv_eps,
                      epochs=env.adv_epochs)

print('\nInitializing graph')

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())


def evaluate(sess, env, X_data, y_data, batch_size=128):
    """
    Evaluate TF model by running env.loss and env.acc.
    """
    print('\nEvaluating')
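    # (Snippet truncated here; a minimal sketch of the batched loop that
    # typically follows, assuming the env.x/env.y placeholders above and an
    # env.acc accuracy op as named in the docstring.)
    n_sample = X_data.shape[0]
    n_batch = int((n_sample + batch_size - 1) / batch_size)
    loss, acc = 0.0, 0.0
    for batch in range(n_batch):
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        cnt = end - start
        batch_loss, batch_acc = sess.run(
            [env.loss, env.acc],
            feed_dict={env.x: X_data[start:end],
                       env.y: y_data[start:end]})
        loss += batch_loss * cnt
        acc += batch_acc * cnt
    loss /= n_sample
    acc /= n_sample
    print(' loss: {0:.4f} acc: {1:.4f}'.format(loss, acc))
    return loss, acc
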
Example 2

    with tf.variable_scope('loss'):
        # (head of this call reconstructed from the truncated snippet)
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=env.y,
                                                       logits=logits)
        env.loss = tf.reduce_mean(xent, name='loss')

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        env.train_op = optimizer.minimize(env.loss)

    env.saver = tf.train.Saver()

with tf.variable_scope('model', reuse=True):
    env.target = tf.placeholder(tf.int32, (), name='target')
    env.adv_epochs = tf.placeholder_with_default(20, shape=(), name='epochs')
    env.adv_eps = tf.placeholder_with_default(0.2, shape=(), name='eps')
    env.x_jsma = jsma(model,
                      env.x,
                      env.target,
                      eps=env.adv_eps,
                      epochs=env.adv_epochs)

print('\nInitializing graph')

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())


def evaluate(sess, env, X_data, y_data, batch_size=128):
    """
    Evaluate TF model by running env.loss and env.acc.
    """
    print('\nEvaluating')
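
Once the graph nodes above are built, crafting a targeted adversarial batch is
a single session run; a minimal usage sketch, assuming X_test holds the clean
inputs (the target class 5 and the eps/epochs values are illustrative, and
env.adv_eps/env.adv_epochs fall back to their placeholder defaults when
omitted):

    X_adv = sess.run(env.x_jsma, feed_dict={env.x: X_test,
                                            env.target: 5,
                                            env.adv_eps: 0.5,
                                            env.adv_epochs: 30})
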
Example 3

        failure = 0
        false_negative = 0
        print('---------------------')
        print('ATTACKING BLACK-BOX CLASSIFIER...')
        adversarial_examples = []
        clean_examples = []
        for xmal in x_mal_train[0:x_malware.shape[0]]:
            if target_model.model.predict(xmal.reshape(1, -1)) == 0:
                # The target model already misses this sample (a false
                # negative), so there is nothing to attack.
                false_negative = false_negative + 1
            else:
                xmal = torch.from_numpy(xmal).float().cuda()
                result = attacks.jsma(target_model,
                                      surrogate_model,
                                      xmal.unsqueeze(0),
                                      0,
                                      max_distortion=0.027)

                # Signed sum of the perturbation; this equals the L1
                # distortion only if JSMA strictly adds features
                # (result >= xmal elementwise).
                distortion = torch.sum(result - xmal)
                if target_model.model.predict(
                        result.cpu().detach().numpy()) == 1:
                    # Still classified as malware: the attack failed.
                    failure = failure + 1
                else:
                    # Evasion succeeded; the snippet is truncated here, but
                    # the lists declared above are presumably filled like so:
                    adversarial_examples.append(result.cpu().detach().numpy())
                    clean_examples.append(xmal.cpu().numpy())
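
A natural way to close this loop is to summarize the campaign; a short sketch
using only the counters and lists from the snippet above:

        attempted = len(adversarial_examples) + failure
        print('false negatives (skipped): %d' % false_negative)
        print('attacks attempted:         %d' % attempted)
        if attempted > 0:
            print('evasion rate:              %.3f'
                  % (len(adversarial_examples) / float(attempted)))
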
Example 4

    with tf.variable_scope('loss'):
        # (head of this call reconstructed from the truncated snippet)
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=env.y,
                                                       logits=logits)
        env.loss = tf.reduce_mean(xent, name='loss')

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        env.train_op = optimizer.minimize(env.loss)

    env.saver = tf.train.Saver()

with tf.variable_scope('model', reuse=True):
    env.target = tf.placeholder(tf.int32, (), name='target')
    env.adv_epochs = tf.placeholder_with_default(20, shape=(), name='epochs')
    env.adv_eps = tf.placeholder_with_default(0.2, shape=(), name='eps')
    env.x_jsma = jsma(model,
                      env.x,
                      env.target,
                      eps=env.adv_eps,
                      epochs=env.adv_epochs,
                      score_fn=lambda t, o: t - o)

print('\nInitializing graph')

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())


def evaluate(sess, env, X_data, y_data, batch_size=128):
    """
    Evaluate TF model by running env.loss and env.acc.
    """
    print('\nEvaluating')
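
Example 4 differs from Example 2 only in the custom score_fn passed to jsma.
In the JSMA saliency map (Papernot et al.), each feature is ranked by
combining the gradient of the target class (t) with the summed gradient of
the other classes (o); the lambda above ranks by the plain difference
instead. A sketch of the contrast (the default shown is an assumption about
this library, not taken from the snippet):

    # score_fn receives, per feature, the target-class gradient t and the
    # summed gradient o of all other classes; higher scores change first.
    default_score = lambda t, o: t * tf.abs(o)  # assumed product-style default
    custom_score = lambda t, o: t - o           # as passed in Example 4
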
Example 5

def main(argv=None):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :return:
    """

    os.environ['KERAS_BACKEND'] = 'tensorflow'

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    
    fileName = "statistics/JAMA_dataCollection_%s.txt"%(FLAGS.round)
    fileHandler = open(fileName, 'a')

    ###########################################################################
    # Define the dataset and model
    ###########################################################################

    # Image dimension ordering should follow the TensorFlow convention
    if K.image_dim_ordering() != 'tf':
        K.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' "
              "to 'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    K.set_session(sess)
    print("Created TensorFlow session and set Keras backend.")

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist()
    print("Loaded MNIST test data.")

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model()
    
    first_dense = True
    
    if FLAGS.round == 1:
        weight_fn = 'tf-kernels-tf-dim-ordering/mnist.h5'
    else: 
        weight_fn = 'tf-kernels-tf-dim-ordering/mnist_retrained_pixelSets_5526_20_L1_0.03.h5'
    model.load_weights(weight_fn) # tf-kernels-tf-dim
    convert_all_kernels_in_model(model) # th-kernels-tf-dim

    count_dense = 0
    for layer in model.layers:
        if layer.__class__.__name__ == "Dense":
            count_dense += 1

    if count_dense == 1:
        first_dense = False # If there is only 1 dense, no need to perform row shuffle in Dense layer

    print("Nb layers : ", len(model.layers))

    for index, tf_layer in enumerate(model.layers):
        if tf_layer.__class__.__name__ in ['Convolution1D',
                                           'Convolution2D',
                                           'Convolution3D',
                                           'AtrousConvolution2D',
                                           'Deconvolution2D']:
            weights = tf_layer.get_weights() # th-kernels-tf-dim
            model.layers[index].set_weights(weights) # th-kernels-tf-dim

            nb_last_conv = tf_layer.nb_filter # preserve last number of convolutions to use with dense layers
            print("Converted layer %d : %s" % (index + 1, tf_layer.name))
        else:
            if tf_layer.__class__.__name__ == "Dense" and first_dense:
                weights = tf_layer.get_weights()
                nb_rows_dense_layer = weights[0].shape[0] // nb_last_conv

                print("Magic Number 1 : ", nb_last_conv)
                print("Magic nunber 2 : ", nb_rows_dense_layer)

                model.layers[index].set_weights(weights)

                first_dense = False
                print("Shuffled Dense Weights layer and saved %d : %s" % (index + 1, tf_layer.name))
            else:
                model.layers[index].set_weights(tf_layer.get_weights())
                print("Saved layer %d : %s" % (index + 1, tf_layer.name))
    
    predictions = model(x)
    print("Defined TensorFlow model graph.")
    
    
    #filename = "pic/%s.jpg"%(FLAGS.starting_index)
    #testImage = np.squeeze(X_test[(FLAGS.starting_index):(FLAGS.starting_index+1)][0])
    #print("%s--%s"%(str(np.amax(testImage)), str(np.amin(testImage))))
    #save(0,testImage,filename)
    

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    '''

    # Train an MNIST model if it does not exist in the train_dir folder
    saver = tf.train.Saver()
    save_path = os.path.join(FLAGS.train_dir, FLAGS.filename)
    if os.path.isfile(save_path):
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.filename))
    else:
        train_params = {
            'nb_epochs': FLAGS.nb_epochs,
            'batch_size': FLAGS.batch_size,
            'learning_rate': FLAGS.learning_rate
        }
        model_train(sess, x, y, predictions, X_train, Y_train,
                    args=train_params)
        saver.save(sess, save_path)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': FLAGS.batch_size}
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args=eval_params)
    assert X_test.shape[0] == 10000, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    
    '''


    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(FLAGS.source_samples) + ' * ' +
          str(FLAGS.nb_classes-1) + ' adversarial examples')

    # This array indicates whether an adversarial example was found for each
    # test set sample and target class
    results = np.zeros((FLAGS.nb_classes, FLAGS.source_samples), dtype='i')

    # This array contains the fraction of perturbed features for each test set
    # sample and target class
    perturbations = np.zeros((FLAGS.nb_classes, FLAGS.source_samples),
                             dtype='f')

    # Define the TF graph for the model's Jacobian
    grads = jacobian_graph(predictions, x, FLAGS.nb_classes)
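    # (Conceptually, jacobian_graph returns one gradient op per output class,
    # roughly: [tf.gradients(predictions[:, c], x)[0]
    #           for c in range(FLAGS.nb_classes)].)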

    # Initialize our array for grid visualization
    grid_shape = (FLAGS.nb_classes,
                  FLAGS.nb_classes,
                  FLAGS.img_rows,
                  FLAGS.img_cols,
                  FLAGS.nb_channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    
    eud = {}
    l1d = {}
    succ = {}
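
    # euclideanDistance and l1Distance are project helpers not shown in this
    # snippet; minimal sketches consistent with how they are called below:
    #   def euclideanDistance(a, b): return np.sqrt(np.sum((a - b) ** 2))
    #   def l1Distance(a, b): return np.sum(np.abs(a - b))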

    # Loop over the samples we want to perturb into adversarial examples
    for sample_ind in xrange(0, FLAGS.source_samples):
        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(Y_test[FLAGS.starting_index + sample_ind]))
        target_classes = other_classes(FLAGS.nb_classes, current_class)
        
        print('working with image id: %s\n' % (FLAGS.starting_index + sample_ind))
        filename = "pic/%s_jsma.jpg" % (FLAGS.starting_index + sample_ind)
        testImage = np.squeeze(X_test[(FLAGS.starting_index + sample_ind):
                                      (FLAGS.starting_index + sample_ind + 1)][0])
        save(0, testImage, filename)

        # For the grid visualization, keep original images along the diagonal
        #grid_viz_data[current_class, current_class, :, :, :] = np.reshape(
        #        X_test[sample_ind:(sample_ind+1)],
        #        (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))
                
        # Initialise per-sample data collection: large sentinel distances that
        # will be replaced by the minimum over all target classes, plus a
        # success flag.
        eud[sample_ind] = 1000.0
        l1d[sample_ind] = 1000.0
        succ[sample_ind] = 0

        # Loop over all target classes
        for target in target_classes:
            print('--------------------------------------')
            print('Creating adv. example for target class ' + str(target))

            # This call runs the Jacobian-based saliency map approach
            adv_x, res, percent_perturb = jsma(sess, x, predictions, grads,
                                               X_test[(FLAGS.starting_index+sample_ind):
                                                      (FLAGS.starting_index+sample_ind+1)],
                                               target, theta=FLAGS.thetaValue, gamma=0.05,
                                               increase=True, back='tf',
                                               clip_min=0, clip_max=1)
                                               
            #print(np.max(adv_x))

            # Display the original and adversarial images side-by-side
            #if FLAGS.viz_enabled:
            #    if 'figure' not in vars():
            #            figure = pair_visual(
            #                    np.reshape(X_test[(FLAGS.starting_index+sample_ind):(FLAGS.starting_index+sample_ind+1)],
            #                               (FLAGS.img_rows, FLAGS.img_cols)),
            #                    np.reshape(adv_x,
            #                               (FLAGS.img_rows, FLAGS.img_cols)))
            #    else:
            #        figure = pair_visual(
            #                np.reshape(X_test[(FLAGS.starting_index+sample_ind):(FLAGS.starting_index+sample_ind+1)],
            #                           (FLAGS.img_rows, FLAGS.img_cols)),
            #                np.reshape(adv_x, (FLAGS.img_rows,
            #                           FLAGS.img_cols)), figure)

            # Add our adversarial example to our grid data
            #grid_viz_data[target, current_class, :, :, :] = np.reshape(
            #        adv_x, (FLAGS.img_rows, FLAGS.img_cols, FLAGS.nb_channels))
                    
            filename = "pic/%s_jsma_%s_%s.jpg"%(FLAGS.starting_index+sample_ind,FLAGS.thetaValue,target)                        
            testImage1 = np.squeeze(adv_x[0])
            fileHandler.write("\nimage id: %s\n"%(FLAGS.starting_index+sample_ind))
            fileHandler.write("theta value: %s\n"%(FLAGS.thetaValue))
            fileHandler.write("target: %s\n"%(target))
            fileHandler.write("euclidean distance: %s\n"%(euclideanDistance(testImage1,testImage))) 
            fileHandler.write("L1 distance: %s\n"%(l1Distance(testImage1,testImage)))
            save(0,testImage1,filename)


            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb
            
            # Collect per-sample statistics: keep the minimum distortion over
            # all target classes, and record whether any attack succeeded.
            temp_x = X_test[FLAGS.starting_index + sample_ind]
            adv_x = adv_x[0]
            temp_eud = euclideanDistance(temp_x, adv_x)
            if eud[sample_ind] > temp_eud:
                eud[sample_ind] = temp_eud
            temp_l1d = l1Distance(temp_x, adv_x)
            if l1d[sample_ind] > temp_l1d:
                l1d[sample_ind] = temp_l1d
            if succ[sample_ind] == 0:
                succ[sample_ind] = res

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((FLAGS.nb_classes - 1) * FLAGS.source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.2f}'.format(succ_rate))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.2f}'.format(percent_perturbed))

    # Compute the average distortion for successful samples only (failed
    # entries are zeroed by the mask but still counted in the mean)
    percent_perturb_succ = np.mean(perturbations * (results == 1))
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.2f}'.format(percent_perturb_succ))
          
    # Zero out distances for samples where no attack succeeded, so the
    # averages below only accumulate distortion from successful attacks
    # (the denominator still counts every sample)
    for e in eud.keys():
        eud[e] = eud[e] * succ[e]
    for e in l1d.keys():
        l1d[e] = l1d[e] * succ[e]
    print("Average Euclidean distance is %s" % (sum(eud.values()) / float(len(eud))))
    print("Average L1 distance is %s" % (sum(l1d.values()) / float(len(l1d))))
    print("Success rate is %s" % (sum(succ.values()) / float(len(succ))))
    

    fileHandler.write("Average Euclidean distance is %s\n"%(sum(eud.values()) / float(len(eud))))
    fileHandler.write("Average L1 distance is %s\n"%(sum(l1d.values()) / float(len(l1d))))
    fileHandler.write("Success rate is %s\n"%(sum(succ.values()) / float(len(succ))))
    fileHandler.close()
    
    # Close TF session
    sess.close()