예제 #1
0
def run_fuzzing(dataset_name,
                model,
                x_train,
                y_train,
                x_test,
                y_test,
                model_layer,
                folder_to_store,
                order_numbers=10,
                chunk_size=3000):
    """Fuzz the training set chunk by chunk and store the selected mutants.

    For every chunk ``order_number`` in ``range(order_numbers)`` whose result
    file ``<folder_to_store>nc_index_<order_number>.npy`` does not exist yet,
    each image of the chunk is passed through ``mutate``.  When the class
    predicted by ``model`` differs between the mutant and the original image,
    ``compare_nc`` is consulted and the mutant is kept if it returns a truthy
    value (presumably: the mutant increases neuron coverage -- confirm against
    ``compare_nc``).  The kept mutants are saved with ``np.save`` as a dict
    ``{index in x_train: mutant}``.

    Args:
        dataset_name: Forwarded unchanged to ``mutate``.
        model: Object exposing ``predict``; also forwarded to ``compare_nc``.
        x_train: Indexable collection of training images to fuzz.
        y_train: Forwarded to ``compare_nc``.
        x_test: Forwarded to ``compare_nc``.
        y_test: Forwarded to ``compare_nc``.
        model_layer: Forwarded to ``compare_nc``.
        folder_to_store: Prefix prepended verbatim to the file name (no
            separator is inserted), so it normally has to end with a path
            separator.
        order_numbers: Number of chunks to process.
        chunk_size: Number of training images per chunk.  Defaults to 3000,
            the value that used to be hard-coded.
    """
    progress_every = 1000  # print a marker every N images inside a chunk
    total = len(x_train)

    for order_number in range(order_numbers):
        file_path = '{}nc_index_{}.npy'.format(folder_to_store, order_number)

        # Only perform fuzzing if the result file does not exist yet.
        if os.path.exists(file_path):
            continue

        # Create the target directory up front so that np.save cannot fail
        # on a missing folder after the expensive fuzzing loop has finished.
        parent = os.path.dirname(file_path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Clamp the chunk to the available data; chunks lying completely past
        # the end of x_train become empty and yield an empty result file.
        lower_bound = min(chunk_size * order_number, total)
        upper_bound = min(chunk_size * (order_number + 1), total)

        nc_index = {}
        for i in range(lower_bound, upper_bound):
            original = x_train[i]
            new_image = mutate(original, dataset_name)

            # Progress marker, relative to the start of the current chunk.
            # (The previous check used a 5000-image stride that did not match
            # the chunk size, so it only ever fired once.)
            offset = i - lower_bound
            if offset and offset % progress_every == 0:
                print(
                    "-------------------------------------THIS IS {}-------------------------------------"
                    .format(i))

            pred_new = softmax(
                model.predict(np.expand_dims(new_image,
                                             axis=0))).argmax(axis=-1)
            pred_old = softmax(
                model.predict(np.expand_dims(original,
                                             axis=0))).argmax(axis=-1)

            # Only mutants that change the predicted class are candidates.
            if pred_new != pred_old:
                nc_symbol = compare_nc(model, x_train, y_train, x_test,
                                       y_test, new_image, original,
                                       model_layer)
                if nc_symbol:
                    nc_index[i] = new_image

        print(len(nc_index))
        np.save(file_path, nc_index)
예제 #2
0
                # Fuzz one 3000-image slice of x_train, selected by
                # order_number, and collect the mutants accepted by compare_nc
                # in nc_index (bound outside this fragment).
                # NOTE(review): the slice is not clamped to len(x_train), so
                # x_train[i] is indexed past the end when the slice exceeds the
                # data -- confirm the caller bounds order_number.
                nc_number = 0
                for i in range(3000 * order_number, 3000 * (order_number + 1)):
                    new_image = mutate(x_train[i], dataset)

                    # NOTE(review): the marker uses a 5000 stride while the
                    # loop uses 3000, so it only fires for order_number == 0
                    # (at i == 1000).
                    if i == 5000 * order_number + 1000 or i == 5000 * order_number + 3000:
                        print(
                            "-------------------------------------THIS IS {}-------------------------------------"
                            .format(i))
                    # Candidate only if the predicted class of the mutant
                    # differs from the predicted class of the original image.
                    if softmax(model.predict(np.expand_dims(
                            new_image, axis=0))).argmax(axis=-1) != softmax(
                                model.predict(
                                    np.expand_dims(x_train[i],
                                                   axis=0))).argmax(axis=-1):

                        nc_symbol = compare_nc(model, x_train, y_train, x_test,
                                               y_test, new_image, x_train[i],
                                               model_layer)

                        # Keep the mutant, keyed by its index in x_train, when
                        # compare_nc returns True.
                        if nc_symbol == True:
                            nc_index[i] = new_image
                            nc_number += 1

                # Number of mutants kept for this slice.
                print(nc_number)

                ### save data
                # nc_index is stored under fuzzing/<dataset>/<model_name>/.
                # folder_to_store already ends with '/', so the path below
                # contains a doubled separator.
                folder_to_store = 'fuzzing/{}/{}/'.format(dataset, model_name)
                os.makedirs(folder_to_store, exist_ok=True)
                np.save(
                    folder_to_store + '/nc_index_{}.npy'.format(order_number),
                    nc_index)
예제 #3
0
            # Walk [lower_bound, upper_bound) in batches of `step` images and
            # record in nc_index every mutant for which compare_nc returns
            # True, keyed by its index in x_test.
            for i in tqdm(range(lower_bound, upper_bound, step),
                          desc="Total progress:"):

                # The last batch is cut off at upper_bound.
                left_idx = i
                right_idx = min(i + step, upper_bound)

                # The predictions of the mutated and the original batch are
                # computed in one predict() call each.
                # NOTE(review): pred_new / pred_old are never read below, so
                # compare_nc runs for every image, not only for those whose
                # predicted class changed -- confirm this is intended.
                for index, (pred_new, pred_old) in enumerate(
                        zip(
                            softmax(
                                model.predict(
                                    np.array(new_images[left_idx:right_idx]))).
                            argmax(axis=-1),
                            softmax(model.predict(
                                x_test[left_idx:right_idx])).argmax(axis=-1))):
                    # i + index is the absolute position of the image.
                    nc_symbol = compare_nc(model, x_train, y_train, x_test,
                                           y_test, new_images[i + index],
                                           x_test[i + index], model_layer)
                    if nc_symbol == True:
                        nc_index[i + index] = new_images[i + index]
                        nc_number += 1

            print(
                "Log: new image can cover more neurons: {}".format(nc_number))
            # Persist the index -> mutant mapping for this range.
            np.save(nc_index_path, nc_index)

    # Load the two saved index files (order 0 and 1) from store_path and
    # overwrite the corresponding entries of x_test in place with the stored
    # mutants.
    # NOTE: allow_pickle=True unpickles the file contents; only load files
    # produced by this project.
    for order_number in range(2):
        index = np.load(os.path.join(
            store_path, 'nc_index_test_{}.npy'.format(order_number)),
                        allow_pickle=True).item()
        # y is a position in x_test, x the image that replaces it.
        for y, x in index.items():
            x_test[y] = x
예제 #4
0
def _append_mutants(x_train, y_train, index):
    """Append the mutants of *index* to the training set in one concatenate.

    *index* maps a position in ``x_train`` to the mutant created from that
    image; every mutant inherits the label ``y_train[position]``.  The inputs
    are returned unchanged when *index* is empty.
    """
    if not index:
        return x_train, y_train
    positions = list(index.keys())
    mutants = np.stack([index[pos] for pos in positions], axis=0)
    labels = np.asarray(y_train)[positions]
    return (np.concatenate((x_train, mutants), axis=0),
            np.concatenate((y_train, labels), axis=0))


def cycle(T: int):
    """Run cycle *T* of the fuzz / retrain / evaluate loop.

    Reads the module-level settings ``THIS_MODEL_DIR``, ``dataset_name``,
    ``model_name``, ``is_improve``, ``improve_coverage``, ``model_layer`` and
    ``l``.

    Steps:
        1. Load the saved model with index 0.
        2. Create ``new_test/<dataset>/<model>/x_test_new.npy`` if it is
           missing.  For ``T != 0``: mutate the training set (cached in
           ``new_images.npy``), replay the examples selected in cycles
           ``1 .. T-1`` and select the examples of cycle ``T``
           (cached in ``nc_index_<T>.npy``).
        3. Append the examples of cycle ``T`` to the training data.
        4. Evaluate coverage, retrain and save the model as ``<T>.h5`` (only
           for ``T != 0``), then evaluate robustness of the loaded model.

    Args:
        T: Cycle number.  ``0`` only builds the mutated test set and
            evaluates the loaded model.
    """
    fuzz_chunk = 5000  # training images examined per cycle
    batch = 500  # images per predict() call

    # Step 1. Load the current model M_i
    # NOTE: the checkpoint index is fixed to 0, so every cycle starts from the
    # same saved model rather than from the model of cycle T - 1.
    current_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(0))
    current_model = load_model(current_model_path)

    # Step 2. According to the current M_i and dataset, generate examples T_i
    ## Load the current dataset we have
    x_train, y_train, x_test, y_test = load_data(dataset_name)

    store_path = 'new_test/{}/{}'.format(dataset_name, model_name)
    x_test_new_path = os.path.join(store_path, 'x_test_new.npy')
    if not os.path.exists(x_test_new_path):
        print("Generate test set")

        new_images = [
            mutate(image) for image in tqdm(x_test,
                                            desc="transformation ......")
        ]

        # Build the mutated test set in a copy.  Writing into x_test itself
        # would hand a polluted "original" test set to the evaluations below
        # on the first run only.
        x_test_new = np.copy(x_test)
        nc_number = 0
        # Every mutant accepted by compare_nc replaces its original; unlike
        # the training-set fuzzing below, no prediction-change filter is
        # applied here.
        for idx in tqdm(range(len(x_test)), desc="Total progress:"):
            nc_symbol = compare_nc(current_model, x_train, y_train, x_test,
                                   y_test, new_images[idx], x_test[idx],
                                   model_layer)
            if nc_symbol:
                x_test_new[idx] = new_images[idx]
                nc_number += 1

        print("Log: new image can cover more neurons: {}".format(nc_number))
        os.makedirs(store_path, exist_ok=True)
        np.save(x_test_new_path, x_test_new)

    data_folder = 'fuzzing/{}/{}/{}'.format(dataset_name, model_name,
                                            is_improve)
    os.makedirs(data_folder, exist_ok=True)
    if T != 0:
        # Mutants of the *original* training set, cached across cycles.
        mutants_path = os.path.join(data_folder, "new_images.npy")
        if not os.path.exists(mutants_path):
            print("Log: Start do transformation in images")
            new_images = [mutate(image) for image in tqdm(x_train)]
            np.save(mutants_path, new_images)
        else:
            print("Log: Load mutantions.")
            new_images = np.load(mutants_path)

        # Replay the examples selected in the previous cycles.
        for i in range(1, T):
            index = np.load(os.path.join(data_folder,
                                         'nc_index_{}.npy'.format(i)),
                            allow_pickle=True).item()
            x_train, y_train = _append_mutants(x_train, y_train, index)

        index_path = os.path.join(data_folder, 'nc_index_{}.npy'.format(T))
        if not os.path.exists(index_path):
            ## Generate new examples
            nc_index = {}
            for start in tqdm(range(fuzz_chunk * (T - 1), fuzz_chunk * T,
                                    batch),
                              desc="Total progress:"):
                mutant_batch = np.array(new_images[start:start + batch])
                original_batch = x_train[start:start + batch]
                # Past the end of the data: nothing left to predict on.
                if len(mutant_batch) == 0 or len(original_batch) == 0:
                    break

                preds_new = softmax(
                    current_model.predict(mutant_batch)).argmax(axis=-1)
                preds_old = softmax(
                    current_model.predict(original_batch)).argmax(axis=-1)

                for offset, (pred_new, pred_old) in enumerate(
                        zip(preds_new, preds_old)):
                    # Only mutants that change the predicted class count as
                    # adversarial examples.
                    if pred_new == pred_old:
                        continue
                    position = start + offset
                    nc_symbol = compare_nc(current_model, x_train, y_train,
                                           x_test, y_test,
                                           new_images[position],
                                           x_train[position], model_layer)
                    # Keep coverage-improving mutants when improve_coverage
                    # is set, and the non-improving ones otherwise.
                    if bool(nc_symbol) == bool(improve_coverage):
                        nc_index[position] = new_images[position]

            print("Log: new image can/cannot cover more neurons: {}".format(
                len(nc_index)))

            np.save(index_path, nc_index)

        # Step 3. Retrain M_i against T_i, to obtain M_{i+1}
        ## Augment the newly generate examples into the training data
        index = np.load(index_path, allow_pickle=True).item()
        x_train, y_train = _append_mutants(x_train, y_train, index)

    # Step 4. Evaluate the current model
    ## Evaluate coverage
    print(x_train.shape)
    print("\nEvaluate coverage ......")
    evaluate_coverage(current_model, l, T, x_train, y_train, x_test, y_test)

    ## Retrain the model
    if T != 0:
        retrained_model = retrain(current_model,
                                  x_train,
                                  y_train,
                                  x_test,
                                  y_test,
                                  batch_size=128,
                                  epochs=5)
        new_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(T))
        retrained_model.save(new_model_path)

    ## Evaluate robustness
    print("\nEvaluate robustness ......")
    x_test_new = np.load(x_test_new_path, allow_pickle=True)
    evaluate_robustness(T, current_model, x_test, y_test, x_test_new)

    print("Done\n")
예제 #5
0
def _extend_training_set(x_train, y_train, index):
    """Append the mutants of *index* to the training set in one concatenate.

    *index* maps a position in ``x_train`` to the mutant created from that
    image; every mutant inherits the label ``y_train[position]``.  The inputs
    are returned unchanged when *index* is empty.
    """
    if not index:
        return x_train, y_train
    positions = list(index.keys())
    mutants = np.stack([index[pos] for pos in positions], axis=0)
    labels = np.asarray(y_train)[positions]
    return (np.concatenate((x_train, mutants), axis=0),
            np.concatenate((y_train, labels), axis=0))


def cycle(T: int):
    """Run cycle *T* (``T >= 1``) of the fuzz / retrain / evaluate loop.

    Reads the module-level settings ``THIS_MODEL_DIR``, ``dataset_name``,
    ``model_name``, ``is_improve``, ``improve_coverage``, ``model_layer`` and
    ``l``.

    Steps:
        1. Load the model saved by cycle ``T - 1``.
        2. Replay earlier examples, then fuzz training images
           ``5000 * (T - 1) .. 5000 * T - 1`` and save the selected mutants
           as ``fuzzing/<dataset>/<model>/<is_improve>/nc_index_<T>.npy``.
        3. Append the selected mutants to the training data, retrain and save
           the model as ``<T>.h5``.
        4. Evaluate coverage and robustness of the retrained model.

    Args:
        T: Cycle number, must be positive.
    """
    assert T > 0
    fuzz_chunk = 5000  # training images examined per cycle

    # Step 1. Load the current model M_i
    current_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR,
                                                   dataset_name, model_name,
                                                   is_improve, str(T - 1))
    current_model = load_model(current_model_path)

    # Step 2. According to the current M_i and dataset, generate examples T_i
    ## Load the current dataset we have
    x_train, y_train, x_test, y_test = load_data(dataset_name)
    # NOTE(review): earlier examples are read from 'fuzzing/nc_index_<i>.npy'
    # with i = 0 .. T-2, whereas this function saves its own results as
    # '<data_folder>/nc_index_<T>.npy' -- confirm which files are meant here.
    for i in range(T - 1):
        index = np.load('fuzzing/nc_index_{}.npy'.format(i),
                        allow_pickle=True).item()
        x_train, y_train = _extend_training_set(x_train, y_train, index)

    ## Generate new examples
    nc_index = {}
    # Clamp to the available data so the loop cannot index past x_train.
    stop = min(fuzz_chunk * T, len(x_train))
    for i in range(fuzz_chunk * (T - 1), stop):
        original = x_train[i]
        new_image = mutate(original)

        # Progress marker only.  A leftover `break` here used to end the loop
        # on its very first iteration (the start index is always a multiple
        # of 100), so no example was ever generated.
        if i % 100 == 0:
            print('.', end='')

        pred_new = softmax(
            current_model.predict(np.expand_dims(new_image,
                                                 axis=0))).argmax(axis=-1)
        pred_old = softmax(
            current_model.predict(np.expand_dims(original,
                                                 axis=0))).argmax(axis=-1)
        if pred_new != pred_old:
            # find an adversarial example
            nc_symbol = compare_nc(current_model, x_train, y_train, x_test,
                                   y_test, new_image, original, model_layer)
            # Keep coverage-improving mutants when improve_coverage is set,
            # and the non-improving ones otherwise.
            if bool(nc_symbol) == bool(improve_coverage):
                nc_index[i] = new_image

    print(len(nc_index))
    data_folder = 'fuzzing/{}/{}/{}'.format(dataset_name, model_name,
                                            is_improve)
    os.makedirs(data_folder, exist_ok=True)
    np.save(os.path.join(data_folder, 'nc_index_{}.npy'.format(T)), nc_index)

    # Step 3. Retrain M_i against T_i, to obtain M_{i+1}
    ## Augment the newly generate examples into the training data
    ## (nc_index is exactly what was just written to disk)
    x_train, y_train = _extend_training_set(x_train, y_train, nc_index)

    ## Retrain the model
    retrained_model = retrain(current_model,
                              x_train,
                              y_train,
                              x_test,
                              y_test,
                              batch_size=128,
                              epochs=5)
    new_model_path = "{}{}/{}/{}/{}.h5".format(THIS_MODEL_DIR, dataset_name,
                                               model_name, is_improve, str(T))
    retrained_model.save(new_model_path)

    # Step 4. Evaluate the current model
    ## Evaluate coverage
    evaluate_coverage(retrained_model, l, T, x_train, y_train, x_test, y_test)

    ## Evaluate robustness
    store_path = 'new_test/{}/{}'.format(dataset_name, model_name)
    x_test_new = np.load(os.path.join(store_path, 'x_test_new.npy'))
    evaluate_robustness(T, retrained_model, x_test, y_test, x_test_new)

    print("Done")