Example #1
    def __init__(self, filename=None, absolute=False, tolerance=0.1):
        try:
            if filename is not None:
                f = open(filename, 'r')
                # Get the scan's pose (stored in cartesian form)
                l = f.readline()
                scan_loc = reg.findall(l)[0]
                self.posx = float(scan_loc[0])
                self.posy = float(scan_loc[1])
                self.rot = float(scan_loc[2])
                self.scan_points = []
                if absolute:
                    self.scan_points.append(polar2origincartesian(
                        self, float(scan_loc[3]), float(scan_loc[4])))
                else:
                    self.scan_points.append(polar2cartesian(
                        float(scan_loc[3]), float(scan_loc[4])))

                line = f.readline()
                # Data is in polar form 
                while line:
                    coords = list(map(float, reg.findall(line)[0][5:7]))
                    if absolute:
                        self.scan_points.append(polar2origincartesian(self, coords[0], coords[1]))
                    else:
                        self.scan_points.append(polar2cartesian(coords[0], coords[1]))

                    line = f.readline()
                f.close()
        except ValueError as e:
            print "Error in file: ", filename
            raise e
        self.scan_points = subsample(self.scan_points, tolerance)
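The `subsample` helper called above (and in Examples #4 and #5 below) is not shown on this page. A minimal sketch of a tolerance-based decimator, assuming `tolerance` is the minimum spacing between kept points; this is a guess, not the source's implementation:

import math

def subsample(points, tolerance):
    # Hypothetical sketch: drop any point closer than `tolerance` to the
    # last point kept, thinning dense scans while preserving their shape.
    kept = []
    for p in points:
        if not kept or math.hypot(p[0] - kept[-1][0],
                                  p[1] - kept[-1][1]) >= tolerance:
            kept.append(p)
    return kept

Example #2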
def subsample_and_serialize(data_root, in_folder, out_folder):
    """Read MatLab files, optionally attenuate and subsample, and write to TFRecords files

        NOTE - place test set files in different folder than training mat files

        Arguments:
            data_root -- root folder for project's data
            in_folder -- folder where training set .mat files are located
            out_folder -- folder where *.train and *.valid Protobuf files will be written
        """
    raw_folder = os.path.join(data_root, in_folder)
    file_names = filter(lambda file_name: file_name.endswith(".mat"),
                        os.listdir(raw_folder))
    preprocessed_dir = os.path.join(data_root, out_folder)

    if not os.path.exists(preprocessed_dir):
        os.mkdir(preprocessed_dir)

    for mat_file_name in file_names:
        train_file_name = os.path.join(preprocessed_dir,
                                       mat_file_name.replace(".mat", ".train"))
        valid_file_name = os.path.join(preprocessed_dir,
                                       mat_file_name.replace(".mat", ".valid"))
        if os.path.exists(train_file_name) and os.path.exists(valid_file_name):
            print("Skipping existing file:", train_file_name)
            print("Skipping existing file:", valid_file_name)
            continue

        label = get_label(mat_file_name)
        try:
            data = mat_to_data(os.path.join(raw_folder, mat_file_name))
        except ValueError:
            print("Skipping broken file:", mat_file_name)
            continue

        xs = data["data"]
        xs = normalize(xs)

        if SUBSAMPLE:
            xs = subsample(xs, channels=CHANNELS, rate=SUBSAMPLE_RATE)

        num_windows = xs.shape[0] // WINDOW_SIZE
        xs = xs[:num_windows * WINDOW_SIZE]  # drop any remainder so the reshape is exact
        xs = np.reshape(xs, (num_windows, WINDOW_SIZE, CHANNELS))

        train_writer = tf.python_io.TFRecordWriter(train_file_name)
        valid_writer = tf.python_io.TFRecordWriter(valid_file_name)
        print("Writing file:", train_file_name)
        print("Writing file:", valid_file_name)

        for idx, x in enumerate(xs):
            example = to_example_proto(x, label)
            if idx % 20 == 0:
                valid_writer.write(example.SerializeToString())
            else:
                train_writer.write(example.SerializeToString())

        train_writer.close()
        valid_writer.close()
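`to_example_proto` is not shown on this page. A plausible sketch that packs one (WINDOW_SIZE, CHANNELS) float window and its integer label into a tf.train.Example; the feature names "data" and "label" are assumptions:

import tensorflow as tf

def to_example_proto(x, label):
    # Hypothetical sketch: flatten the window into a float list and store
    # the label as a single int64; the feature keys are assumed.
    return tf.train.Example(features=tf.train.Features(feature={
        "data": tf.train.Feature(
            float_list=tf.train.FloatList(value=x.flatten().tolist())),
        "label": tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(label)])),
    }))

Example #3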
    def _sample_from_m_matches(m):
        indicators = tf.equal(num_matches, tf.cast(m, tf.float32))
        # debug:
        # hist = tf.bincount(tf.cast(num_matches, tf.int32), minlength=n, maxlength=n)
        # indicators = tf.Print(indicators, [m, self._k, num_matches, hist],
        #                       summarize=1000)
        if self._subsample_hard_examples:
            return util.topk_or_pad_inds_with_resampling(
                indicators, difficulties, num_samples)
        else:
            return util.subsample(indicators, num_samples)
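`util.subsample(indicators, num_samples)` takes a boolean indicator tensor and a sample budget. One common reading in detection codebases, sketched here as an assumption, is to pick up to `num_samples` random indices where the indicator is set:

import tensorflow as tf

def subsample(indicators, num_samples):
    # Hypothetical sketch (TF1-style): gather the indices of True entries,
    # shuffle them, and keep at most num_samples of them.
    idx = tf.reshape(tf.where(indicators), [-1])
    idx = tf.random_shuffle(idx)
    return idx[:num_samples]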
Example #4
    def __init__(self, filename, numpoints, samplesize=-1, tolerance=0.1):
        self.points = []
        self.minX = sys.maxsize
        self.minY = sys.maxsize
        self.maxX = -sys.maxsize - 1
        self.maxY = -sys.maxsize - 1
        try:
            if filename is not None:
                with open(filename, 'r') as f:
                    l = f.readline()
                    while l:
                        point = list(map(float, l.rstrip('\n').split(",")))
                        # Independent checks: a single point may extend both bounds.
                        if point[0] < self.minX:
                            self.minX = point[0]
                        if point[0] > self.maxX:
                            self.maxX = point[0]
                        if point[1] < self.minY:
                            self.minY = point[1]
                        if point[1] > self.maxY:
                            self.maxY = point[1]
                        self.points.append(point)
                        l = f.readline()
                    # Data is in polar form
        except ValueError as e:
            print "Error in file: ", filename
            raise e

        self.points = subsample(self.points, tolerance)

        dimX = self.maxX - self.minX
        dimY = self.maxY - self.minY
        Xpoints = int(
            math.sqrt(((dimX * numpoints) / dimY) + (math.pow(dimX - dimY, 2) /
                                                     (4 * (dimY**2)))) -
            ((dimX - dimY) / (2 * dimY)))
        Ypoints = int(numpoints / (Xpoints))
        self.Xstep = dimX / (Xpoints - 1)
        self.Ystep = dimY / (Ypoints - 1)

        self.grid = []
        for x in trange(Xpoints):
            row = []
            for y in range(Ypoints):
                Xrange = (x * self.Xstep, (x + 1) * self.Xstep)
                Yrange = (y * self.Ystep, (y + 1) * self.Ystep)
                hasPoint = False
                for point in self.points:
                    if inRange(point[0], Xrange) and inRange(point[1], Yrange):
                        hasPoint = True
                        break
                row.append(1 if hasPoint else 0)
            self.grid.append(row)
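`inRange` is not defined in this snippet; it is presumably a small interval test. A minimal sketch assuming half-open bounds:

def inRange(value, bounds):
    # Hypothetical helper for the grid loop above: lo <= value < hi.
    lo, hi = bounds
    return lo <= value < hi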
Example #5
    def __init__(self, filename=None, samplesize=-1, tolerance=0.1):
        self.points = []
        try:
            if filename is not None:
                with open(filename, 'r') as f:
                    l = f.readline()
                    while l:
                        point = l.rstrip('\n').split(",")
                        self.points.append(list(map(float, point)))
                        l = f.readline()
                    # Data is in polar form
        except ValueError as e:
            print("Error in file:", filename)
            raise e
        self.points = subsample(self.points, tolerance)
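Example #6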
def generate_test_segment(data_root, test_folder="test"):
    """
        Emit preprocessed segment along with filename
        Already chopped up and ready to send windows into feed dict
    """
    test_path = os.path.join(data_root, test_folder)
    file_names = filter(lambda x: x.endswith(".mat"), os.listdir(test_path))

    for file_name in file_names:
        file_path = os.path.join(test_path, file_name)
        data = mat_to_data(file_path)
        segment = data["data"]

        segment = normalize(segment)

        if SUBSAMPLE:
            segment = subsample(segment,
                                channels=CHANNELS,
                                rate=SUBSAMPLE_RATE)

        num_windows = segment.shape[0] // WINDOW_SIZE
        segment = segment[:num_windows * WINDOW_SIZE]  # drop any remainder so the reshape is exact
        segment = np.reshape(segment, (num_windows, WINDOW_SIZE, CHANNELS))
        yield segment, file_name
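Both this generator and `subsample_and_serialize` in Example #2 call `subsample(xs, channels=..., rate=...)`. A plausible sketch, assuming it simply keeps every `rate`-th time step of the first `channels` channels; a real implementation might low-pass filter first to avoid aliasing:

def subsample(xs, channels, rate):
    # Hypothetical sketch: naive decimation along the time axis.
    return xs[::rate, :channels]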
Example #7
val_result = np.load('dataset/val_result_mtrx.npy')
annotations_val = '/root/MedleyDB_selected/Annotations/Melody_Annotations/MELODY1/val/'
val_set = PitchEstimationDataSet(annotations_val,
                                 '/root/data/val/',
                                 sr_ratio=2,
                                 audio_type='MIX')

val_pitches = []
for pitch_frame in val_result:
    # Column 0 holds the confidence, column 1 the pitch; keep the pitch
    # with the highest confidence in each frame.
    best = np.argmax(pitch_frame[:, 0])
    val_pitches.append(pitch_frame[best][1])
val_pitches = np.asarray(val_pitches)

val_labels = []
for pitches in val_set.pitches:
    val_labels += pitches
val_labels = np.asarray(val_labels)

sampled_val_pitches = util.subsample(val_pitches, val_labels)

labels = range(109)
cnf_matrix = confusion_matrix(val_labels, sampled_val_pitches, labels=labels)
plot_confusion_matrix(cnf_matrix, title='cnf matrix')
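Here `util.subsample(val_pitches, val_labels)` apparently aligns the network's per-frame predictions with the coarser annotation grid. A hedged sketch that resamples predictions down to the label count with evenly spaced indices:

import numpy as np

def subsample(predictions, labels):
    # Hypothetical sketch: pick len(labels) evenly spaced predictions so
    # both sequences can be compared frame by frame.
    idx = np.linspace(0, len(predictions) - 1, num=len(labels)).astype(int)
    return predictions[idx]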
Example #8
def main(argv):

    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    epochs = FLAGS.epochs
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    if K.image_data_format() == 'channels_first':
        x_train = x_train.reshape(x_train.shape[0], 1, image_width,
                                  image_height)
        x_test = x_test.reshape(x_test.shape[0], 1, image_width, image_height)
        input_shape = (1, image_width, image_height)
    else:
        x_train = x_train.reshape(x_train.shape[0], image_width, image_height,
                                  1)
        x_test = x_test.reshape(x_test.shape[0], image_width, image_height, 1)
        input_shape = (image_width, image_height, 1)
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    #Our model architecture for MNIST dataset
    def model_arch():
        model = Sequential()
        model.add(
            Conv2D(32,
                   kernel_size=(3, 3),
                   activation='relu',
                   input_shape=input_shape))
        model.add(Conv2D(64, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.25))
        model.add(Flatten())
        model.add(Dense(128, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    sess = tf.Session()
    keras.backend.set_session(sess)

    x_noisy = util.add_gaussian_noise(x_train, 0,
                                      64)  #Add gaussian noise to all images
    preds_ens = np.zeros(
        (x_test.shape[0], 10)
    )  #variable to store the predictions of each model in the ensemble (10)
    max_vote_ens = np.zeros(
        x_test.shape[0]
    )  #variable to store Majority vote from all models in ensemble
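In the loop below, `util.subsample(x_noisy, y_train)` provides the bagging draw. A minimal sketch under the assumption that it takes a bootstrap sample (with replacement); the 0.8 fraction is an illustrative choice, not from the source:

import numpy as np

def subsample(xs, ys, frac=0.8):
    # Hypothetical sketch: a bootstrap draw, the usual way to build
    # bagged ensemble members.
    n = int(len(xs) * frac)
    idx = np.random.choice(len(xs), size=n, replace=True)
    return xs[idx], ys[idx]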

    for i in range(num_ens):
        model = model_arch(
        )  #Build a new model architecture for every model in the ensemble
        sub_imgs, sub_labels = util.subsample(
            x_noisy, y_train)  #subsample from the entire data, bagging
        model.fit(sub_imgs,
                  sub_labels,
                  batch_size=batch_size,
                  epochs=epochs,
                  verbose=1)  #train the model
        model.save("models/mnist/" + str(i) + ".h5")  #save the model
        ans = sess.run(tf.argmax(model.predict(x_test),
                                 axis=1))  #get the predictions of the model
        preds_ens[:, i] = ans.reshape(
            (x_test.shape[0])
        )  #store the predictions of this particular model(i) in ith column of pred_ens variable
        del model  #erase the model

    #Now the variable pred_ens consists of the predictions of all test_data for each model in ensemble.
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens.shape[0]):
            b = Counter(
                preds_ens[j][0:i + 1]
            )  #get the entire row which consists of predictions for that particular instance from all models.
            max_vote_ens[j] = b.most_common(1)[0][
                0]  #get the maximum vote i.e which number has more frequency.
        ens_acc_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc[i] = ens_acc_i  #accuracy of ensemble
        #TODO print the nonperturbed test accuracy to the output file.

    #Build a model for normal training on the entire noisy data.
    model = model_arch()
    model.fit(x_noisy,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    acc_noisy_normal = acc[1]  #accuracy of normal model on noisy train data
    del model

    #Build a new model for normal training (without ensemble) on entire train data (with out bagging and noise).
    model = model_arch()
    model.fit(x_train,
              y_train,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    model.save("models/original_model.h5")

    #accuracy of normal model
    acc_normal = acc[1]

    #generate fgsm adversarial examples on test_data
    adv_fgsm = util.fgsm_attack(x_test, model, sess)
    acc_fgsm = model.evaluate(adv_fgsm, y_test, verbose=0)
    acc_fgsm = acc_fgsm[
        1]  #accuracy of normal model on fgsm adversarial examples

    #generate bim adversarial examples on test_data
    adv_bim = util.bim_attack(x_test, model, sess)
    acc_bim = model.evaluate(adv_bim, y_test, verbose=0)
    acc_bim = acc_bim[1]  #accuracy of normal model on bim adversarial examples

    #generate lbfgs adversarial examples on test_data
    # The target is chosen as 6
    adv_lbfgs = util.lbfgs_attack(x_test, model, sess, 6)
    acc_lbfgs = model.evaluate(adv_lbfgs, y_test, verbose=0)
    acc_lbfgs = acc_lbfgs[
        1]  #accuracy of normal model on lbfgs adversarial examples

    preds_ens_fgsm = np.zeros(
        (x_test.shape[0], 10)
    )  #variable to store the predictions of each model in the ensemble (10) for fgsm adversarial examples
    max_vote_ens_fgsm = np.zeros(
        x_test.shape[0]
    )  #variable to store Majority vote from all models in ensemble for fgsm adversarial examples
    preds_ens_bim = np.zeros(
        (x_test.shape[0], 10)
    )  #variable to store the predictions of each model in the ensemble (10) for bim adversarial examples
    max_vote_ens_bim = np.zeros(
        x_test.shape[0]
    )  #variable to store Majority vote from all models in ensemble for bim adversarial examples
    preds_ens_lbfgs = np.zeros(
        (x_test.shape[0], 10)
    )  #variable to store the predictions of each model in the ensemble (10) for lbfgs adversarial examples
    max_vote_ens_lbfgs = np.zeros(
        x_test.shape[0]
    )  #variable to store Majority vote from all models in ensemble for lbfgs adversarial examples

    del model

    for i in range(num_ens):
        model = load_model("models/" + str(i) + ".h5")
        #get predictions of model i for fgsm adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_fgsm), axis=1))
        preds_ens_fgsm[:, i] = ans.reshape((adv_fgsm.shape[0]))
        #get predictions of model i for bim adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_bim), axis=1))
        preds_ens_bim[:, i] = ans.reshape((adv_bim.shape[0]))
        #get predictions of model i for lbfgs adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_lbfgs), axis=1))
        preds_ens_lbfgs[:, i] = ans.reshape((adv_lbfgs.shape[0]))
        del model

    #Now the variable pred_ens consists of the predictions of all fgsm adversarial test_data for each model in ensemble.
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_fgsm = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_fgsm.shape[0]):
            b = Counter(
                preds_ens_fgsm[j][0:i + 1]
            )  #get the entire row which consists of predictions for that particular instance from all models.
            max_vote_ens_fgsm[j] = b.most_common(1)[0][
                0]  #get the maximum vote i.e which number has more frequency.
        #accuracy of ensemble
        ens_acc_fgsm_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_fgsm, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_fgsm[i] = ens_acc_fgsm_i

    #Now the variable pred_ens consists of the predictions of all bim adversarial test_data for each model in ensemble.
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_bim = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_bim.shape[0]):
            b = Counter(preds_ens_bim[j][0:i + 1])
            max_vote_ens_bim[j] = b.most_common(1)[0][0]
        #accuracy of ensemble on bim_adv
        ens_acc_bim_i = sess.run(
            tf.reduce_mean(
                tf.cast(tf.equal(max_vote_ens_bim, tf.argmax(y_test, axis=1)),
                        tf.float32)))
        ens_acc_bim[i] = ens_acc_bim_i

    #Now the variable pred_ens consists of the predictions of all lbfgs adversarial test_data for each model in ensemble.
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_lbfgs = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_lbfgs.shape[0]):
            b = Counter(preds_ens_lbfgs[j][0:i + 1])
            max_vote_ens_lbfgs[j] = b.most_common(1)[0][0]
        #accuracy of ensemble on lbfgs_adv
        ens_acc_lbfgs_i = sess.run(
            tf.reduce_mean(
                tf.cast(
                    tf.equal(max_vote_ens_lbfgs, tf.argmax(y_test, axis=1)),
                    tf.float32)))
        ens_acc_lbfgs[i] = ens_acc_lbfgs_i

    #-----------------------------------Adversarial Training--------------------------------------------------------------
    #first adversarial examples are generated using train_data, then the model is trained on train_data+adv_train_data.
    #Then the model is tested on normal test_data, then the model is tested on adversarial_test_data.
    #So, we are generating the adversarial examples twice both on train and test data.
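`util.fgsm_attack` is not shown; since `KerasModelWrapper` comes from CleverHans, a plausible implementation wraps its `FastGradientMethod`. The `eps` and clip values below are illustrative assumptions for [0, 1]-scaled inputs:

from cleverhans.attacks import FastGradientMethod
from cleverhans.utils_keras import KerasModelWrapper

def fgsm_attack(x, model, sess, eps=0.3):
    # Hypothetical sketch: one-step gradient-sign perturbation.
    fgsm = FastGradientMethod(KerasModelWrapper(model), sess=sess)
    return fgsm.generate_np(x, eps=eps, clip_min=0.0, clip_max=1.0)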

    model = load_model("models/original_model.h5")
    wrap = KerasModelWrapper(model)

    #generate adversarial examples on train data.
    adv_fgsm_train = util.fgsm_attack(x_train, model, sess)
    adv_bim_train = util.bim_attack(x_train, model, sess)
    adv_lbfgs_train = util.lbfgs_attack(x_train, model, sess, 6)
    train_plus_adv_fgsm = np.concatenate([x_train, adv_fgsm_train])
    y_train_plus_adv_fgsm = np.concatenate([y_train, y_train])
    train_plus_adv_bim = np.concatenate([x_train, adv_bim_train])
    y_train_plus_adv_bim = np.concatenate([y_train, y_train])
    train_plus_adv_lbfgs = np.concatenate([x_train, adv_lbfgs_train])
    y_train_plus_adv_lbfgs = np.concatenate([y_train, y_train])
    del model

    #FGSM TRAINING
    #build a fresh model for fgsm training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_fgsm,
              y_train_plus_adv_fgsm,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    model.save("models/mnist_fgsm_model.h5")
    fgsm_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples:",
          fgsm_acc_train[1])

    #generate adversarial examples for adversarially trained model on test_data
    adv_fgsm_test = util.fgsm_attack(x_test, model, sess)
    fgsm_adv_acc_train = model.evaluate(adv_fgsm_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images:",
          fgsm_adv_acc_train[1])

    del model

    #BIM TRAINING
    #build a fresh model for bim training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_bim,
              y_train_plus_adv_bim,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    bim_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples:",
          bim_acc_train[1])

    #generate adversarial examples for adversarially trained model on test_data
    adv_bim_test = util.bim_attack(x_test, model, sess)
    bim_adv_acc_train = model.evaluate(adv_bim_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images:",
          bim_adv_acc_train[1])

    del model

    #LBFGS TRAINING
    #build a fresh model for lbfgs training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_lbfgs,
              y_train_plus_adv_lbfgs,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    lbfgs_acc_train = model.evaluate(x_test, y_test, verbose=0)

    print("Accuracy of adversarially trained model on clean examples:",
          lbfgs_acc_train[1])
    adv_lbfgs_test = util.lbfgs_attack(x_test, model, sess, 6)
    lbfgs_adv_acc_train = model.evaluate(adv_lbfgs_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images:",
          lbfgs_adv_acc_train[1])

    del model
Example #9
def main(argv):

    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(
            Conv2D(50,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    model = model_arch()

    #load training data
    imgs, labels, names = util.load_training_images('tiny-imagenet-200/train/')
    print("Training Images Loaded")

    #retype and resize training data
    imgs = imgs[0:100]
    labels = labels[0:100]
    names = names[0:100]
    imgs_large = np.ndarray(shape=[imgs.shape[0], 299, 299, 3])
    for i in range(imgs.shape[0]):
        imgs_large[i, :, :, :] = util.rescale(imgs[i])
    imgs_large = imgs_large.astype('uint8')
    imgs_noisy = np.ndarray(shape=imgs_large.shape)
    for i in range(imgs_large.shape[0]):
        imgs_noisy[i, :, :, :] = util.noisy(1, imgs_large[i])
    imgs_noisy = imgs_noisy.astype('uint8')
    sub_imgs, sub_labels = util.subsample(imgs_noisy, labels)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200
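`util.rescale` and `util.noisy` are not shown. Two hedged sketches: `rescale` presumably upsamples a 64x64 Tiny ImageNet image to 299x299, and the `noisy(1, img)` call suggests a noise-type switch in which mode 1 might be Gaussian; both bodies are guesses:

import numpy as np
from scipy.ndimage import zoom

def rescale(img, size=299):
    # Hypothetical: bilinear upsampling of an HxWx3 image to size x size.
    factor = size / img.shape[0]
    return zoom(img, (factor, factor, 1), order=1)

def noisy(mode, img):
    # Hypothetical: mode 1 = additive Gaussian noise on 8-bit pixels.
    if mode == 1:
        return np.clip(img + np.random.normal(0, 10, img.shape), 0, 255)
    return img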
Example #10
def main(argv):

    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    epochs = FLAGS.epochs
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(Conv2D(50, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                    optimizer=keras.optimizers.Adadelta(),
                    metrics=['accuracy'])
        return model

    model = model_arch()

    #load training data
    x_train,y_train,train_names = util.load_training_images('tiny-imagenet-200/train/')
    print("Training Images Loaded")
    
    x_test,y_test,test_names = util.load_training_images('tiny-imagenet-200/test/')
    print("Testing Images Loaded")

    #retype and resize training data
    x_train = x_train[0:100]
    y_train = y_train[0:100]
    train_names = train_names[0:100]
    x_train_large = np.ndarray(shape= [x_train.shape[0],299,299,3])
    for i in range(x_train.shape[0]):
        x_train_large[i,:,:,:] = util.rescale(x_train[i])
    x_train_large=x_train_large.astype('uint8')
    x_train_noisy = np.ndarray(shape= x_train_large.shape)
    for i in range(x_train_large.shape[0]):
        x_train_noisy[i,:,:,:] = util.noisy(1,x_train_large[i])
    x_train_noisy=x_train_noisy.astype('uint8')
    x_train_sub,y_train_sub = util.subsample(x_train_noisy,y_train)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    sess = tf.Session()
    keras.backend.set_session(sess)
    
    x_noisy = util.add_gaussian_noise(x_train,0,64) #Add gaussian noise to all images
    preds_ens = np.zeros((x_test.shape[0],10)) #variable to store the predictions of each model in the ensemble (10)
    max_vote_ens = np.zeros(x_test.shape[0])  #variable to store Majority vote from all models in ensemble

    for i in range(num_ens):
        model = model_arch() #Build a new model architecture for every model in the ensemble
        x_train_sub, y_train_sub = util.subsample(x_train_noisy, y_train) #subsample from the entire data, bagging
        model.fit(x_train_sub, y_train_sub, batch_size=batch_size, epochs=epochs, verbose=1) #train the model
        model.save("models/imgnet/" + str(i) + ".h5") #save the model
        ans = sess.run(tf.argmax(model.predict(x_test), axis=1)) #get the predictions of the model
        preds_ens[:, i] = ans.reshape((x_test.shape[0])) #store the predictions of model i in the ith column of preds_ens
        del model #erase the model

    #Now the variable pred_ens consists of the predictions of all test_data for each model in ensemble.
    #ith column contains predictions of ith model.
    #go through every row
    print("Ensemble method Clean")
    ens_acc = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens.shape[0]):
            b = Counter(preds_ens[j][0:i+1]) #get the entire row which consists of predictions for that particular instance from all models.
            max_vote_ens[j] = b.most_common(1)[0][0] #get the maximum vote, i.e. which number has the highest frequency.
        ens_acc_i = sess.run(tf.reduce_mean(tf.cast(tf.equal(max_vote_ens, tf.argmax(y_test, axis=1)) , tf.float32)))
        ens_acc[i] = ens_acc_i #accuracy of ensemble
        #TODO print the nonperturbed test accuracy to the output file.
    print("Accuracy : " + str(np.mean(ens_acc)))

    #Build a model for normal training on the entire noisy data.
    model = model_arch()
    model.fit(x_train_noisy, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    acc_noisy_normal = acc[1] #accuracy of normal model on noisy train data
    del model

    #Build a new model for normal training (without ensemble) on entire train data (with out bagging and noise).
    model = model_arch()
    model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1)
    acc = model.evaluate(x_test, y_test, verbose=0)
    model.save("models/imgnet/original_model.h5")

    #accuracy of normal model
    acc_normal = acc[1]
    print("accuracy of normal model : " + str(acc_normal))
    print("accuracy of normal model on noisy train data : " + str(acc_noisy_normal))

    #generate fgsm adversarial examples on test_data
    adv_fgsm = util.fgsm_attack(x_test,model,sess)
    acc_fgsm = model.evaluate(adv_fgsm, y_test, verbose=0)
    acc_fgsm = acc_fgsm[1]  
    print("accuracy of normal model on fgsm adversarial examples : " + str(acc_fgsm))

    #generate bim adversarial examples on test_data
    adv_bim = util.bim_attack(x_test,model,sess)
    acc_bim = model.evaluate(adv_bim,y_test,verbose=0)
    acc_bim = acc_bim[1] #accuracy of normal model on bim adversarial examples
    print("accuracy of normal model on bim adversarial examples : " + str(acc_bim))

    #generate lbfgs adversarial examples on test_data
    # The target is chosen as 6
    adv_lbfgs = util.lbfgs_attack(x_test,model,sess,6)
    acc_lbfgs = model.evaluate(adv_lbfgs,y_test,verbose=0)
    acc_lbfgs = acc_lbfgs[1] #accuracy of normal model on lbfgs adversarial examples
    print("accuracy of normal model on lbfgs adversarial examples : " + str(acc_lbfgs))

    preds_ens_fgsm = np.zeros((x_test.shape[0],10)) #variable to store the predictions of each model in the ensemble (10) for fgsm adversarial examples
    max_vote_ens_fgsm = np.zeros(x_test.shape[0]) #variable to store Majority vote from all models in ensemble for fgsm adversarial examples
    preds_ens_bim = np.zeros((x_test.shape[0],10)) #variable to store the predictions of each model in the ensemble (10) for bim adversarial examples
    max_vote_ens_bim = np.zeros(x_test.shape[0]) #variable to store Majority vote from all models in ensemble for bim adversarial examples
    preds_ens_lbfgs = np.zeros((x_test.shape[0],10)) #variable to store the predictions of each model in the ensemble (10) for lbfgs adversarial examples
    max_vote_ens_lbfgs = np.zeros(x_test.shape[0]) #variable to store Majority vote from all models in ensemble for lbfgs adversarial examples

    del model

    for i in range(num_ens):
        model = load_model("models/"+str(i)+".h5")
        #get predictions of model i for fgsm adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_fgsm),axis=1))
        preds_ens_fgsm[:,i]= ans.reshape((adv_fgsm.shape[0]))
        #get predictions of model i for bim adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_bim),axis=1)) 
        preds_ens_bim[:,i]= ans.reshape((adv_bim.shape[0]))
        #get predictions of model i for lbfgs adversarial examples
        ans = sess.run(tf.argmax(model.predict(adv_lbfgs),axis=1))
        preds_ens_lbfgs[:,i]= ans.reshape((adv_lbfgs.shape[0]))
        del model

    print("Now the variable pred_ens consists of the predictions of all fgsm adversarial test_data for each model in ensemble.")
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_fgsm = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_fgsm.shape[0]):
            b = Counter(preds_ens_fgsm[j][0:i+1]) #get the entire row which consists of predictions for that particular instance from all models.
            max_vote_ens_fgsm[j] = b.most_common(1)[0][0] #get the maximum vote, i.e. which number has the highest frequency.
        #accuracy of ensemble
        ens_acc_fgsm_i = sess.run(tf.reduce_mean(tf.cast(tf.equal(max_vote_ens_fgsm, tf.argmax(y_test, axis=1)) , tf.float32)))
        ens_acc_fgsm[i] = ens_acc_fgsm_i
    print(str(np.mean(ens_acc_fgsm)))

    print("Now the variable pred_ens consists of the predictions of all bim adversarial test_data for each model in ensemble.")
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_bim = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_bim.shape[0]):
            b = Counter(preds_ens_bim[j][0:i+1])
            max_vote_ens_bim[j] = b.most_common(1)[0][0]
        #accuracy of ensemble on bim_adv
        ens_acc_bim_i = sess.run(tf.reduce_mean(tf.cast(tf.equal(max_vote_ens_bim, tf.argmax(y_test, axis=1)) , tf.float32)))
        ens_acc_bim[i] = ens_acc_bim_i
    print(str(np.mean(ens_acc_bim)))

    print("Now the variable pred_ens consists of the predictions of all lbfgs adversarial test_data for each model in ensemble.")
    #ith column contains predictions of ith model.
    #go through every row
    ens_acc_lbfgs = np.zeros(num_ens)
    for i in range(num_ens):
        for j in range(preds_ens_lbfgs.shape[0]):
            b = Counter(preds_ens_lbfgs[j][0:i+1])
            max_vote_ens_lbfgs[j] = b.most_common(1)[0][0]
        #accuracy of ensemble on lbfgs_adv
        ens_acc_lbfgs_i = sess.run(tf.reduce_mean(tf.cast(tf.equal(max_vote_ens_lbfgs, tf.argmax(y_test, axis=1)) , tf.float32)))
        ens_acc_lbfgs[i] = ens_acc_lbfgs_i
    print(str(np.mean(ens_acc_lbfgs)))
Example #11
    labels = util.make_justpot(labels)

if args.pot_part:
    partitions = []
    no_pot_imgs = util.filter_no_pot(labels, image_paths)
    for color, subjs in util.pot_map.items():
        subj_imgs = []
        subj_imgs.extend([
            x for x in image_paths
            if os.path.basename(x).startswith(tuple(subjs))
        ])
        imgs = list(set(no_pot_imgs + subj_imgs))
        if args.make_uniform:
            imgs = util.make_uniform(imgs, labels)

        train, test = util.subsample(imgs, percent=args.samp_percent)

        partitions.append((color, train, test))

    for p in partitions:
        labels = util.read_labels(os.path.join(annot_input_dir, "labels"))

        gen_and_write(output_dir,
                      exp_num,
                      p[1],
                      labels,
                      mode=f"{p[0]}_train",
                      args=args)
        gen_and_write(output_dir,
                      exp_num,
                      p[2],
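In this snippet `util.subsample(imgs, percent=args.samp_percent)` acts as a train/test splitter. A minimal sketch assuming `percent` is the training fraction:

import random

def subsample(items, percent=0.8):
    # Hypothetical sketch: shuffle, then split into train/test partitions
    # with `percent` as the training share.
    items = list(items)
    random.shuffle(items)
    cut = int(len(items) * percent)
    return items[:cut], items[cut:]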
Example #12
def main(argv):

    print("Start Main")
    # Set arguments: Save_Dir Structure Learning_Rate Early_Stopping Batch_Size Data_Dir
    data_dir = FLAGS.data_dir
    save_dir = FLAGS.save_dir
    learning_rate = FLAGS.lr
    early_stop = FLAGS.early_stop
    batch_size = FLAGS.batch_size
    epochs = FLAGS.epochs
    reg_coeff = FLAGS.reg_coeff
    split = FLAGS.split
    master = FLAGS.master
    checkpoint_path = FLAGS.checkpoint_path
    input_dir = FLAGS.input_dir
    output_dir = FLAGS.output_dir
    image_width = FLAGS.image_width
    image_height = FLAGS.image_height
    num_classes = FLAGS.num_classes
    eps = FLAGS.eps
    batch_shape = [batch_size, image_height, image_width, 3]
    input_shape = [image_height, image_width, 3]
    num_ens = FLAGS.num_ens

    tf.logging.set_verbosity(tf.logging.INFO)

    def model_arch():
        model = Sequential()
        model.add(
            Conv2D(50,
                   kernel_size=(5, 5),
                   activation='relu',
                   input_shape=input_shape))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(100, (5, 5), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Conv2D(200, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.5))
        model.add(Flatten())
        model.add(Dense(400, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(200, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adadelta(),
                      metrics=['accuracy'])
        return model

    model = model_arch()

    #load training data
    x_train, y_train, train_names = util.load_training_images(
        'tiny-imagenet-200/train/')
    print("Training Images Loaded")

    x_test, y_test, test_names = util.load_training_images(
        'tiny-imagenet-200/test/')
    print("Testing Images Loaded")

    #retype and resize training data
    x_train = x_train[0:100]
    y_train = y_train[0:100]
    train_names = train_names[0:100]
    x_train_large = np.ndarray(shape=[x_train.shape[0], 299, 299, 3])
    for i in range(x_train.shape[0]):
        x_train_large[i, :, :, :] = util.rescale(x_train[i])
    x_train_large = x_train_large.astype('uint8')
    x_train_noisy = np.ndarray(shape=x_train_large.shape)
    for i in range(x_train_large.shape[0]):
        x_train_noisy[i, :, :, :] = util.noisy(1, x_train_large[i])
    x_train_noisy = x_train_noisy.astype('uint8')
    x_train_sub, y_train_sub = util.subsample(x_train_noisy, y_train)
    batch_shape = [20, 299, 299, 3]
    num_classes = 200

    y_train = keras.utils.to_categorical(y_train, num_classes)
    y_test = keras.utils.to_categorical(y_test, num_classes)
    sess = tf.Session()
    keras.backend.set_session(sess)

    #-----------------------------------Adversarial Training--------------------------------------------------------------
    #first adversarial examples are generated using train_data, then the model is trained on train_data+adv_train_data.
    #Then the model is tested on normal test_data, then the model is tested on adversarial_test_data.
    #So, we are generating the adversarial examples twice both on train and test data.
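As a companion to the FGSM sketch in Example #8, `util.bim_attack` plausibly wraps CleverHans' `BasicIterativeMethod`; the step size and iteration count below are illustrative assumptions:

from cleverhans.attacks import BasicIterativeMethod
from cleverhans.utils_keras import KerasModelWrapper

def bim_attack(x, model, sess, eps=0.3):
    # Hypothetical sketch: iterated FGSM with small steps, clipped to the
    # eps-ball and the [0, 1] input range.
    bim = BasicIterativeMethod(KerasModelWrapper(model), sess=sess)
    return bim.generate_np(x, eps=eps, eps_iter=0.05, nb_iter=10,
                           clip_min=0.0, clip_max=1.0)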

    model = load_model("models/imgnet/original_model.h5")
    wrap = KerasModelWrapper(model)

    #generate adversarial examples on train data.
    adv_fgsm_train = util.fgsm_attack(x_train, model, sess)
    adv_bim_train = util.bim_attack(x_train, model, sess)
    adv_lbfgs_train = util.lbfgs_attack(x_train, model, sess, 6)
    train_plus_adv_fgsm = np.concatenate([x_train, adv_fgsm_train])
    y_train_plus_adv_fgsm = np.concatenate([y_train, y_train])
    train_plus_adv_bim = np.concatenate([x_train, adv_bim_train])
    y_train_plus_adv_bim = np.concatenate([y_train, y_train])
    train_plus_adv_lbfgs = np.concatenate([x_train, adv_lbfgs_train])
    y_train_plus_adv_lbfgs = np.concatenate([y_train, y_train])
    del model

    print("FGSM TRAINING")
    #build a fresh model for fgsm training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_fgsm,
              y_train_plus_adv_fgsm,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    model.save("models/imgnet/fgsm_model.h5")
    fgsm_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples:",
          fgsm_acc_train[1])

    #generate adversarial examples for adversarially trained model on test_data
    adv_fgsm_test = util.fgsm_attack(x_test, model, sess)
    fgsm_adv_acc_train = model.evaluate(adv_fgsm_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images:",
          fgsm_adv_acc_train[1])

    del model

    print("BIM TRAINING")  #BIM TRAINING
    #build a fresh model for bim training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_bim,
              y_train_plus_adv_bim,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    bim_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on clean examples\n" +
          str(bim_acc_train[1]))

    #generate adversarial examples for adversarially trained model on test_data
    adv_bim_test = util.bim_attack(x_test, model, sess)
    bim_adv_acc_train = model.evaluate(adv_bim_test, y_test, verbose=0)
    print("Accuracy of adversarially trained model on adv_test images\n" +
          str(bim_adv_acc_train[1]))

    del model

    print("LBFGS TRAINING")
    #build a fresh model for lbfgs training
    model = model_arch()
    wrap = KerasModelWrapper(model)
    model.fit(train_plus_adv_lbfgs,
              y_train_plus_adv_lbfgs,
              batch_size=batch_size,
              epochs=epochs,
              verbose=1)
    print("Accuracy of adversarially trained model on clean examples")
    lbfgs_acc_train = model.evaluate(x_test, y_test, verbose=0)
    print(str(lbfgs_acc_train[1]))

    print("Accuracy of adversarially trained model on lbfgs examples")
    lbfgs_acc_train[1]
    adv_lbfgs_test = util.lbfgs_attack(x_test, model, sess, 6)
    lbfgs_adv_acc_train = model.evaluate(adv_lbfgs_test, y_test, verbose=0)
    print(str(lbfgs_adv_acc_train[1])
          )  #Accuracy of adversarially trained model on adv_test images

    del model