예제 #1
0
def data():
    """Load, augment and normalise the training and validation sets.

    Reads an ``[x, y]`` pair from each of the two pickle files, keeps the
    first element of every label entry, passes everything through
    ``augment`` (with the ``'all'`` and ``'colour'`` options) and finally
    casts inputs and targets to float32 and divides them by 255.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    TRAIN_DATA = 'train_raw_data.p'
    VAL_DATA = 'validate_raw_data.p'
    RANDOM_CROP_NUMBER = 1
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224
    # Not read anywhere inside this function; kept so the names defined
    # here stay the same as before.
    NEURONS = [1024]

    # NOTE: pickle can execute arbitrary code while loading; only use
    # trusted files. The context managers make sure both files are closed.
    with open(TRAIN_DATA, 'rb') as train_file:
        [x_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [x_test, y_test] = pickle.load(val_file)

    x_train = np.array(x_train)
    x_test = np.array(x_test)
    # Only the first element of every label entry is used as the target
    y_train = np.array([item[0] for item in y_train])
    y_test = np.array([item[0] for item in y_test])

    x_train, y_train, x_test, y_test = augment(
        x_train, y_train, x_test, y_test, ['all', 'colour'],
        RANDOM_CROP_NUMBER, RESIZE_DIM, RANDOM_CROP_DIM)

    # Normalise input
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # Normalise output
    y_train = y_train.astype('float32')
    y_test = y_test.astype('float32')
    y_train /= 255
    y_test /= 255

    return x_train, y_train, x_test, y_test
예제 #2
0
def supervised_train_loop(model, optimizer, train_dataset, avg_loss, mIoU,
                          iters):
    """Run one supervised pass over ``train_dataset``.

    For every ``(images, labels)`` batch: augment it, run the model, compute
    the mean sparse softmax cross-entropy on whatever ``valid_mask_preds``
    returns for the labels and logits, apply one optimizer step and update
    the ``avg_loss`` and ``mIoU`` metrics. ``iters`` is accepted but unused.
    """
    step = 0
    since_last_plot = 0
    for images, labels in train_dataset:
        # with tf.device('/GPU:0'):
        images, labels = augment(images, labels)

        with tf.GradientTape() as tape:
            logits = model(images)
            preds = tf.argmax(tf.nn.softmax(logits), axis=-1)
            valid_labels, valid_logits = valid_mask_preds(labels, logits)
            raw_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=valid_labels, logits=valid_logits)
            loss = tf.reduce_mean(raw_loss)

        gradients = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(gradients, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        # Metrics
        avg_loss.update_state(loss)
        valid_lbls, valid_preds = valid_mask_preds(labels, preds)
        mIoU.update_state(valid_lbls, valid_preds)

        # Debug plot once the counter reaches FLAGS.debug_freq (if positive)
        plot_due = 0 < FLAGS.debug_freq <= since_last_plot
        if plot_due:
            debug_plot(images, labels, preds, step, since_last_plot)
            since_last_plot = 0
        else:
            since_last_plot += 1
        step += 1
예제 #3
0
def semisupervised_train_loop(model, optimizer, train_dataset, avg_loss, mIoU,
                              iters):
    """Run one semi-supervised pass over ``train_dataset``.

    Each dataset element is a pair ``((images, labels), unlabeled_images)``.
    The labeled batch is augmented with ``augment``, the unlabeled batch with
    ``augment_image`` and given targets by ``predict_labels``. Both parts are
    concatenated, optionally mixed with ``mixup`` (``FLAGS.do_mixup``), and
    trained on through ``combined_loss``. ``avg_loss`` and ``mIoU`` are
    updated after every optimizer step. ``iters`` is accepted but unused.
    """
    step = 0
    beta_distribution = Beta(FLAGS.alpha, FLAGS.alpha)
    for (images, labels), unlabeled_images in train_dataset:
        # with tf.device('/GPU:0'):
        images, labels = augment(images, labels)
        unlabeled_images, boxes, flip_mask = augment_image(
            unlabeled_images, FLAGS.K)
        unlabeled_targets = predict_labels(FLAGS.K, boxes, flip_mask, model,
                                           unlabeled_images)

        # Labeled samples first, unlabeled samples after them
        all_images = tf.concat([images, unlabeled_images], axis=0)
        num_labels = unlabeled_targets.shape[-1]
        labeled_targets = tf.one_hot(labels, depth=num_labels)
        all_targets = tf.concat([labeled_targets, unlabeled_targets], axis=0)

        if FLAGS.do_mixup:
            mixed_images, mixed_targets, _, _ = mixup(
                beta_distribution, all_images, all_targets)
        else:
            mixed_images, mixed_targets = all_images, all_targets

        # Split the targets back at the labeled/unlabeled boundary
        num_labeled = images.shape[0]
        l_labeled = tf.stack(mixed_targets[:num_labeled])
        l_unlabeled = tf.stack(mixed_targets[num_labeled:])

        with tf.GradientTape() as tape:
            loss = combined_loss(mixed_images, l_labeled, l_unlabeled, model)
        gradients = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(gradients, model.trainable_variables)
        optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        avg_loss.update_state(loss)
        # mIoU is measured on a fresh forward pass over the labeled images
        preds = tf.argmax(model(images), axis=-1)
        valid_lbls, valid_preds = valid_mask_preds(labels, preds)
        mIoU.update_state(valid_lbls, valid_preds)

        # Per-sample plotting of the mixed batch and the periodic
        # debug_plot call are disabled in this loop.
        step += 1
예제 #4
0
def build_ensemble(X, y):
    """Fit a list of classifiers on ``X`` plus subsets of its augmentations.

    The first classifier sees only ``(X, y)``. Further classifiers are fit
    for every combination of augmentations of size 1 and of size
    ``len(augment(X))``; each one is trained on the chosen augmentations
    followed by the original data.

    Returns:
        List of fitted classifiers, the plain one first.
    """
    augmented = augment(X)
    aug_ids = np.arange(len(augmented))

    ensemble = [new_classifier().fit(X, y)]

    # Only the smallest and the largest subset sizes are used
    # (all sizes would be range(1, len(augmented) + 1)).
    for size in (1, len(augmented)):
        for subset in combinations(aug_ids, r=size):
            chosen = list(subset)
            X_aug = concat(augmented[chosen])
            y_aug = concat([y for _ in chosen])
            X_fit = concat([X_aug, X])
            y_fit = concat([y_aug, y])
            ensemble.append(new_classifier().fit(X_fit, y_fit))

    return ensemble
예제 #5
0
    def __getitem__(self, index):
        """Generate one batch of data.

        :param index: index of the batch
        :return: ``(X, y)`` when ``self.to_fit`` is truthy, otherwise ``X``
            only. ``X`` is divided by 255 in both cases so that prediction
            inputs are scaled the same way as training inputs; all returned
            arrays are float32.
        """
        # Positions of the samples that belong to this batch
        current_indexes = list(
            range(index * self.batch_size, (index + 1) * self.batch_size))
        # Indexing with a list: assumes self.img_paths is a NumPy array
        # -- TODO confirm
        img_paths_temp = self.img_paths[current_indexes]

        # Generate data
        X = []
        y = []

        for path in img_paths_temp:
            # OpenCV loads BGR; convert the colour image to RGB
            _X = cv2.cvtColor(cv2.imread(self.base_path + f"/images/{path}"),
                              cv2.COLOR_BGR2RGB)
            _y = rgb_to_depth(cv2.imread(self.base_path + f"/depth/{path}"))
            # presumably a unit conversion of the decoded depth -- TODO confirm
            _y = 1000.0 * _y

            # Image-only augmentation, applied with probability
            # self.augmentation_rate
            if (np.random.random() < self.augmentation_rate):
                _X = augment(_X)

            # Joint flip of image and depth in half of the cases, but only
            # when augmentation is enabled at all
            if (np.random.random() < 0.5) and self.augmentation_rate:
                _X, _y = flip(_X, _y)

            _y = np.clip(_y, self.min_depth, self.max_depth)
            _y = DepthNorm(_y, maxDepth=self.max_depth)

            # The target is half the spatial size of the input image
            _y = resize(_y, (_X.shape[0] // 2, _X.shape[1] // 2),
                        preserve_range=True,
                        mode='reflect',
                        anti_aliasing=True)
            _y = _y.reshape(_y.shape[0], _y.shape[1], 1)

            X.append(_X)
            y.append(_y)

        # Same input scaling for fitting and predicting
        inputs = (np.array(X) / 255).astype('float32')
        if self.to_fit:
            return inputs, np.array(y).astype('float32')
        else:
            return inputs
예제 #6
0
	def _data_generation(self, images_to_load, corresponding_masks, shape):
		"""Load, augment and preprocess one batch of images and masks.

		:param images_to_load: paths of the input images
		:param corresponding_masks: paths of the mask images, in the same order
		:param shape: size passed to ``cv2.resize``
		:return: ``(X, y)`` lists of single-channel arrays with a trailing
			axis of length 1; inputs are adaptively equalised, masks are
			divided by 255
		"""
		X = [cv2.imread(path) for path in images_to_load]
		y = [cv2.imread(path) for path in corresponding_masks]

		# cv2.imread returns None instead of raising when a file cannot be read
		# NOTE(review): only the masks are checked, not the input images
		for i, img in enumerate(y):
			if img is None:
				print(' [EROOR INFO]  None image Loaded:  ', corresponding_masks[i])
				exit(1)

		X, y = augment(X, y)

		# NOTE(review): images are still 3-channel here, so img.shape has three
		# entries; if `shape` is a 2-tuple the comparison never matches and
		# every image is resized -- confirm
		X = [cv2.resize(img, shape) if img.shape != shape else img for img in X]
		y = [cv2.resize(img, shape) if img.shape != shape else img for img in y]

		X = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in X]
		y = [cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in y]

		# normalize input images and mask images
		X = [exposure.equalize_adapthist(img) for img in X]
		y = [mask / 255 for mask in y]

		# expand dims to fit the network architecture: the arrays are 2-D at
		# this point, so the channel axis is appended with axis=-1 (axis=3 is
		# out of range for 2-D input and raises on NumPy >= 1.18)
		X = [np.expand_dims(img, -1) for img in X]
		y = [np.expand_dims(mask, -1) for mask in y]

		return X, y
예제 #7
0
def data():
    """Load, augment and normalise the grayscale training/validation sets.

    Reads an ``[x, y]`` pair from each pickle file, converts every image
    from BGR to grayscale and appends a channel axis, keeps the first element
    of every label entry, passes everything through ``augment`` (with the
    ``'all'`` option) and finally casts inputs and targets to float32 and
    divides them by 255.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)``.
    """
    TRAIN_DATA = '/vol/bitbucket/qn14/train_raw_data.p'
    VAL_DATA = '/vol/bitbucket/qn14/validate_raw_data.p'
    RANDOM_CROP_NUMBER = 2
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224
    # Not read anywhere inside this function; kept so the names defined
    # here stay the same as before.
    NO_FILTERS = [16, 32, 64]

    # NOTE: pickle can execute arbitrary code while loading; only use
    # trusted files. The context managers make sure both files are closed.
    with open(TRAIN_DATA, 'rb') as train_file:
        [x_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [x_test, y_test] = pickle.load(val_file)

    x_train = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in x_train])
    x_test = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in x_test])
    # Grayscale conversion drops the channel axis; add it back with length 1
    x_train = x_train.reshape(x_train.shape + (1, ))
    x_test = x_test.reshape(x_test.shape + (1, ))
    # Only the first element of every label entry is used as the target
    y_train = np.array([item[0] for item in y_train])
    y_test = np.array([item[0] for item in y_test])

    x_train, y_train, x_test, y_test = augment(x_train, y_train, x_test,
                                               y_test, ['all'],
                                               RANDOM_CROP_NUMBER, RESIZE_DIM,
                                               RANDOM_CROP_DIM)

    # Normalise input
    x_train = x_train.astype('float32')
    x_test = x_test.astype('float32')
    x_train /= 255
    x_test /= 255

    # Normalise output
    y_train = y_train.astype('float32')
    y_test = y_test.astype('float32')
    y_train /= 255
    y_test /= 255

    return x_train, y_train, x_test, y_test
예제 #8
0
def main(args):
    """Build, train and save a small CNN regressor on grayscale images.

    Args:
        args: sequence of strings. ``args[0]`` names the output files
            (``<name>.h5`` for the model, ``results/<name>.p`` for the
            training history) and the whole sequence is forwarded to
            ``augment``. If it contains ``'translate'`` or ``'all'`` the
            network input size is ``RANDOM_CROP_DIM``, otherwise
            ``RESIZE_DIM``.
    """
    # Hyper-parameters
    BATCH_SIZE = 128
    EPOCHS = 25
    SAVE_DIR = 'results'
    TRAIN_DATA = 'train_raw_data.p'
    VAL_DATA = 'validate_raw_data.p'
    MODEL_NAME = args[0]
    RANDOM_CROP_NUMBER = 2
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224

    # Set up network training instance
    use_crop_size = 'translate' in args or 'all' in args
    input_dim = RANDOM_CROP_DIM if use_crop_size else RESIZE_DIM

    model = Sequential()
    model.add(Conv2D(32, (5, 5), padding='same',
                     input_shape=(input_dim, input_dim, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (5, 5), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(500))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    # Two linear outputs trained with mean squared error
    model.add(Dense(2, kernel_initializer='normal'))
    model.add(Activation('linear'))

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    model.compile(loss='mean_squared_error', optimizer=opt)

    # Prepare data for training
    # NOTE: pickle can execute arbitrary code while loading; only use
    # trusted files. The context managers make sure both files are closed.
    with open(TRAIN_DATA, 'rb') as train_file:
        [X_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [X_val, y_val] = pickle.load(val_file)

    X_train = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in X_train])
    X_val = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in X_val])
    # Grayscale conversion drops the channel axis; add it back with length 1
    X_train = X_train.reshape(X_train.shape + (1,))
    X_val = X_val.reshape(X_val.shape + (1,))
    # Only the first element of every label entry is used as the target
    y_train = np.array([item[0] for item in y_train])
    y_val = np.array([item[0] for item in y_val])

    X_train, y_train, X_val, y_val = augment(
        X_train, y_train, X_val, y_val, args,
        RANDOM_CROP_NUMBER, RESIZE_DIM, RANDOM_CROP_DIM)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)

    # Normalise input
    X_train = X_train.astype('float32')
    X_val = X_val.astype('float32')
    X_train /= 255
    X_val /= 255

    # Normalise output
    y_train = y_train.astype('float32')
    y_val = y_val.astype('float32')
    y_train /= 255
    y_val /= 255

    # Train the CNN
    history = customValidationCallback()
    model.fit(X_train, y_train,
              epochs=EPOCHS, batch_size=BATCH_SIZE,
              validation_data=(X_val, y_val),
              callbacks=[history])

    history_data = {
        'loss_history': history.losses,
        'val_error_means': history.val_error_means,
        'val_error_stds': history.val_error_stds,
    }

    model.save(MODEL_NAME + '.h5')

    # The context manager flushes and closes the history file
    with open(SAVE_DIR + '/' + MODEL_NAME + '.p', 'wb') as history_file:
        pickle.dump(history_data, history_file)
예제 #9
0
def training(train_ds, val_ds, test_ds, model, EPOCHS):
    """Train ``model`` with a custom ``tf.GradientTape`` loop.

    Every epoch resets all metrics, runs one augmented pass over
    ``train_ds``, advances the checkpoint step (saving whenever the step is
    even), stores the training ROC curve as ``ROC_train_img.png``, writes
    TensorBoard summaries under ``logs/gradient_tape/<timestamp>/``,
    evaluates on ``val_ds`` and prints a one-line report.

    Args:
        train_ds: iterable of samples; element 0 is used as the image batch
            and element 2 as the label batch.
        val_ds: iterable with the same sample layout, used for validation.
        test_ds: accepted but not consumed anywhere in this function.
        model: callable model exposing ``trainable_variables``.
        EPOCHS: number of epochs to run.
    """
    @tf.function
    def train_step(images, labels):
        with tf.GradientTape() as tape:
            # training=True is only needed if there are layers with different
            # behavior during training versus inference (e.g. Dropout).
            predictions, feature_map = model(images, training=True)
            loss = loss_object(labels, predictions)
        gradients = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        train_loss(loss)
        train_accuracy(labels, predictions)
        pred_label = tf.math.argmax(predictions, axis=1)
        _ = train_AUC.update_state(labels, pred_label)
        # pred_axis0, pred_axis1 = tf.unstack(predictions, axis=1)
        # _ = train_ROC.update_state(labels, pred_axis0, pred_axis1)

    @tf.function
    def val_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        # NOTE(review): train_step unpacks two model outputs, here the raw
        # return value is used as predictions -- confirm the model's output.
        predictions = model(images, training=False)
        v_loss = loss_object(labels, predictions)

        val_loss(v_loss)
        val_accuracy(labels, predictions)

    # NOTE(review): test_step is defined but never called in this function.
    @tf.function
    def test_step(images, labels):
        # training=False is only needed if there are layers with different
        # behavior during training versus inference (e.g. Dropout).
        predictions = model(images, training=False)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)
        pred_label = tf.math.argmax(predictions, axis=1)
        _ = test_AUC.update_state(labels, pred_label)
        # _ = test_ROC.update_state(labels, predictions)

    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam()
    ckpt = tf.train.Checkpoint(step=tf.Variable(1), optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, './tf_ckpts', max_to_keep=3)

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    train_AUC = tf.keras.metrics.AUC(
        num_thresholds=200, curve='ROC', summation_method='interpolation')
    train_ROC = ROC()
    num_class = 2
    # train_CM = C_M(num_class)

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
    test_AUC = tf.keras.metrics.AUC(
        num_thresholds=200, curve='ROC', summation_method='interpolation')
    test_ROC = ROC()
    # test_CM = C_M(num_class)

    # EPOCHS = 10

    # One log directory per run and per split
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = 'logs/gradient_tape/' + current_time + '/train'
    test_log_dir = 'logs/gradient_tape/' + current_time + '/test'
    val_log_dir = 'logs/gradient_tape/' + current_time + '/val'
    ROC_log_dir = 'logs/gradient_tape/' + current_time + '/ROC'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)
    validation_summary_writer = tf.summary.create_file_writer(val_log_dir)
    ROC_summary_writer = tf.summary.create_file_writer(ROC_log_dir)

    # Resume from the newest checkpoint when one exists
    ckpt.restore(manager.latest_checkpoint)
    if manager.latest_checkpoint:
        print("Restored from {}".format(manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    for epoch in range(EPOCHS):
        train_loss.reset_states()
        train_accuracy.reset_states()
        train_AUC.reset_states()
        # train_CM.reset_states()
        train_ROC.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()
        test_AUC.reset_states()
        test_ROC.reset_states()
        # test_CM.reset_states()
        val_loss.reset_states()
        val_accuracy.reset_states()

        for sample in train_ds:
            # A fresh random size in [200, 256) is handed to augment per batch
            size_shape = tf.random.uniform([], minval=200, maxval=256)
            train_img = sample[0]
            train_label = sample[2]
            train_img, train_label = augment(train_img, train_label, size_shape)
            train_step(train_img, train_label)
            # NOTE(review): second forward pass in training mode, used only
            # for the ROC metric; its raw return value is not unpacked the
            # way train_step does -- confirm.
            predictions = model(train_img, training=True)

            label_pred = tf.math.argmax(predictions, axis=1)
            _ = train_ROC.update_state(train_label, predictions)
            # _ = train_CM.update_state(train_label,label_pred)

        # The checkpoint step advances once per epoch; save on even steps.
        # (This block was inconsistently indented before, which made the
        # function a syntax error.)
        ckpt.step.assign_add(1)
        if int(ckpt.step) % 2 == 0:
            save_path = manager.save()
            print("Saved checkpoint for step {}: {}".format(int(ckpt.step), save_path))
            print(manager.checkpoints)

        # Draw the ROC plot and save it
        fp, tp = train_ROC.result()
        plot_roc('ROC_train', fp, tp)  # create figure & 1 axis
        plt.savefig('ROC_train_img.png')  # save the figure to file
        plt.show()
        # Read the saved figure back and add a batch axis for tf.summary.image
        fig = mpimg.imread('ROC_train_img.png')
        fig = tf.expand_dims(fig, 0)

        with train_summary_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=epoch)
            tf.summary.scalar('train_accuracy', train_accuracy.result(), step=epoch)
            tf.summary.scalar('AUC_train', train_AUC.result(), step=epoch)
            tf.summary.image("ROC_train", fig, step=epoch)
            # tf.summary.image('Confusion Matrix train', fig_CM, step=epoch)

        for sample in val_ds:
            val_img = sample[0]
            val_label = sample[2]
            val_step(val_img, val_label)
        with validation_summary_writer.as_default():
            tf.summary.scalar('val_loss', val_loss.result(), step=epoch)
            tf.summary.scalar('val_accuracy', val_accuracy.result(), step=epoch)

        template = 'Epoch {}, Loss: {}, Accuracy: {}, Val Loss: {}, Val Accuracy: {}'
        print(template.format(epoch + 1,
                              train_loss.result(),
                              train_accuracy.result() * 100,
                              val_loss.result(),
                              val_accuracy.result() * 100))
예제 #10
0
def main(args):
    """Fine-tune an ImageNet DenseNet201 as a two-output regressor.

    Args:
        args: sequence of strings. ``args[0]`` names the output files
            (``<name>.h5`` for the model, ``results/<name>.p`` for the
            training history) and the whole sequence is forwarded to
            ``augment``.
    """
    # Hyper-parameters
    BATCH_SIZE = 32
    EPOCHS = 25
    SAVE_DIR = 'results'
    TRAIN_DATA = 'train_raw_data.p'
    VAL_DATA = 'validate_raw_data.p'
    MODEL_NAME = args[0]
    RANDOM_CROP_NUMBER = 1
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224

    # Set up network training instance
    base_model = DenseNet201(include_top=False,
                             input_shape=(224, 224, 3),
                             weights='imagenet')

    # Regression head: global pooling, one hidden layer, two linear outputs
    # x = AveragePooling2D((7, 7), name='avg_pool')(base_model.output)
    x = GlobalAveragePooling2D()(base_model.output)
    # x = Flatten()(x)
    x = Dense(1000, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(2, activation='linear')(x)

    model = Model(inputs=base_model.input, outputs=x)

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    model.compile(loss='mean_squared_error', optimizer=opt)

    # Prepare data for training
    # NOTE: pickle can execute arbitrary code while loading; only use
    # trusted files. The context managers make sure both files are closed.
    with open(TRAIN_DATA, 'rb') as train_file:
        [X_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [X_val, y_val] = pickle.load(val_file)
    X_train = np.array(X_train)
    X_val = np.array(X_val)

    # Only the first element of every label entry is used as the target
    y_train = np.array([item[0] for item in y_train])
    y_val = np.array([item[0] for item in y_val])

    X_train, y_train, X_val, y_val = augment(X_train, y_train, X_val, y_val,
                                             args, RANDOM_CROP_NUMBER,
                                             RESIZE_DIM, RANDOM_CROP_DIM)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)

    # Normalise input
    # The training set is cast to float32 in two halves and stacked again
    # (presumably to limit peak memory compared with a single astype call).
    n = X_train.shape[0]
    d1 = X_train[:n // 2, :].astype('float32')
    print(d1.shape)
    d2 = X_train[n // 2:, :].astype('float32')
    print(d2.shape)
    X_train = np.vstack((d1, d2))
    # X_train = X_train.astype('float32')
    X_val = X_val.astype('float32')
    X_train /= 255
    X_val /= 255

    # Normalise output
    y_train = y_train.astype('float32')
    y_val = y_val.astype('float32')
    y_train /= 255
    y_val /= 255

    # Train the CNN
    history = customValidationCallback()
    model.fit(X_train,
              y_train,
              epochs=EPOCHS,
              batch_size=BATCH_SIZE,
              validation_data=(X_val, y_val),
              callbacks=[history])

    history_data = {
        'loss_history': history.losses,
        'val_error_means': history.val_error_means,
        'val_error_stds': history.val_error_stds,
    }

    model.save(MODEL_NAME + '.h5')

    # The context manager flushes and closes the history file
    with open(SAVE_DIR + '/' + MODEL_NAME + '.p', 'wb') as history_file:
        pickle.dump(history_data, history_file)
예제 #11
0
def preprocess(dir, fn):
    """
    Build training and test arrays from the images and masks under ``dir``.

    The ``images`` and ``masks`` sub-directories of ``dir`` are copied into a
    working directory named ``dir.strip('/') + '_WORKINGCOPY'``, augmented,
    split into ``training_data`` and ``test_data`` sub-directories, loaded,
    resized to 512x512 and normalised. The whole procedure is repeated for as
    long as any of the resulting arrays contains NaN.

    Parameters
    ----------
    dir : str
        Directory containing the ``images`` and ``masks`` folders to process.
    fn : int
        Augmentation factor. When greater than zero,
        ``augmentation.augment`` is asked for ``fn`` times the number of
        files in the working ``images`` folder; otherwise that step is
        skipped.

    Returns
    -------
    tuple
        ``(training_data_images, training_data_masks, test_data_images,
        test_data_masks)`` as float32 arrays with a trailing axis of
        length 1 appended. The processed files also remain on disk in the
        working directory.

    """

    # Insert condition to check whether the augmentation generates NaN data
    # If so, repeat the entire process again...
    # Despite its name, this flag is set to True once NO NaN is found, which
    # is what ends the loop below.
    nandetected = False

    while not nandetected:
        # NOTE(review): str.strip('/') also removes leading slashes, so an
        # absolute `dir` produces a relative working path -- confirm intended.
        data_copy_dir = dir.strip('/')+'_WORKINGCOPY'

        training_data_dir = os.path.join(data_copy_dir, 'training_data')
        test_data_dir = os.path.join(data_copy_dir, 'test_data')

        # Clean up any old directories and create new directories
        # (cleanup.clean is defined elsewhere; presumably it removes the
        # working copy so the makedirs calls below do not fail -- TODO confirm)
        cleanup.clean()

        os.makedirs(os.path.join(training_data_dir, 'images'))
        os.makedirs(os.path.join(training_data_dir, 'masks'))

        os.makedirs(os.path.join(test_data_dir, 'images'))
        os.makedirs(os.path.join(test_data_dir, 'masks'))

        # Make a working directory copy of data so we don't lose anything
        os.makedirs(os.path.join(data_copy_dir, 'images'))
        os.makedirs(os.path.join(data_copy_dir, 'masks'))
        copy_tree(os.path.join(dir, 'images'), os.path.join(data_copy_dir, 'images'))
        copy_tree(os.path.join(dir, 'masks'), os.path.join(data_copy_dir, 'masks'))

        print ('Performing basic rotation and flipping augmentation on data copy...')

        augmentation.basicaugment(data_copy_dir)

        print ('Done!')

        print ('Performing augmentation on data copy...')

        if fn>0:
            # Request fn times as many samples as there are files right now
            images_data = os.listdir(data_copy_dir+'/images/')
            n=fn*len(images_data)
            augmentation.augment(data_copy_dir,n)

            # NOTE(review): both patterns look inside /images/; presumably the
            # augmentation step writes images and masks there -- confirm.
            aug_images = glob.glob(data_copy_dir+'/images/images_original*')
            aug_masks = glob.glob(data_copy_dir+'/images/_groundtruth*')
            # Sort both lists by the last 40 characters of the path so that
            # zip() pairs entries with the same ending (presumably a shared
            # identifier -- TODO confirm)
            aug_images.sort(key=lambda x:x[-40:])
            aug_masks.sort(key=lambda x:x[-40:])

            # Strip the generated prefixes and move the masks to /masks/
            for i, (image_file, mask_file) in enumerate(zip(aug_images, aug_masks)):
                shutil.move(image_file, image_file.replace('images_original_', ''))
                shutil.move(mask_file, mask_file.replace('_groundtruth_(1)_images_', '').replace('/images/', '/masks/'))

            print ('Augmented and saved with n='+str(n)+' samples!')

        # The message below still mentions 70/30, but the split that follows
        # puts every fourth file into the test set.
        print ('Randomly selecting/moving 70% training and 30% test data...')
        images_data = natsorted(os.listdir(data_copy_dir+'/images/'))
        masks_data = natsorted(os.listdir(data_copy_dir+'/masks/'))

        # Changed the sampling so they sample approximately the same distribution
        # Now sampling is 75:25
        # The entries are bare file names from os.listdir, so the replace()
        # calls leave them unchanged: mask names equal image names.
        test_images_data = images_data[::4]
        test_masks_data = [f.replace('/images/', '/masks/') for f in test_images_data]
        training_images_data = [x for x in images_data if x not in test_images_data]
        training_masks_data = [f.replace('/images/', '/masks/') for f in training_images_data]

        # Old random sampling method for 70:30 data split
        # random.shuffle(images_data)
        # training_images_data = images_data[:int(0.7*len(images_data))]
        # training_masks_data = [f.replace('/images/', '/masks/') for f in training_images_data]
        # test_images_data  = images_data[int(0.7*len(images_data)):]
        # test_masks_data = [f.replace('/images/', '/masks/') for f in test_images_data]

        # Copy every file into its training/test sub-directory
        for f in training_images_data:
            shutil.copy(os.path.join(data_copy_dir,'images',f), os.path.join(training_data_dir,'images',f))

        for f in training_masks_data:
            shutil.copy(os.path.join(data_copy_dir,'masks',f), os.path.join(training_data_dir,'masks',f))

        for f in test_images_data:
            shutil.copy(os.path.join(data_copy_dir,'images',f), os.path.join(test_data_dir,'images',f))

        for f in test_masks_data:
            shutil.copy(os.path.join(data_copy_dir,'masks',f), os.path.join(test_data_dir,'masks',f))

        print ('Done!')

        training_data_images = []
        training_data_masks = []
        test_data_images = []
        test_data_masks = []

        print ('Loading data...')

        # Images and masks are paired by natural sort order and resized to
        # 512x512; masks use nearest-neighbour resampling.
        for imagepath, maskpath in zip(natsorted(glob.glob(training_data_dir+'/images/*')), natsorted(glob.glob(training_data_dir+'/masks/*'))):
            image = Image.open(imagepath).resize((512, 512), resample=Image.BILINEAR)
            mask = Image.open(maskpath).resize((512, 512), resample=Image.NEAREST)
            training_data_images.append(np.array(image))
            training_data_masks.append(np.array(mask))

        for imagepath, maskpath in zip(natsorted(glob.glob(test_data_dir+'/images/*')), natsorted(glob.glob(test_data_dir+'/masks/*'))):
            image = Image.open(imagepath).resize((512, 512), resample=Image.BILINEAR)
            mask = Image.open(maskpath).resize((512, 512), resample=Image.NEAREST)
            test_data_images.append(np.array(image))
            test_data_masks.append(np.array(mask))

        training_data_images = np.array(training_data_images).astype(np.float32)
        training_data_masks = np.array(training_data_masks).astype(np.float32)
        test_data_images = np.array(test_data_images).astype(np.float32)
        test_data_masks = np.array(test_data_masks).astype(np.float32)

        print ('Done!')

        print ('Running normalisation...')

        # Min-max scale every image on its own.
        # NOTE(review): an image whose values are all equal has a zero
        # denominator; that is presumably the NaN source the check at the end
        # of the loop looks for -- confirm.
        for idx, img in enumerate(training_data_images):
            training_data_images[idx] = (img-np.min(img))/(np.max(img)-np.min(img))

        # Binarise every non-empty mask to 0/1 around the midpoint of its
        # value range.
        for idx, img in enumerate(training_data_masks):
            if np.sum(img) > 0:
                img[img < (np.min(img)+np.max(img))/2] = 0.
                img[img >= (np.min(img)+np.max(img))/2] = 1.
                training_data_masks[idx] = img

        for idx, img in enumerate(test_data_images):
            test_data_images[idx] = (img-np.min(img))/(np.max(img)-np.min(img))

        for idx, img in enumerate(test_data_masks):
            if np.sum(img) > 0:
                img[img < (np.min(img)+np.max(img))/2] = 0.
                img[img >= (np.min(img)+np.max(img))/2] = 1.
                test_data_masks[idx] = img

        print ('Done!')

        print ('Checking nan...')

        # Any NaN sends the loop back to the start; a clean result ends it.
        if np.isnan(training_data_images).any() or np.isnan(training_data_masks).any() or np.isnan(test_data_images).any() or np.isnan(test_data_masks).any():
            print ('NaN value detected. Repeating the augmentation process again...')
        else:
            nandetected = True

    print ('Done!')

    # Append a trailing axis of length 1 to every array
    training_data_images = training_data_images[..., np.newaxis]
    training_data_masks = training_data_masks[..., np.newaxis]
    test_data_images = test_data_images[..., np.newaxis]
    test_data_masks = test_data_masks[..., np.newaxis]

    return (training_data_images, training_data_masks, test_data_images, test_data_masks)
예제 #12
0
    def generator(self):
        """Yield ``(images, one_hot_labels)`` batches forever.

        Every pass walks ``self.image_paths`` in the batches produced by
        ``_make_batches``. Each image is read, forced to three channels,
        resized with ``resize_image`` and then either randomly cropped and
        augmented (``self.training`` truthy) or centre-cropped to
        ``self.input_size``. The class is the first path component of the
        image path, mapped through ``self.cls2id``.

        Yields:
            Tuple of a float32 image array scaled by ``(x / 255 - 0.5) * 2``
            and the labels encoded by ``to_categorical`` with
            ``self.total_classes`` classes.
        """
        while True:
            batches = _make_batches(size=self.total_images,
                                    batch_size=self.batch_size)
            for start, end in batches:
                arr = []
                labels = []
                cur_batch = self.image_paths[start:end]

                for image_path in cur_batch:
                    img = imread(
                        fname=os.path.join(self.data_path, image_path))

                    # if channels are not 3
                    ndim = len(img.shape)

                    if ndim == 2:
                        # single-channel image: replicate it three times
                        img = img[..., np.newaxis]
                        img = np.tile(A=img, reps=(1, 1, 3))
                    elif img.shape[-1] > 3:
                        # extra channels (e.g. alpha): keep the first three;
                        # the channel count is the last axis length, not the
                        # number of dimensions
                        img = img[..., :3]

                    # resizing image maintaining aspect ratio
                    img = resize_image(img=img, size=self.input_size)

                    if self.training:
                        # random cropping while training
                        img = random_crop_image(img=img, size=self.input_size)
                        img = augment(img=img,
                                      horizontal_flip=True,
                                      vertical_flip=True,
                                      brightness=True,
                                      contrast=True,
                                      rotation=True,
                                      translation=True,
                                      blur=True,
                                      noise=True)
                    else:
                        # center cropping; floor division keeps the slice
                        # bounds integral (true division yields floats, which
                        # cannot be used as indices)
                        h, w, _ = img.shape
                        center_h = h // 2
                        center_w = w // 2
                        center_new_img = self.input_size // 2
                        new_x1 = center_w - center_new_img
                        new_y1 = center_h - center_new_img
                        new_x2 = center_w + center_new_img
                        new_y2 = center_h + center_new_img
                        # an odd size needs one more pixel on the far side
                        if self.input_size % 2 == 1:
                            new_x2 += 1
                            new_y2 += 1
                        img = img[new_y1:new_y2, new_x1:new_x2]

                    arr.append(img)
                    cls = image_path.split('/')[0]
                    id_for_cls = self.cls2id[cls]
                    labels.append(id_for_cls)

                # astype returns a new array, so the result must be kept;
                # otherwise the in-place float arithmetic below runs on the
                # original dtype
                arr = np.array(arr).astype('float32')

                # rescale: divide by 255, shift by -0.5, multiply by 2
                # (maps [0, 255] inputs to [-1, 1])
                arr /= 255.
                arr -= 0.5
                arr *= 2.

                # one hot encoding
                labels = to_categorical(y=labels,
                                        num_classes=self.total_classes)
                yield (arr, labels)
예제 #13
0
def test_imgaug_on_multichannel_different():
    """Augmenting a constant 5-channel sample keeps its shape but not its data."""
    sample = np.full((240, 180, 5), 0.5)
    result = augment(sample, mode=DATA_AUGMENTATION_DIFFERENT_EACH_CHANNEL)
    # NOTE(review): result[0] and result[1] index the first axis (rows), not
    # the channel axis -- confirm this is the intended comparison.
    first, second = result[0], result[1]
    assert (first != second).any()
    assert result.shape == (240, 180, 5)
def main(args):
    """Train a 7-class grayscale CNN and pickle its validation metrics.

    The training and validation sets are read from pickle files, converted
    to single-channel images, passed through ``augment`` and scaled to
    [0, 1] by dividing by 255. After training, the metrics collected by
    ``customValidationCallback`` are written to
    ``results/<model name>.p``.

    Args:
        args: Sequence whose first item is the model name; it becomes the
            stem of the results file.
    """
    # Hyper-parameters
    BATCH_SIZE = 128
    EPOCHS = 25
    SAVE_DIR = 'results/'
    TRAIN_DATA = 'train_raw_data.p'
    VAL_DATA = 'validate_raw_data.p'
    MODEL_NAME = args[0]
    RANDOM_CROP_NUMBER = 1
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224

    # Set up network training instance
    model = Sequential()
    model.add(
        Conv2D(64, (10, 10),
               strides=(3, 3),
               padding='valid',
               input_shape=(RANDOM_CROP_DIM, RANDOM_CROP_DIM, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(256, (5, 5), padding='valid'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Conv2D(288, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(1, 1)))
    model.add(Conv2D(272, (3, 3), padding='same'))
    model.add(Activation('relu'))
    model.add(Conv2D(256, (3, 3), padding='valid'))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(3584))
    model.add(Activation('relu'))
    model.add(Dense(2048))
    model.add(Activation('relu'))
    model.add(Dense(7))
    model.add(Activation('softmax'))

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    model.compile(loss='categorical_crossentropy', optimizer=opt)

    # Prepare data for training. The context managers close each file as
    # soon as it has been read.
    # NOTE: pickle can run arbitrary code while loading -- only point
    # these paths at trusted files.
    with open(TRAIN_DATA, 'rb') as train_file:
        [X_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [X_val, y_val] = pickle.load(val_file)
    X_train = np.array(X_train)
    X_val = np.array(X_val)

    # Convert every image to grayscale and append an explicit channel axis
    X_train = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in X_train])
    X_val = np.array(
        [cv2.cvtColor(item, cv2.COLOR_BGR2GRAY) for item in X_val])
    X_train = X_train.reshape(X_train.shape + (1, ))
    X_val = X_val.reshape(X_val.shape + (1, ))

    # The class id is derived from the second field of each raw label
    y_train = np.array([cyclone_classification(item[1]) for item in y_train])
    y_val = np.array([cyclone_classification(item[1]) for item in y_val])

    X_train, y_train, X_val, y_val = augment(X_train, y_train, X_val, y_val,
                                             ['classification'],
                                             RANDOM_CROP_NUMBER, RESIZE_DIM,
                                             RANDOM_CROP_DIM)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)

    # Normalise input: the training set is cast to float32 in two halves
    # along the first axis and the halves are stacked back together.
    n = X_train.shape[0]
    d1 = X_train[:n // 2, :].astype('float32')
    print(d1.shape)
    d2 = X_train[n // 2:, :].astype('float32')
    print(d2.shape)
    X_train = np.vstack((d1, d2))
    X_val = X_val.astype('float32')

    X_train /= 255
    X_val /= 255

    # One-hot encode the class ids
    y_train = np_utils.to_categorical(y_train)
    y_val = np_utils.to_categorical(y_val)

    # Train the CNN
    history = customValidationCallback()
    model.fit(X_train,
              y_train,
              epochs=EPOCHS,
              batch_size=BATCH_SIZE,
              validation_data=(X_val, y_val),
              callbacks=[history])

    history_data = {
        'f1': history.val_f1s,
        'recall': history.val_recalls,
        'precision': history.val_precisions,
        'accuracy': history.val_accuracy,
        'best_model': history.best_model
    }

    # Closing the file through the context manager guarantees the pickle
    # is flushed to disk before the function returns.
    with open(SAVE_DIR + MODEL_NAME + '.p', 'wb') as out_file:
        pickle.dump(history_data, out_file)
# Data locations and augmentation settings
TRAIN_DATA = 'train_raw_data.p'
VAL_DATA = 'validate_raw_data.p'
RANDOM_CROP_NUMBER = 1
RESIZE_DIM = 256
RANDOM_CROP_DIM = 224

# Load the pickled [inputs, targets] pairs; the context managers close each
# file as soon as it has been read.
# NOTE: pickle can run arbitrary code while loading -- only use trusted files.
with open(TRAIN_DATA, 'rb') as _train_file:
    [x_train, y_train] = pickle.load(_train_file)
with open(VAL_DATA, 'rb') as _val_file:
    [x_test, y_test] = pickle.load(_val_file)
x_train = np.array(x_train)
x_test = np.array(x_test)

# Keep only the first field of every raw target entry
y_train = np.array([item[0] for item in y_train])
y_test = np.array([item[0] for item in y_test])

x_train, y_train, x_test, y_test = augment(x_train, y_train, x_test, y_test,
                                           ['all', 'colour'],
                                           RANDOM_CROP_NUMBER, RESIZE_DIM,
                                           RANDOM_CROP_DIM)

# Normalise input
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Normalise output (targets are divided by 255 as well)
y_train = y_train.astype('float32')
y_test = y_test.astype('float32')
y_train /= 255
y_test /= 255
# Train the CNN
import pickle
import numpy as np
from augmentation import augment

# Data locations and augmentation settings
TRAIN_DATA = 'train_raw_data.p'
VAL_DATA = 'validate_raw_data.p'
RANDOM_CROP_NUMBER = 1
RESIZE_DIM = 256
RANDOM_CROP_DIM = 224

# Prepare data for training; the context managers close each file as soon
# as it has been read.
# NOTE: pickle can run arbitrary code while loading -- only use trusted files.
with open(TRAIN_DATA, 'rb') as _train_file:
    [X_train, y_train] = pickle.load(_train_file)
with open(VAL_DATA, 'rb') as _val_file:
    [X_val, y_val] = pickle.load(_val_file)
X_train = np.array(X_train)
X_val = np.array(X_val)

# Keep only the first field of every raw target entry
y_train = np.array([item[0] for item in y_train])
y_val = np.array([item[0] for item in y_val])

X_train, y_train, X_val, y_val = augment(X_train, y_train, X_val, y_val,
                                         ['all', 'colour'], RANDOM_CROP_NUMBER,
                                         RESIZE_DIM, RANDOM_CROP_DIM)

# Error of a constant prediction of 255 / 2 for every target component.
# The dot product with [1, 1] sums the two squared components, so `result`
# ends up holding one Euclidean distance per validation target.
result = np.abs(np.ones(y_val.shape) * 255 / 2 - y_val)
result = np.sqrt(np.dot(result**2, np.array([1, 1])))

print(np.mean(result))
print(np.std(result))
예제 #17
0
 def __init__(self, root_dir, preprocess_type, dimension):
     """Store the constructor arguments and prepare the augmenter.

     ``root_dir``, ``preprocess_type`` and ``dimension`` are kept unchanged
     as attributes of the same name. ``aug_det`` holds the result of
     calling ``to_deterministic()`` on the object returned by
     ``augmentation.augment()``.
     """
     self.root_dir, self.preprocess_type, self.dimension = (
         root_dir, preprocess_type, dimension)
     augmenter = augmentation.augment()
     self.aug_det = augmenter.to_deterministic()
예제 #18
0
def test_imgaug_on_multichannel_same():
    """Check `augment` in DATA_AUGMENTATION_SAME_PER_CHANNEL mode.

    Only the output shape is verified. Equality of ``result[0]`` and
    ``result[1]`` is deliberately not asserted because it cannot be
    ensured currently.
    """
    shape = (240, 180, 5)
    sample = np.full(shape, 0.5)
    result = augment(sample, mode=DATA_AUGMENTATION_SAME_PER_CHANNEL)
    assert result.shape == shape
예제 #19
0
def main(args):
    """Fine-tune DenseNet201 as a 7-class classifier and pickle the metrics.

    An ImageNet-initialised DenseNet201 without its top is extended with
    global average pooling, a 512-unit ReLU layer, dropout and a 7-way
    softmax. The training and validation sets are read from pickle files,
    passed through ``augment`` and scaled to [0, 1] by dividing by 255.
    After training, the metrics collected by ``customValidationCallback``
    are written to ``<SAVE_DIR><model name>.p``.

    Args:
        args: Sequence whose first item is the model name; it becomes the
            stem of the results file.
    """
    # Hyper-parameters
    BATCH_SIZE = 16
    EPOCHS = 25
    SAVE_DIR = '/vol/bitbucket/qn14/'
    TRAIN_DATA = '/vol/bitbucket/qn14/train_raw_data.p'
    VAL_DATA = '/vol/bitbucket/qn14/validate_raw_data.p'
    MODEL_NAME = args[0]
    RANDOM_CROP_NUMBER = 1
    RESIZE_DIM = 256
    RANDOM_CROP_DIM = 224

    # Set up network training instance
    base_model = DenseNet201(include_top=False,
                             input_shape=(224, 224, 3),
                             weights='imagenet')

    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(512)(x)
    x = Activation('relu')(x)
    x = Dropout(0.38)(x)
    x = Dense(7, activation='softmax')(x)

    model = Model(inputs=base_model.input, outputs=x)

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)

    model.compile(loss='categorical_crossentropy', optimizer=opt)

    # Prepare data for training. The context managers close each file as
    # soon as it has been read.
    # NOTE: pickle can run arbitrary code while loading -- only point
    # these paths at trusted files.
    with open(TRAIN_DATA, 'rb') as train_file:
        [X_train, y_train] = pickle.load(train_file)
    with open(VAL_DATA, 'rb') as val_file:
        [X_val, y_val] = pickle.load(val_file)
    X_train = np.array(X_train)
    X_val = np.array(X_val)

    # The class id is derived from the second field of each raw label
    y_train = np.array([cyclone_classification(item[1]) for item in y_train])
    y_val = np.array([cyclone_classification(item[1]) for item in y_val])

    X_train, y_train, X_val, y_val = augment(X_train, y_train, X_val, y_val,
                                             ['classification', 'colour'],
                                             RANDOM_CROP_NUMBER, RESIZE_DIM,
                                             RANDOM_CROP_DIM)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)

    # Normalise input: the training set is cast to float32 in two halves
    # along the first axis and the halves are stacked back together.
    n = X_train.shape[0]
    d1 = X_train[:n // 2, :].astype('float32')
    print(d1.shape)
    d2 = X_train[n // 2:, :].astype('float32')
    print(d2.shape)
    X_train = np.vstack((d1, d2))
    X_val = X_val.astype('float32')
    X_train /= 255
    X_val /= 255

    # One-hot encode the class ids
    y_train = np_utils.to_categorical(y_train)
    y_val = np_utils.to_categorical(y_val)

    # Train the CNN
    history = customValidationCallback()
    model.fit(X_train,
              y_train,
              epochs=EPOCHS,
              batch_size=BATCH_SIZE,
              validation_data=(X_val, y_val),
              callbacks=[history])

    history_data = {
        'f1': history.val_f1s,
        'recall': history.val_recalls,
        'precision': history.val_precisions,
        'accuracy': history.val_accuracy,
        'best_model': history.best_model
    }

    # Closing the file through the context manager guarantees the pickle
    # is flushed to disk before the function returns.
    with open(SAVE_DIR + MODEL_NAME + '.p', 'wb') as out_file:
        pickle.dump(history_data, out_file)
예제 #20
0
def test_imgaug_on_multichannel_no():
    """Check that `augment` in DATA_AUGMENTATION_NO mode keeps the shape.

    A random (240, 180, 5) image is passed through `augment` and only the
    shape of the result is verified.
    """
    shape = (240, 180, 5)
    sample = np.random.rand(*shape)
    result = augment(sample, mode=DATA_AUGMENTATION_NO)
    assert result.shape == shape
예제 #21
0
def train(d_model,
          g_model,
          gan_model,
          dataset_train,
          n_epochs=100,
          n_batch=4,
          n_aug=20):
    """
    Train the generator and discriminator models.

    Every epoch a fresh augmented dataset is built by running ``augment``
    ``n_aug`` times over each training pair and storing the results in
    shuffled order. The discriminator, and the generator through
    ``gan_model``, are then updated batch by batch. Losses, SSIM scores
    and sample images are written to loggers under ``logs/run_<timestamp>``
    and model summaries to ``models/run_<timestamp>``.

    Args:
        d_model: Discriminator; ``output_shape[1]`` gives the patch size
            and ``train_on_batch`` is called on ``[A, B]`` inputs.
        g_model: Generator used to create the fake B samples.
        gan_model: Composite model; its ``train_on_batch`` is expected to
            return four values, the first being the generator loss.
        dataset_train: Training data accepted by ``split_train_val``,
            which must return a (train, validation) pair of (A, B) pairs.
        n_epochs: Number of epochs to run.
        n_batch: Number of samples per batch.
        n_aug: Number of augmented copies generated per training pair
            and epoch.

    Returns:
        The run timestamp string (``%Y-%m-%d_%H-%M-%S``), so that a model
        can be selected from the correct directory.
    """
    # Extract current time for model/plot save files
    now = datetime.now()
    current_time = now.strftime("%Y-%m-%d_%H-%M-%S")
    run_tag = f"run_{current_time}"

    # makedirs also creates the "models" / "logs" parents on a first run,
    # where a plain mkdir would fail.
    modelsDir = os.path.join("models", run_tag)
    os.makedirs(modelsDir, exist_ok=True)

    logsDir = os.path.join("logs", run_tag)
    os.makedirs(logsDir, exist_ok=True)

    # determine the output square shape of the discriminator
    n_patch = d_model.output_shape[1]

    # Split training set in train and validation sets
    dataset_train, dataset_val = split_train_val(dataset_train)

    trainA_ori, trainB_ori = dataset_train
    valA, valB = dataset_val

    # Fix train- and test-dataset dimensionality issues by adding a
    # trailing axis at position 3
    trainA = np.expand_dims(trainA_ori, axis=3)
    trainB = np.expand_dims(trainB_ori, axis=3)
    valA = np.expand_dims(valA, axis=3)
    valB = np.expand_dims(valB, axis=3)

    dataset_train = [trainA, trainB]
    dataset_val = [valA, valB]

    # calculate the number of batches per training epoch
    n_samples = len(trainA_ori) * n_aug
    bat_per_epo = int(n_samples / n_batch)  # for us: 22*n_aug

    # Logging intervals in global steps. They are clamped to at least 1
    # because `bat_per_epo // 50` (or `// 20`) is 0 for short epochs and
    # would otherwise cause a modulo-by-zero below.
    loss_log_every = max(1, bat_per_epo // 50)
    ssim_log_every = max(1, bat_per_epo // 20)

    # Define loggers for losses, images and similarity metrics
    logger_g = Logger(os.path.join(logsDir, "gen"))
    logger_d1 = Logger(os.path.join(logsDir, "dis1"))
    logger_d2 = Logger(os.path.join(logsDir, "dis2"))
    logger_im = Logger(os.path.join(logsDir, "im"))
    logger_train = Logger(os.path.join(logsDir, "ssim_train"))
    logger_val = Logger(os.path.join(logsDir, "ssim_val"))

    # manually enumerate epochs; `i` counts batches across all epochs
    i = 0

    for epoch in range(n_epochs):
        print("\n" + print_style.BOLD + f"Epoch {epoch+1}/{n_epochs}:" +
              print_style.END)

        # Create list of available indices --> Images can't be used twice in the same epoch.
        # len(trainA) equals np.shape(dataset_train)[1] without converting
        # both image stacks into one temporary array every epoch.
        available_idx = np.array(range(len(trainA)))

        # For each epoch, create a new augmented dataset
        print("Performing data augmentation... ", end="", flush=True)
        # initialize augmented dataset (slice size is fixed at 256x256x1)
        A = np.zeros((n_samples, 256, 256, 1))
        B = np.zeros((n_samples, 256, 256, 1))
        # for shuffling of all slices
        rand_i = list(range(n_samples))
        random.shuffle(rand_i)  # unique list of random indices
        k = 0

        for _ in range(n_aug):
            for imgA, imgB in zip(trainA_ori, trainB_ori):
                # Augment every image randomly per epoch and store the
                # pair at the same shuffled position in A and B
                trainA_aug, trainB_aug = augment(imgA, imgB)
                A[rand_i[k]] = trainA_aug
                B[rand_i[k]] = trainB_aug
                k += 1

        dataset_aug = [A, B]  # all training data (day4 and day1)
        print("Completed")
        # presumably lets the message appear before tqdm draws its bar
        # -- TODO confirm
        time.sleep(1)

        for batch in tqdm(range(bat_per_epo), ascii=True):

            # select a batch of real samples
            [X_realA, X_realB], y_real, used_idx = generate_real_samples(
                dataset_aug, n_batch, n_patch, available_idx)
            # NOTE(review): np.delete returns a new array and the result is
            # discarded here, so `available_idx` is never shrunk. Assigning
            # it back would likely exhaust the pool (sized to the
            # un-augmented set) before the epoch ends, so behaviour is left
            # unchanged -- confirm the intended sampling scheme against
            # generate_real_samples.
            np.delete(available_idx, used_idx)

            # generate a batch of fake samples
            X_fakeB, y_fake = generate_fake_samples(g_model, X_realA, n_patch)

            # Perform actual training
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")

                # update discriminator for real samples
                d_loss1 = d_model.train_on_batch([X_realA, X_realB], y_real)
                # update discriminator for generated samples
                d_loss2 = d_model.train_on_batch([X_realA, X_fakeB], y_fake)
                # update the generator
                g_loss, _, _, _ = gan_model.train_on_batch(
                    X_realA, [y_real, X_realB, X_realB])

            # Store losses (tensorboard)
            if (i + 1) % loss_log_every == 0:
                logger_g.log_scalar(run_tag, g_loss, i)
                logger_d1.log_scalar(run_tag, d_loss1, i)
                logger_d2.log_scalar(run_tag, d_loss2, i)

            # Store similarities (tensorboard)
            if (i + 1) % ssim_log_every == 0:
                similarity_train = check_ssim(g_model, dataset_train, 1)
                similarity_val = check_ssim(g_model, dataset_val, 1)

                logger_train.log_scalar(run_tag, similarity_train, i)
                logger_val.log_scalar(run_tag, similarity_val, i)

            i += 1

        print('>Losses: d1[%.3f] d2[%.3f] g[%.3f]' %
              (d_loss1, d_loss2, g_loss))
        summarize_performance(i, g_model, dataset_train, dataset_val,
                              modelsDir, logger_im, current_time)

    # output current_time so a model can be selected from the correct directory
    return current_time