Example #1
def get_data_queue(args, coord, is_training=True):
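    """Build an ImageReader input queue and dequeue one batch of images and labels."""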
    h, w = map(int, args.input_size.split(','))
    input_size_img = (h, w)
    input_size_label = (h // FEATSTRIDE, w // FEATSTRIDE)  # integer division keeps the label size integral

    # Load reader.
    if is_training:
        with tf.name_scope("create_train_inputs"):
            reader_train = ImageReader(
                args.data_dir,
                args.data_train_list,
                input_size_img,
                input_size_label,
                RANDOM_SCALE,
                IMG_MEAN,
                coord)
            image_batch_train, label_batch_train = reader_train.dequeue(args.batch_size)
            return image_batch_train, label_batch_train
    else:
        with tf.name_scope("create_val_inputs"):
            reader_val = ImageReader(
                args.data_dir,
                args.data_val_list,
                input_size_img,
                input_size_label,
                False,
                IMG_MEAN,
                coord)
            image_batch_val, label_batch_val = reader_val.dequeue(args.batch_size, is_training=False)
            return image_batch_val, label_batch_val
Example #2
def eval(net, data_dict, ensemble_num, recalls):
    net.eval()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'test'))
    data_loader = DataLoader(data_set,
                             BATCH_SIZE,
                             shuffle=False,
                             num_workers=8)

    features = []
    with torch.no_grad():
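        # Embed every test batch without gradients; keep L2-normalized features on the CPU.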
        for inputs, labels in data_loader:
            out = net(inputs.to(DEVICE))
            out = F.normalize(out)
            features.append(out.cpu())
    features = torch.cat(features, 0)
    torch.save(
        features,
        'results/{}_test_features_{:03}.pth'.format(DATA_NAME, ensemble_num))
    # load feature vectors
    features = [
        torch.load('results/{}_test_features_{:03}.pth'.format(DATA_NAME, d))
        for d in range(1, ensemble_num + 1)
    ]
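    # Merge the saved per-model features along the feature dimension (dim 1).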
    features = torch.cat(features, 1)
    acc_list = recall(features, data_set.labels, rank=recalls)
    desc = ''
    for index, recall_id in enumerate(recalls):
        desc += 'R@{}:{:.2f}% '.format(recall_id, acc_list[index] * 100)
    print(desc)
Example #3
    def predict_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.test_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        global raw_output_
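        # raw_output_ is kept at module scope so the un-resized, prior-adjusted probabilities can be read elsewhere.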
        raw_output = tf.nn.softmax(net.outputs, axis=-1)
        prior1 = self.conf.prior
        prior = 1 - prior1
        class0, class1 = tf.split(raw_output, 2, -1)
        # Rescale both class probabilities by the priors; compute the shared
        # denominator first so the updated class0 is not reused by mistake.
        denominator = class0 * prior + class1 * prior1
        class0 = (class0 * prior) / denominator
        class1 = (class1 * prior1) / denominator
        raw_output = tf.concat([class0, class1], -1)

        raw_output_ = raw_output
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

        # Create output directories if they do not exist yet.
        pred_dir = os.path.join(self.conf.out_dir, 'prediction', str(prior1))
        if not os.path.exists(pred_dir):
            os.makedirs(pred_dir)
        if self.conf.visual:
            visual_dir = os.path.join(self.conf.out_dir, 'visual_prediction', str(prior1))
            if not os.path.exists(visual_dir):
                os.makedirs(visual_dir)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #4
    def test_setup(self):
        # Create queue coordinator.
        num_layers = 50
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.valid_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)

        # Create network
        net, end_points = deeplabv3(self.image_batch,
                                    num_classes=self.conf.num_classes,
                                    depth=num_layers,
                                    is_training=True)
        raw_output = end_points['resnet{}/logits'.format(num_layers)]

        # predictions
        #raw_output = net.o  # [batch_size, 41, 41, 21]
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [-1])
        # labels
        gt = tf.reshape(self.label_batch, [-1])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # tf 1.3.0 fix: replace ignored labels with zeros so the streaming metrics accept them.
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #5
    def test_setup(self):
        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.valid_data_list,
                                 None, False, False, self.conf.ignore_label,
                                 IMG_MEAN, self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # predictions
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [-1])
        # labels
        gt = tf.reshape(self.label_batch, [-1])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # tf 1.3.0 fix: replace ignored labels with zeros so the streaming metrics accept them.
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tcm.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tcm.streaming_mean_iou(
            self.pred, gt, self.conf.num_classes, weights)

        # confusion matrix
        self.confusion_matrix = tcm.confusion_matrix(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #6
	def predict_setup(self):
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.test_data_list,
				None, # the images have different sizes
				False, # no data-aug
				False, # no data-aug
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			image, label = reader.image, reader.label # [h, w, 3 or 1]
		# Add one batch dimension [1, h, w, 3 or 1]
		image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
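		# Build 0.75x and 0.5x scaled copies of the input for multi-scale inference.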
		h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2])
		image_batch_075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))]))
		image_batch_05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))]))
		

		# Create network
		if self.conf.encoder_name not in ['res101', 'res50']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50")
			sys.exit(-1)
		else:
			with tf.variable_scope('', reuse=False):
				net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name)

		# predictions
		# Network raw output
		raw_output100 = net.outputs
		raw_output075 = net075.outputs
		raw_output05 = net05.outputs
		raw_output = tf.reduce_max(tf.stack([raw_output100,
									tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
									tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
		raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
		raw_output = tf.argmax(raw_output, axis=3)
		self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

		# Create output directories if they do not exist yet.
		if not os.path.exists(os.path.join(self.conf.out_dir, 'prediction')):
			os.makedirs(os.path.join(self.conf.out_dir, 'prediction'))
		if self.conf.visual and not os.path.exists(os.path.join(self.conf.out_dir, 'visual_prediction')):
			os.makedirs(os.path.join(self.conf.out_dir, 'visual_prediction'))

		# Loader for loading the checkpoint
		self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #7
    def predict_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.test_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        print("Vor dem argmax: ", type(raw_output))
        # array = raw_output.eval(session=self.sess)
        # print("Array ist:",array)
        # raw_output = tf.argmax(raw_output, axis=3)
        print("nach dem argmax", raw_output)
        raw_output_sm = tf.nn.softmax(raw_output)
        self.pred = tf.cast(tf.expand_dims(raw_output_sm, dim=3), tf.float32)
        print("Prediction is: ", self.pred)

        # Create output directories if they do not exist yet.
        if not os.path.exists(os.path.join(self.conf.out_dir, 'prediction')):
            os.makedirs(os.path.join(self.conf.out_dir, 'prediction'))
        if self.conf.visual and not os.path.exists(os.path.join(self.conf.out_dir, 'visual_prediction')):
            os.makedirs(os.path.join(self.conf.out_dir, 'visual_prediction'))

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #8
def detect_image_folder(image_folder, output_path, model_config_path,
                        model_checkpoint_path):
    """
    Runs object detection on a folder of images. Saves the results to a csv
    """

    img_reader = ImageReader(image_folder)

    model = Detector(model_config_path, model_checkpoint_path)
    results = model.detect_images(img_reader.load_images(),
                                  img_reader.filenames)

    results.to_csv(output_path, index=None)
Example #9
    def predict_setup(self):
        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.test_data_list,
                                 None, False, False, self.conf.ignore_label,
                                 IMG_MEAN, self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

        # Create output directories if they do not exist yet.
        if not os.path.exists(os.path.join(self.conf.out_dir, 'prediction')):
            os.makedirs(os.path.join(self.conf.out_dir, 'prediction'))
        if self.conf.visual and not os.path.exists(os.path.join(self.conf.out_dir, 'visual_prediction')):
            os.makedirs(os.path.join(self.conf.out_dir, 'visual_prediction'))

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #10
    def __init__(self, images, config, norm=None, shuffle=True):

        self.images = images

        # self.true_box_buffer = config['true_box_buffer']  # Maximum objects per box
        self.batch_size = config['batch_size']
        self.anchors = config['anchors']
        self.nb_anchors = len(config['anchors'])

        self.img_w, self.img_h = config['image_shape']
        self.grid = config['grid']
        self.img_encoder = ImageReader(img_width=self.img_w,
                                       img_height=self.img_h,
                                       norm=norm,
                                       grid=self.grid)

        self.labels = np.array(config['labels'])

        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.images)
Example #11
def train(net, data_dict, optim):
    net.train()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'train'))
    data_loader = DataLoader(data_set,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=8)

    l_data, t_data, n_data = 0.0, 0, 0
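    # l_data: summed loss, t_data: number of correct predictions, n_data: samples seen.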
    for inputs, labels in data_loader:
        optim.zero_grad()
        out = net(inputs.to(DEVICE))
        loss = criterion(out, labels.to(DEVICE))
        print('loss:{:.4f}'.format(loss.item()), end='\r')
        loss.backward()
        optim.step()
        _, pred = torch.max(out, 1)
        l_data += loss.item()
        t_data += torch.sum(pred.cpu() == labels).item()
        n_data += len(labels)

    return l_data / n_data, t_data / n_data
Example #12
    def test_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.valid_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)
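        # Build 0.75x and 0.5x scaled copies of the input for multi-scale inference.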
        h_orig, w_orig = tf.to_float(tf.shape(
            self.image_batch)[1]), tf.to_float(tf.shape(self.image_batch)[2])
        image_batch_075 = tf.image.resize_images(
            self.image_batch,
            tf.stack([
                tf.to_int32(tf.multiply(h_orig, 0.75)),
                tf.to_int32(tf.multiply(w_orig, 0.75))
            ]))
        image_batch_05 = tf.image.resize_images(
            self.image_batch,
            tf.stack([
                tf.to_int32(tf.multiply(h_orig, 0.5)),
                tf.to_int32(tf.multiply(w_orig, 0.5))
            ]))

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes,
                                 False)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    False)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes,
                                   False)
        else:
            with tf.variable_scope('', reuse=False):
                net = ResNet_segmentation(self.image_batch,
                                          self.conf.num_classes, False,
                                          self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net075 = ResNet_segmentation(image_batch_075,
                                             self.conf.num_classes, False,
                                             self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net05 = ResNet_segmentation(image_batch_05,
                                            self.conf.num_classes, False,
                                            self.conf.encoder_name)

        # predictions
        # Network raw output
        raw_output100 = net.outputs
        raw_output075 = net075.outputs
        raw_output05 = net05.outputs
        raw_output = tf.reduce_max(
            tf.stack([
                raw_output100,
                tf.image.resize_images(raw_output075,
                                       tf.shape(raw_output100)[1:3]),
                tf.image.resize_images(raw_output05,
                                       tf.shape(raw_output100)[1:3])
            ]),
            axis=0)
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [-1])
        # labels
        gt = tf.reshape(self.label_batch, [-1])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # tf 1.3.0 fix: replace ignored labels with zeros so the streaming metrics accept them.
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # confusion matrix
        self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #13
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch = reader.dequeue(
                self.conf.batch_size)
            image_batch_075 = tf.image.resize_images(
                self.image_batch, [int(h * 0.75), int(w * 0.75)])
            image_batch_05 = tf.image.resize_images(
                self.image_batch, [int(h * 0.5), int(w * 0.5)])
            # #testWWang
            # image = self.image_batch[0]
            # label = self.label_batch[0]
            # utils.save_image(image, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name = image ,mean = IMG_MEAN)
            # utils.save_image(label, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name=label, mean=IMG_MEAN)
            #
            # #end

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    True)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            with tf.variable_scope('', reuse=False):
                net = ResNet_segmentation(self.image_batch,
                                          self.conf.num_classes, True,
                                          self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net075 = ResNet_segmentation(image_batch_075,
                                             self.conf.num_classes, True,
                                             self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net05 = ResNet_segmentation(image_batch_05,
                                            self.conf.num_classes, True,
                                            self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output100 = net.outputs
        raw_output075 = net075.outputs
        raw_output05 = net05.outputs
        raw_output = tf.reduce_max(
            tf.stack([
                raw_output100,
                tf.image.resize_images(raw_output075,
                                       tf.shape(raw_output100)[1:3]),
                tf.image.resize_images(raw_output05,
                                       tf.shape(raw_output100)[1:3])
            ]),
            axis=0)

        # Ground truth: ignoring all labels greater than or equal to n_classes
        label_proc = prepare_label(self.label_batch,
                                   tf.stack(raw_output.get_shape()[1:3]),
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)  # [batch_size, h, w]
        label_proc075 = prepare_label(self.label_batch,
                                      tf.stack(raw_output075.get_shape()[1:3]),
                                      num_classes=self.conf.num_classes,
                                      one_hot=False)
        label_proc05 = prepare_label(self.label_batch,
                                     tf.stack(raw_output05.get_shape()[1:3]),
                                     num_classes=self.conf.num_classes,
                                     one_hot=False)

        raw_gt = tf.reshape(label_proc, [-1])
        raw_gt075 = tf.reshape(label_proc075, [-1])
        raw_gt05 = tf.reshape(label_proc05, [-1])

        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        indices075 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1)
        indices05 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1)

        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
        gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        raw_prediction100 = tf.reshape(raw_output100,
                                       [-1, self.conf.num_classes])
        raw_prediction075 = tf.reshape(raw_output075,
                                       [-1, self.conf.num_classes])
        raw_prediction05 = tf.reshape(raw_output05,
                                      [-1, self.conf.num_classes])

        prediction = tf.gather(raw_prediction, indices)
        prediction100 = tf.gather(raw_prediction100, indices)
        prediction075 = tf.gather(raw_prediction075, indices075)
        prediction05 = tf.gather(raw_prediction05, indices05)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction100, labels=gt)
        loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction075, labels=gt075)
        loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction05, labels=gt05)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
            loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                loss05) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
        # We have several optimizers here in order to handle the different lr_mult
        # which is a kind of parameters in Caffe. This controls the actual lr for each
        # layer.
        opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)
        opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   self.conf.momentum)
        opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   self.conf.momentum)

        # Gradient accumulation
        # Define a variable to accumulate gradients.
        accum_grads = [
            tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
            for v in encoder_trainable + decoder_w_trainable +
            decoder_b_trainable
        ]
        # Define an operation to clear the accumulated gradients for next batch.
        self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        # Accumulate and normalise the gradients.
        self.accum_grads_op = [
            accum_grads[i].assign_add(grad / self.conf.grad_update_every)
            for i, grad in enumerate(grads)
        ]

        grads_encoder = accum_grads[:len(encoder_trainable)]
        grads_decoder_w = accum_grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = accum_grads[(len(encoder_trainable) +
                                       len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        # Collect the moving_mean and moving_variance update ops.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, dim=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=20)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
Example #14
def main():
    args = get_arguments()

    print('SETUP TrainConfig...')
    train_cfg = TrainConfig(args)
    train_cfg.display()

    # print('SETUP EvalConfig...')
    eval_cfg = EvalConfig(args)
    # eval_cfg.display()

    train_reader = ImageReader(train_cfg)
    eval_reader = ImageReader(eval_cfg)

    train_net = ICNet(train_cfg, train_reader, eval_reader)

    _train_op, _losses, _summaries, _Preds, _IoUs, _Images = train_net.optimizer()

    vis = Visualizer(eval_cfg)

    global_step = train_net.start_step
    epoch_step = int(
        len(train_reader.attribute_list) / train_cfg.BATCH_SIZE + 0.5)
    start_epoch = int(global_step / epoch_step)
    save_step = int(epoch_step * train_cfg.SAVE_PERIOD)

    all_steps = int(len(eval_reader.attribute_list) / (eval_cfg.BATCH_SIZE))
    g_eval_step = 0

    train_fd = {train_net.handle: train_net.train_handle}
    eval_fd = {train_net.handle: train_net.eval_handle}
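    # Feeding a different iterator handle switches the input pipeline between train and eval.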

    for epochs in range(start_epoch, train_cfg.TRAIN_EPOCHS):

        epoch_loss = None
        start_batch = global_step % epoch_step

        print(f'Start batch - {start_batch}')
        print(f'Epoch step - {epoch_step}')

        for steps in range(start_batch, epoch_step):
            start_time = time.time()

            _, losses = train_net.sess.run([_train_op, _losses],
                                           feed_dict=train_fd)

            if epoch_loss is None:
                epoch_loss = np.array(losses)
            else:
                epoch_loss += np.array(losses)

            if global_step % save_step == 0:
                train_net.save(global_step)

            global_step += 1

            duration = time.time() - start_time
            msg = (f'step {global_step} \t total loss = {losses[3]:.3f}, sub4 = {losses[0]:.3f}, '
                   f'sub24 = {losses[1]:.3f}, sub124 = {losses[2]:.3f}, val_loss: {losses[4]:.3f} '
                   f'({duration:.3f} sec/step)')
            print(msg)

        epoch_loss /= (epoch_step - start_batch)
        accuracy = None

        for steps in range(all_steps - 1):
            start_time = time.time()

            IoUs = train_net.sess.run(_IoUs, feed_dict=eval_fd)

            if accuracy is None:
                accuracy = np.array(IoUs)
            else:
                accuracy += np.array(IoUs)

            g_eval_step += 1

            duration = time.time() - start_time
            msg = (f'step {steps} \t mean_IoU = {IoUs[0]:.3f}, Person_IoU = {IoUs[1]:.3f}, '
                   f'Rider_IoU = {IoUs[2]:.3f}, ({duration:.3f} sec/step)')
            print(msg)

        IoUs, Preds, Images = train_net.sess.run([_IoUs, _Preds, _Images],
                                                 feed_dict=eval_fd)
        accuracy += np.array(IoUs)
        accuracy /= all_steps

        g_eval_step += 1

        vis.save_and_show(Images, Preds, g_eval_step)

        feed_dict = {
            train_net.sum_loss: epoch_loss,
            train_net.sum_acc: accuracy
        }
        summaries = train_net.sess.run(_summaries, feed_dict=feed_dict)
        train_net.writer.add_summary(summaries, epochs)
Example #15
    def train_setup(self, reuse=False):
        tf.set_random_seed(self.conf.random_seed)
        num_layers = 50

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()
        self.n_gpu = self.conf.n_gpu

        # Input size
        self.input_size = (self.conf.input_height, self.conf.input_width)
        j_step = 0
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 self.input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            # print "1"*22
            # print reader
        image_data, image_label = reader.dequeue(self.conf.batch_size)
        self.image_data = image_data
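        # Split the dequeued batch across GPUs; tf.split's argument order changed between TF versions.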
        if tf.__version__.startswith('1.'):
            split_train_data_node = tf.split(image_data, self.n_gpu)
            split_train_labels_node = tf.split(image_label, self.n_gpu)
        else:
            split_train_data_node = tf.split(0, self.n_gpu, image_data)
            split_train_labels_node = tf.split(0, self.n_gpu, image_label)
        with tf.variable_scope(tf.get_variable_scope()):
            all_loss = []
            for device_index, (i, self.image_batch,
                               self.label_batch) in enumerate(
                                   zip([1], split_train_data_node,
                                       split_train_labels_node)):
                with tf.device('/gpu:%d' % i):
                    #print i
                    with tf.name_scope('%s_%d' % ("gpu", i)) as scope:
                        if j_step == 0:
                            j_step = 1
                            pass
                        else:
                            reuse = True
                        # net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
                        #                          is_training=self.conf.is_training ,reuse=reuse)
                        net, end_points = deeplabv3(
                            self.image_batch,
                            num_classes=self.conf.num_classes,
                            depth=num_layers,
                            is_training=True,
                            reuse=reuse)
                        self.raw_output = end_points[
                            'gpu_{}/resnet{}/logits'.format(i, num_layers)]
                        # Network raw output
                        # [batch_size, 41, 41, 21]
                        output_size = (self.raw_output.shape[1].value,
                                       self.raw_output.shape[2].value)

                        label_proc = prepare_label(
                            self.label_batch,
                            output_size,
                            num_classes=self.conf.num_classes,
                            one_hot=False)  # [batch_size, 41, 41]
                        raw_gt = tf.reshape(label_proc, [-1])
                        indices = tf.squeeze(
                            tf.where(
                                tf.less_equal(raw_gt,
                                              self.conf.num_classes - 1)), 1)
                        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                        raw_prediction = tf.reshape(
                            self.raw_output, [-1, self.conf.num_classes])
                        # print raw_prediction
                        # print gt
                        prediction = raw_prediction
                        # prediction = tf.expand_dims(raw_prediction,  3)
                        # prediction = tl.act.pixel_wise_softmax(prediction)
                        # print prediction
                        # print label_proc
                        # loss = 1 - tl.cost.dice_coe(prediction, label_proc, axis=[1, 2, 3, 4])
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=prediction, labels=gt)
                        l2_losses = [
                            self.conf.weight_decay * tf.nn.l2_loss(v)
                            for v in tf.trainable_variables()
                            if 'weights' in v.name
                        ]
                        # Loss function
                        all_loss.append(
                            tf.reduce_mean(loss) + tf.add_n(l2_losses))
                        tf.get_variable_scope().reuse_variables()

        # Output size
        #output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)

        # Variables that load from pre-trained model.
        # For training, last few layers should not be loaded.
        if self.conf.pretrain_file is not None:
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            original_step = int(self.conf.pretrain_file.split("-")[-1])
        else:
            original_step = 0
        num_steps = self.conf.num_steps + original_step
        # Trainable Variables
        # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
        # if they are presented in var_list of the optimiser definition.
        # So we remove them from the list.
        all_trainable = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]
        # Fine-tune part
        conv_trainable = [v for v in all_trainable
                          if 'fc' not in v.name]  # lr * 1.0
        # ASPP part
        fc_trainable = [v for v in all_trainable if 'fc' in v.name]
        # fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
        # fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
        # check
        #assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
        #assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

        # Ground truth: ignoring all labels greater than or equal to n_classes
        #label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes,
        #one_hot=False)  # [batch_size, 41, 41]
        #raw_gt = tf.reshape(label_proc, [-1, ])
        #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        #gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        #raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
        #prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
        # L2 regularization
        #l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
        # Loss function
        self.reduced_loss = tf.add_n(all_loss) / self.n_gpu
        #self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        self.loss_trans = tf.placeholder(dtype=tf.float32, shape=())
        self.final_loss = (self.reduced_loss + self.loss_trans) / 2

        learning_rate = tf.scalar_mul(
            base_lr, tf.pow((1 - self.curr_step / num_steps), self.conf.power))
        #print self.conf.power
        self.learning_rate = learning_rate
        #print  learning_rate
        # We have several optimizers here in order to handle the different lr_mult
        # which is a kind of parameters in Caffe. This controls the actual lr for each
        # layer.
        opt = tf.train.AdamOptimizer(learning_rate, self.conf.momentum, 0.98)
        #opt= tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_fc_b = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
        #opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads_conv = tf.gradients(self.final_loss, conv_trainable)
        # train_op = opt.apply_gradients(zip(grads_conv, conv_trainable))
        #grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads_conv[:len(conv_trainable)]
        # grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))]
        # grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
        # Update params
        train_op_conv = opt.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
        # train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
        # Finally, get the train_op!
        self.train_op = train_op_conv
        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        if self.conf.pretrain_file is not None:
            self.loader = tf.train.Saver(var_list=restore_var)
Example #16
	def train_setup(self):
		tf.set_random_seed(self.conf.random_seed)
		
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Input size
		self.input_size = (self.conf.input_height, self.conf.input_width)
		
		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.data_list,
				self.input_size,
				self.conf.random_scale,
				self.conf.random_mirror,
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
		
		# Create network
		net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
			is_training=self.conf.is_training)
		#net = DeepLabVGGModel(self.image_batch, num_classes=self.conf.num_classes,
		#												 is_training=self.conf.is_training)
		
		# Network raw output
		self.raw_output = net.o # [batch_size, 41, 41, 21]

		self.raw_output = tf.image.resize_bilinear(self.raw_output, [350, 350])
		print(tf.shape(self.image_batch))
		# Output size
		output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)
		
		# Variables that load from pre-trained model.
		# For training, last few layers should not be loaded.
		#restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]   # used when initializing from the INIT pre-trained model
		restore_var = [v for v in tf.global_variables()]    # restore all parameters
		# Trainable Variables
		# Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
		# if they are presented in var_list of the optimiser definition.
		# So we remove them from the list.
		all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
		# Fine-tune part
		conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
		# ASPP part
		fc_trainable = [v for v in all_trainable if 'fc' in v.name]
		fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
		fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
		# check
		print(len(fc_trainable))
		print(len(fc_w_trainable) + len(fc_b_trainable))
		assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
		assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

		# Ground truth: ignoring all labels greater than or equal to n_classes
		label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False) # [batch_size, 41, 41]
		raw_gt = tf.reshape(label_proc, [-1,])
		indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
		gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
		raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
		prediction = tf.gather(raw_prediction, indices)

		# Pixel-wise softmax_cross_entropy loss
		loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
		
		# L2 regularization
		l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
		# Loss function
		self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

		# Define optimizers
		# 'poly' learning rate
		base_lr = tf.constant(self.conf.learning_rate)
		self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
		#learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (15000+self.curr_step) /(15000+ self.conf.num_steps)), self.conf.power))
		learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (self.curr_step) /(self.conf.num_steps)), self.conf.power))

		# We have several optimizers here in order to handle the different lr_mult
		# which is a kind of parameters in Caffe. This controls the actual lr for each
		# layer.
		opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
		opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
		opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
		# To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
		# Instead, we separate the steps compute_grads+update_params.
		# Compute grads
		grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
		grads_conv = grads[:len(conv_trainable)]
		grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
		grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
		# Update params
		train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
		train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
		train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
		# Finally, get the train_op!
		self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
		#self.train_op = tf.group(train_op_fc_w, train_op_fc_b)  # only optimize the fully-connected part

		# Saver for storing checkpoints of the model
		self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

		# Loader for loading the pre-trained model
		self.loader = tf.train.Saver(var_list=restore_var)
Example #17
    parser.add_argument('--warm_up', default=2, type=int, help='warm up number')
    parser.add_argument('--recalls', default='1,2,4,8', type=str, help='selected recall')

    opt = parser.parse_args()
    # args parse
    data_path, data_name, backbone_type = opt.data_path, opt.data_name, opt.backbone_type
    feature_dim, batch_size, num_epochs = opt.feature_dim, opt.batch_size, opt.num_epochs
    warm_up, recalls = opt.warm_up, [int(k) for k in opt.recalls.split(',')]
    save_name_pre = '{}_{}_{}'.format(data_name, backbone_type, feature_dim)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train', backbone_type)
    train_data_loader = DataLoader(train_data_set, batch_size, shuffle=True, num_workers=8)
    test_data_set = ImageReader(data_path, data_name, 'test', backbone_type)
    test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False, num_workers=8)

    # model setup, optimizer config and loss definition
    model = Model(backbone_type, feature_dim, len(train_data_set.class_to_idx)).cuda()
    optimizer = AdamW([{'params': model.backbone.parameters()}, {'params': model.refactor.parameters()},
                       {'params': model.fc.parameters(), 'lr': 1e-2}], lr=1e-4, weight_decay=1e-4)
    lr_scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
    loss_criterion = ProxyAnchorLoss()

    data_base = {'test_images': test_data_set.images, 'test_labels': test_data_set.labels}
    best_recall = 0.0
    for epoch in range(1, num_epochs + 1):
Example #18
def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            None,  # No defined input size.
            False,  # No random scale.
            False,  # No random mirror.
            args.ignore_label,
            IMG_MEAN,
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(
        label, dim=0)  # Add one batch dimension.

    # Create network.
    if args.encoder_name not in ['res101', 'res50']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50")
        sys.exit(-1)
    else:
        net = ResNet_segmentation(image_batch, args.num_classes, False,
                                  args.encoder_name)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)  # Create 4-d tensor.
    pred = tf.reshape(pred, [
        -1,
    ])

    # Ground truth.
    gt = tf.reshape(label_batch, [
        -1,
    ])
    # Ignoring all labels greater than or equal to n_classes.
    indexes = tf.less_equal(gt, args.num_classes - 1)
    weights = tf.cast(indexes, tf.int32)

    # Fix for tf 1.3.0: map ignored labels to a valid class id (0) so the
    # streaming metrics below do not index out of range.
    gt = tf.where(indexes, gt, tf.cast(indexes, tf.uint8))

    # mIoU
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=args.num_classes, weights=weights)

    # Pixel accuracy
    accu, accu_update_op = tf.contrib.metrics.streaming_accuracy(
        pred, gt, weights=weights)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over evaluation steps.
    for step in range(args.num_steps):
        preds, _, _ = sess.run([pred, update_op, accu_update_op])
        if step % 100 == 0:
            print('step {:d}'.format(step))
    print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    print('Pixel Accuracy: {:.3f}'.format(accu.eval(session=sess)))
    coord.request_stop()
    coord.join(threads)
Beispiel #19
0
	def test_setup(self):
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.valid_data_list,
				None, # the images have different sizes
				False, # no data-aug
				False, # no data-aug
				self.conf.ignore_label,
				self.coord)
			image, label = reader.image, reader.label # [h, w, 3 or 1]
		# Add one batch dimension [1, h, w, 3 or 1]
		self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
		self.image_batch = tf.identity(self.image_batch, name='image_batch')
		self.image_batch -= IMG_MEAN
		# Create network
		if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50, or deeplab")
			sys.exit(-1)
		elif self.conf.encoder_name == 'deeplab':
			net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
		else:
			net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name)
		
		# predictions
		raw_output = net.outputs
		raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3,])
		raw_output = tf.argmax(raw_output, axis=3)
		pred = tf.expand_dims(raw_output, dim=3)
		self.pred = tf.reshape(pred, [-1,], name="predictions")
		# labels
		gt = tf.reshape(self.label_batch, [-1,])
		# Ignoring all labels greater than or equal to n_classes.
		temp = tf.less_equal(gt, self.conf.num_classes - 1)
		weights = tf.cast(temp, tf.int32)

		# fix for tf 1.3.0
		gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

		# Pixel accuracy
		self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
			self.pred, gt, weights=weights)

		# mIoU
		self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
			self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

		# F1-score components (the score itself is combined from these counts)
		pred = tf.cast(self.pred, tf.int32)
		gt = tf.cast(gt, tf.int32)

		self.areaOverlap = tf.count_nonzero(pred * gt)
		self.areaGTObj = tf.count_nonzero(gt)
		self.areaPredicted = tf.count_nonzero(pred)

		# Loader for loading the checkpoint
		self.loader = tf.train.Saver(var_list=tf.global_variables())
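The three `count_nonzero` tensors above only collect the ingredients of the F1 score; the score itself is combined after accumulating the counts over the test set. A minimal sketch of that final step, with hypothetical variable names:

def f1_from_areas(overlap, gt_area, pred_area, eps=1e-10):
    # precision = TP / predicted positives, recall = TP / actual positives
    precision = overlap / (pred_area + eps)
    recall = overlap / (gt_area + eps)
    return 2 * precision * recall / (precision + recall + eps)

# e.g. after summing sess.run([self.areaOverlap, self.areaGTObj,
# self.areaPredicted]) over all test images:
# print('F1: {:.3f}'.format(f1_from_areas(overlap, gt_area, pred_area)))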
Beispiel #20
0
    data_path, data_name, crop_type, backbone_type = opt.data_path, opt.data_name, opt.crop_type, opt.backbone_type
    gd_config, feature_dim, smoothing, temperature = opt.gd_config, opt.feature_dim, opt.smoothing, opt.temperature
    margin, recalls, batch_size = opt.margin, [
        int(k) for k in opt.recalls.split(',')
    ], opt.batch_size
    num_epochs = opt.num_epochs
    save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(
        data_name, crop_type, backbone_type, gd_config, feature_dim, smoothing,
        temperature, margin, batch_size)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train', crop_type)
    train_sample = MPerClassSampler(train_data_set.labels, batch_size)
    train_data_loader = DataLoader(train_data_set,
                                   batch_sampler=train_sample,
                                   num_workers=8)
    test_data_set = ImageReader(data_path, data_name,
                                'query' if data_name == 'isc' else 'test',
                                crop_type)
    test_data_loader = DataLoader(test_data_set,
                                  batch_size,
                                  shuffle=False,
                                  num_workers=8)
    eval_dict = {'test': {'data_loader': test_data_loader}}
    if data_name == 'isc':
        gallery_data_set = ImageReader(data_path, data_name, 'gallery',
                                       crop_type)
Beispiel #21
0
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Devices
        gpu_list = get_available_gpus()
        zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], []
        previous_crf_names = []
        restore_vars = []
        self.loaders = []

        self.im_list = []

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Load reader
                with tf.name_scope("create_inputs"):
                    reader = ImageReader(self.conf.data_dir,
                                         self.conf.data_list, input_size,
                                         self.conf.random_scale,
                                         self.conf.random_mirror,
                                         self.conf.ignore_label, IMG_MEAN,
                                         self.coord)
                    self.image_batch, self.label_batch = reader.dequeue(
                        self.conf.batch_size)
                    self.im_list.append(self.image_batch)
                    image_batch_075 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.75), int(w * 0.75)])
                    image_batch_05 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.5), int(w * 0.5)])

                # Create network
                with tf.variable_scope('', reuse=False):
                    net = Deeplab_v2(self.image_batch,
                                     self.conf.num_classes,
                                     True,
                                     rescale075=False,
                                     rescale05=False,
                                     crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net075 = Deeplab_v2(image_batch_075,
                                        self.conf.num_classes,
                                        True,
                                        rescale075=True,
                                        rescale05=False,
                                        crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net05 = Deeplab_v2(image_batch_05,
                                       self.conf.num_classes,
                                       True,
                                       rescale075=False,
                                       rescale05=True,
                                       crf_type=self.conf.crf_type)

                # Variables that load from pre-trained model.
                restore_var = [
                    v for v in tf.global_variables()
                    if ('fc' not in v.name and 'crfrnn' not in v.name)
                ]
                restore_vars.append(restore_var)

                # Trainable Variables
                all_trainable = tf.trainable_variables()
                # Fine-tune part: exclude CRF variables already claimed by an
                # earlier tower (a list comprehension avoids removing items
                # from the list while iterating over it).
                all_trainable = [
                    v for v in all_trainable
                    if v.name not in previous_crf_names
                ]

                crf_trainable = [
                    v for v in all_trainable
                    if ('crfrnn' in v.name and v.name not in previous_crf_names
                        )
                ]
                previous_crf_names.extend(v.name for v in crf_trainable)
                encoder_trainable = [
                    v for v in all_trainable
                    if 'fc' not in v.name and 'crfrnn' not in v.name
                ]  # lr * 1.0

                # Remove encoder_trainable from all_trainable
                #all_trainable = [v for v in all_trainable if v not in encoder_trainable]

                # Decoder part
                decoder_trainable = [
                    v for v in all_trainable
                    if 'fc' in v.name and 'crfrnn' not in v.name
                ]

                decoder_w_trainable = [
                    v for v in decoder_trainable
                    if ('weights' in v.name or 'gamma' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 10.0
                decoder_b_trainable = [
                    v for v in decoder_trainable
                    if ('biases' in v.name or 'beta' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 20.0
                # Check
                assert (len(all_trainable) == len(decoder_trainable) +
                        len(crf_trainable) + len(encoder_trainable))
                assert (len(decoder_trainable) == len(decoder_w_trainable) +
                        len(decoder_b_trainable))

                # Network raw output
                raw_output100 = net.outputs

                raw_output075 = net075.outputs
                raw_output05 = net05.outputs
                raw_output = tf.reduce_max(tf.stack([
                    raw_output100,
                    tf.image.resize_images(raw_output075,
                                           tf.shape(raw_output100)[1:3, ]),
                    tf.image.resize_images(raw_output05,
                                           tf.shape(raw_output100)[1:3, ])
                ]),
                                           axis=0)

                # Ground Truth: ignoring all labels greater than or equal to n_classes
                label_proc = prepare_label(self.label_batch,
                                           tf.stack(
                                               raw_output.get_shape()[1:3]),
                                           num_classes=self.conf.num_classes,
                                           one_hot=True)  # [batch_size, h, w, num_classes]
                label_proc075 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output075.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)
                label_proc05 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output05.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)

                raw_gt = tf.reshape(label_proc, [
                    -1,
                ])
                raw_gt075 = tf.reshape(label_proc075, [
                    -1,
                ])
                raw_gt05 = tf.reshape(label_proc05, [
                    -1,
                ])

                indices = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)),
                    1)
                indices075 = tf.squeeze(
                    tf.where(
                        tf.less_equal(raw_gt075, self.conf.num_classes - 1)),
                    1)
                indices05 = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt05,
                                           self.conf.num_classes - 1)), 1)

                gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
                gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

                raw_prediction = tf.reshape(raw_output,
                                            [-1, self.conf.num_classes])
                raw_prediction100 = tf.reshape(raw_output100,
                                               [-1, self.conf.num_classes])
                raw_prediction075 = tf.reshape(raw_output075,
                                               [-1, self.conf.num_classes])
                raw_prediction05 = tf.reshape(raw_output05,
                                              [-1, self.conf.num_classes])

                prediction = tf.gather(raw_prediction, indices)
                prediction100 = tf.gather(raw_prediction100, indices)
                prediction075 = tf.gather(raw_prediction075, indices075)
                prediction05 = tf.gather(raw_prediction05, indices05)

                # Pixel-wise softmax_cross_entropy loss
                #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                '''
                coefficients = [0.01460247, 1.25147725, 2.88479363, 1.20348121, 1.65261654, 1.67514772,
                                0.62338799, 0.7729363,  0.42038501, 0.98557268, 1.31867536, 0.85313332,
                                0.67227604, 1.21317965, 1.        , 0.24263748, 1.80877607, 1.3082213,
                                0.79664027, 0.72543945, 1.27823374]
                '''
                #loss = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction)
                #loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
                loss100 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction100,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                #loss100 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction100)
                #loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
                loss075 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction075,
                    labels=tf.reshape(label_proc075[0],
                                      (int(h * 0.75) * int(w * 0.75),
                                       self.conf.num_classes)))
                #loss075 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes)), logits=raw_prediction075)
                #loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
                loss05 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction05,
                    labels=tf.reshape(
                        label_proc05[0],
                        (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)))
                #loss05 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)), logits=raw_prediction05)

                # L2 regularization
                l2_losses = [
                    self.conf.weight_decay * tf.nn.l2_loss(v)
                    for v in all_trainable if 'weights' in v.name
                ]

                # Loss function
                self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
                    loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                        loss05) + tf.add_n(l2_losses)

                # Define optimizers
                # 'poly' learning rate
                base_lr = tf.constant(self.conf.learning_rate)
                self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
                learning_rate = tf.scalar_mul(
                    base_lr,
                    tf.pow((1 - self.curr_step / self.conf.num_steps),
                           self.conf.power))
                # We use several optimizers here to handle the different
                # lr_mult values, a Caffe-style parameter that controls the
                # actual learning rate of each layer.
                opt_encoder = tf.train.MomentumOptimizer(
                    learning_rate, self.conf.momentum)
                opt_decoder_w = tf.train.MomentumOptimizer(
                    learning_rate * 10.0, self.conf.momentum)
                opt_decoder_b = tf.train.MomentumOptimizer(
                    learning_rate * 20.0, self.conf.momentum)
                opt_crf = tf.train.MomentumOptimizer(learning_rate,
                                                     self.conf.momentum)

                # Gradient accumulation
                # Define a variable to accumulate gradients.
                accum_grads = [
                    tf.Variable(tf.zeros_like(v.initialized_value()),
                                trainable=False) for v in encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable
                ]

                # Define an operation to clear the accumulated gradients for next batch.
                self.zero_op = [
                    v.assign(tf.zeros_like(v)) for v in accum_grads
                ]
                # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
                # Instead, we separate the steps compute_grads+update_params.
                # Compute grads
                grads = tf.gradients(
                    self.reduced_loss, encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable)
                # Accumulate and normalise the gradients.
                self.accum_grads_op = [
                    accum_grads[i].assign_add(grad /
                                              self.conf.grad_update_every)
                    for i, grad in enumerate(grads)
                ]

                grads_encoder = accum_grads[:len(encoder_trainable)]
                grads_decoder_w = accum_grads[len(encoder_trainable
                                                  ):len(encoder_trainable) +
                                              len(decoder_w_trainable)]
                grads_decoder_b = accum_grads[(
                    len(encoder_trainable) +
                    len(decoder_w_trainable)):(len(encoder_trainable) +
                                               len(decoder_w_trainable) +
                                               len(decoder_b_trainable))]
                grads_crf = accum_grads[
                    len(encoder_trainable) + len(decoder_w_trainable) +
                    len(decoder_b_trainable
                        ):]  # assuming crf gradients are appended to the end

                zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
                zip_decoder_b.append(
                    list(zip(grads_decoder_b, decoder_b_trainable)))
                zip_decoder_w.append(
                    list(zip(grads_decoder_w, decoder_w_trainable)))
                zip_crf.append(list(zip(grads_crf, crf_trainable)))

        avg_grads_encoder = average_gradients(zip_encoder)
        avg_grads_decoder_w = average_gradients(zip_decoder_w)
        avg_grads_decoder_b = average_gradients(zip_decoder_b)
        avg_grads_crf = average_gradients(zip_crf)

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Update params
                train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
                train_op_fc_w = opt_decoder_w.apply_gradients(
                    avg_grads_decoder_w)
                train_op_fc_b = opt_decoder_b.apply_gradients(
                    avg_grads_decoder_b)
                train_op_crf = opt_crf.apply_gradients(avg_grads_crf)

        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_fc_w, train_op_fc_b,
                                     train_op_crf)  # train_op_conv

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                self.loaders.append(tf.train.Saver(var_list=restore_vars[i]))
                #self.loaders.append(tf.train.Saver(var_list=tf.global_variables()))

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, axis=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=1)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
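The `zero_op` / `accum_grads_op` / `train_op` triple above implements gradient accumulation: one logical batch is split into `grad_update_every` sub-batches whose averaged gradients produce a single parameter update. A minimal driver sketch, assuming an open session `sess` and a `trainer` object built by this `train_setup` (both names are assumptions):

for step in range(trainer.conf.num_steps):
    feed = {trainer.curr_step: step}  # drives the 'poly' learning rate
    sess.run(trainer.zero_op)         # clear the gradient accumulators
    for _ in range(trainer.conf.grad_update_every):
        # each run adds grad / grad_update_every into accum_grads
        loss_value, _ = sess.run(
            [trainer.reduced_loss, trainer.accum_grads_op], feed_dict=feed)
    # apply the averaged gradients with the per-group optimizers
    sess.run(trainer.train_op, feed_dict=feed)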
Beispiel #22
0
        int(k) for k in opt.recalls.split(',')
    ], opt.batch_size
    num_epochs = opt.num_epochs
    save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}'.format(data_name, backbone_type,
                                                     gd_config, feature_dim,
                                                     smoothing, temperature,
                                                     margin, batch_size)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    process_sop_data(opt.data_dir, opt.df_path)

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train')
    train_sample = MPerClassSampler(train_data_set.labels, batch_size)
    train_data_loader = DataLoader(train_data_set,
                                   batch_sampler=train_sample,
                                   num_workers=opt.workers,
                                   pin_memory=True)
    test_data_set = ImageReader(data_path, data_name, 'test')
    test_data_loader = DataLoader(test_data_set,
                                  batch_size,
                                  shuffle=False,
                                  num_workers=opt.workers,
                                  pin_memory=True)
    eval_dict = {'test': {'data_loader': test_data_loader}}

    # model setup, model profile, optimizer config and loss definition
    model = Model(backbone_type,
Beispiel #23
0
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        input_size = (self.conf.input_height, self.conf.input_width)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch = reader.dequeue(
                self.conf.batch_size)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                      True, self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output = net.outputs  # [batch_size, h, w, 21]

        # Output size
        output_shape = tf.shape(raw_output)
        output_size = (output_shape[1], output_shape[2])

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        label_proc = prepare_label(self.label_batch,
                                   output_size,
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)
        raw_gt = tf.reshape(label_proc, [
            -1,
        ])
        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
        # We use several optimizers here to handle the different lr_mult
        # values, a Caffe-style parameter that controls the actual
        # learning rate of each layer.
        opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)
        opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   self.conf.momentum)
        opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        grads_encoder = grads[:len(encoder_trainable)]
        grads_decoder_w = grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = grads[(len(encoder_trainable) +
                                 len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, dim=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 2, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 2, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 2, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=2)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
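Both `train_setup` variants build the same 'poly' schedule, learning_rate = base_lr * (1 - curr_step / num_steps) ** power. A pure-Python sketch for sanity-checking values; the example settings below are assumptions, not taken from the snippets:

def poly_lr(base_lr, step, num_steps, power):
    # mirrors the tf.scalar_mul(base_lr, tf.pow(...)) construction above
    return base_lr * (1 - step / num_steps) ** power

# e.g. base_lr=2.5e-4, power=0.9 (typical DeepLab settings, assumed):
# poly_lr(2.5e-4, 0, 20000, 0.9)     -> 2.5e-4
# poly_lr(2.5e-4, 10000, 20000, 0.9) -> ~1.34e-4
# poly_lr(2.5e-4, 20000, 20000, 0.9) -> 0.0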
Beispiel #24
0
def full_run_single(video_id,
                    video_dir,
                    static_dir,
                    frame_by_frame_results_dir,
                    static_results_dir,
                    crop_boxes_dir,
                    ignore_mask_dir,
                    detector_config_path,
                    detector_model_path,
                    reid_model_path,
                    reid_model_backbone,
                    crop_results_dir,
                    anomaly_results_dir,
                    bg_interval=4,
                    bg_alpha=0.05,
                    bg_start_frame=1,
                    bg_threshold=5,
                    raw_detect_interval=30,
                    crop_min_obj_size=8,
                    crop_row_capacity=3,
                    crop_box_aspect_ratio=2,
                    ignore_count_thresh=0.08,
                    ignore_area_thresh=2000,
                    ignore_score_thresh=0.1,
                    ignore_gau_sigma=3,
                    abnormal_duration_thresh=60,
                    detect_duration_thresh=6,
                    undetect_duration_thresh=8,
                    bbox_score_thresh=0.3,
                    light_thresh=0.8,
                    anomaly_thresh=0.8,
                    similarity_thresh=0.95,
                    suspicious_duration_thresh=18,
                    detector_verbose_interval=20,
                    verbose=True):
    """
    Runs the full anomaly detection pipeline on a video

    video_id: video id/name
    video_dir: folder the video is in
    static_dir: folder to put the background images in
    frame_by_frame_results_dir: folder to put the raw video detection results in
    static_results_dir: folder to put the background image detection results in
    crop_boxes_dir: folder to put the crop boxes in
    ignore_mask_dir: folder to put the ignore region mask in

    detector_config_path: path to detector configuration file
    detector_model_path: path to detector model checkpoint
    reid_model_path: path to re-ID model checkpoint
    reid_model_backbone: re-ID model backbone, e.g. "resnet50"

    bg_interval, bg_alpha, bg_start_frame, bg_threshold: see calc_bg_full_video function
    raw_detect_interval: number of frames between detection on raw video
    crop_min_obj_size, crop_row_capacity, crop_box_aspect_ratio: see create_crop_boxes function
    ignore_count_thresh, ignore_area_thresh, ignore_score_thresh, ignore_gau_sigma: see create_ignore_mask function
    abnormal_duration_thresh, detect_duration_thresh, undetect_duration_thresh, bbox_score_thresh,
        light_thresh, anomaly_thresh, similarity_thresh, suspicious_duration_thresh:
            See get_anomalies function

    detector_verbose_interval: detector progress printing interval
    verbose: verbose printing
    """

    # Set up file paths
    video_path = os.path.join(video_dir, f"{video_id}.mp4")
    static_images_folder = os.path.join(static_dir, f"{video_id}")
    fbf_results_path = os.path.join(frame_by_frame_results_dir,
                                    f"{video_id}.csv")
    static_results_path = os.path.join(static_results_dir, f"{video_id}.csv")
    crop_boxes_path = os.path.join(crop_boxes_dir, f"{video_id}.csv")
    crop_results_path = os.path.join(crop_results_dir, f"{video_id}.csv")
    ignore_mask_path = os.path.join(ignore_mask_dir, f"{video_id}.npy")
    anomaly_results_path = os.path.join(anomaly_results_dir, f"{video_id}.csv")

    # Create folders
    os.makedirs(static_images_folder, exist_ok=True)
    os.makedirs(frame_by_frame_results_dir, exist_ok=True)
    os.makedirs(static_results_dir, exist_ok=True)
    os.makedirs(crop_boxes_dir, exist_ok=True)
    os.makedirs(crop_results_dir, exist_ok=True)
    os.makedirs(ignore_mask_dir, exist_ok=True)
    os.makedirs(anomaly_results_dir, exist_ok=True)

    # Read Video
    raw_video = VideoReader(video_path)

    # bg modeling
    print("Creating background...")
    calc_bg_full_video(video_path, static_images_folder, bg_interval, bg_alpha,
                       bg_start_frame, bg_threshold, verbose)

    # Detection
    detector = Detector(detector_config_path,
                        detector_model_path,
                        detector_verbose_interval,
                        class_restrictions=None)
    # class_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
    #                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
    #                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
    #                'tvmonitor')
    # detector.model.CLASSES = class_names
    # detector.class_labels = class_names
    ## Raw Video
    print("Detecting raw video...")
    raw_images, raw_frame_nums = raw_video.load_video(raw_detect_interval)
    fbf_results = detector.detect_images(raw_images, raw_frame_nums)
    fbf_results.to_csv(fbf_results_path, index=False)

    ## Static Images
    static_reader = ImageReader(static_images_folder)
    static_frame_names = list(
        map(lambda f: int(f[:-4]),
            static_reader.filenames))  # "123.jpg" -> 123

    print("Detecting background...")
    static_results = detector.detect_images(static_reader.load_images(),
                                            static_frame_names)
    static_results.to_csv(static_results_path, index=False)

    # Perspective Cropping
    print("Creating crop boxes...")
    create_crop_boxes(
        fbf_results_path, crop_boxes_path, raw_video.img_shape,
        crop_min_obj_size, crop_row_capacity,
        crop_box_aspect_ratio)  # either the static or the fbf results should work

    # Should be able to use this in place of the normal static images, but it
    # doesn't look feasible at the moment: detection time is far too long.
    crop_boxes = pd.read_csv(crop_boxes_path).values
    print("Detecting cropped background...")
    crop_detect_results = detector.detect_images(static_reader.load_images(),
                                                 static_frame_names,
                                                 crop_boxes=crop_boxes)
    crop_detect_results.to_csv(crop_results_path)

    # Ignore Region
    print("Creating ignore mask...")
    create_ignore_mask(fbf_results_path, ignore_mask_path, raw_video.img_shape,
                       ignore_count_thresh, ignore_area_thresh,
                       ignore_score_thresh, ignore_gau_sigma)

    # Detect anomalies
    print("Detecting anomalies...")
    anomalies = get_anomalies_preprocessed(
        video_path, reid_model_path, fbf_results_path, static_results_path,
        ignore_mask_path, reid_model_backbone, bg_start_frame, bg_interval,
        abnormal_duration_thresh, detect_duration_thresh,
        undetect_duration_thresh, bbox_score_thresh, light_thresh,
        anomaly_thresh, similarity_thresh, suspicious_duration_thresh, verbose)

    if anomalies is not None:
        anomaly_event_times = get_overlapping_time(anomalies)

        # Save results
        print("Saving Results...")
        anomalies.to_csv(anomaly_results_path, index=False)

        return anomalies, anomaly_event_times

    else:
        return [], []
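A minimal invocation sketch for `full_run_single`; every path below is a placeholder, and all tuning parameters are left at their defaults:

anomalies, anomaly_event_times = full_run_single(
    video_id='1',
    video_dir='data/videos',
    static_dir='data/static',
    frame_by_frame_results_dir='results/fbf',
    static_results_dir='results/static',
    crop_boxes_dir='results/crop_boxes',
    ignore_mask_dir='results/ignore_masks',
    detector_config_path='configs/detector.py',
    detector_model_path='checkpoints/detector.pth',
    reid_model_path='checkpoints/reid.pth',
    reid_model_backbone='resnet50',
    crop_results_dir='results/crops',
    anomaly_results_dir='results/anomalies')
print(anomaly_event_times)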