Example #1
def detect_image_folder(image_folder, output_path, model_config_path,
                        model_checkpoint_path):
    """
    Runs object detection on a folder of images and saves the results to a CSV file.
    """

    img_reader = ImageReader(image_folder)

    model = Detector(model_config_path, model_checkpoint_path)
    results = model.detect_images(img_reader.load_images(),
                                  img_reader.filenames)

    results.to_csv(output_path, index=None)
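For reference, a call might look like the following; the paths are hypothetical placeholders, not values from the source.

# Hypothetical paths, for illustration only.
detect_image_folder(image_folder='data/images',
                    output_path='results/detections.csv',
                    model_config_path='configs/detector.yaml',
                    model_checkpoint_path='checkpoints/detector.pth')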
Example #2
def eval(net, data_dict, ensemble_num, recalls):
    net.eval()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'test'))
    data_loader = DataLoader(data_set,
                             BATCH_SIZE,
                             shuffle=False,
                             num_workers=8)

    features = []
    with torch.no_grad():
        for inputs, labels in data_loader:
            out = net(inputs.to(DEVICE))
            out = F.normalize(out)
            features.append(out.cpu())
    features = torch.cat(features, 0)
    torch.save(
        features,
        'results/{}_test_features_{:03}.pth'.format(DATA_NAME, ensemble_num))
    # load feature vectors
    features = [
        torch.load('results/{}_test_features_{:03}.pth'.format(DATA_NAME, d))
        for d in range(1, ensemble_num + 1)
    ]
    features = torch.cat(features, 1)
    acc_list = recall(features, data_set.labels, rank=recalls)
    desc = ''
    for index, recall_id in enumerate(recalls):
        desc += 'R@{}:{:.2f}% '.format(recall_id, acc_list[index] * 100)
    print(desc)
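The snippet saves each ensemble member's features, reloads all of them, and scores the concatenation with a `recall` helper whose code is not shown. As a rough sketch of what such a Recall@K metric typically computes (an assumption, not the repo's implementation):

import torch

def recall_at_k(features, labels, ranks=(1, 2, 4, 8)):
    # features: [N, D] L2-normalized embeddings; labels: N class ids.
    labels = torch.as_tensor(labels)
    sim = features @ features.t()          # cosine similarity matrix
    sim.fill_diagonal_(float('-inf'))      # a query must not match itself
    _, idx = sim.topk(max(ranks), dim=-1)  # nearest neighbours per query
    match = labels[idx] == labels.unsqueeze(-1)
    return [match[:, :k].any(dim=-1).float().mean().item() for k in ranks]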
Example #3
    def test_setup(self):
        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.valid_data_list,
                                 None, False, False, self.conf.ignore_label,
                                 IMG_MEAN, self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # predictions
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [
            -1,
        ])
        # labels
        gt = tf.reshape(self.label_batch, [
            -1,
        ])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # fix for tf 1.3.0
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tcm.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tcm.streaming_mean_iou(
            self.pred, gt, self.conf.num_classes, weights)

        # confusion matrix
        self.confusion_matrix = tcm.confusion_matrix(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
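The `less_equal` mask above is what keeps ignore-label pixels (labels >= num_classes) out of the metrics. The same idea in plain NumPy, as a sketch rather than code from the repo:

import numpy as np

def masked_pixel_accuracy(pred, gt, num_classes):
    # Pixels whose label is >= num_classes (e.g. an ignore_label such
    # as 255) receive weight 0 and are excluded from the metric.
    pred, gt = np.ravel(pred), np.ravel(gt)
    valid = gt <= num_classes - 1
    return float((pred[valid] == gt[valid]).mean())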
Example #4
    def predict_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.test_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        global raw_output_
        raw_output = tf.nn.softmax(net.outputs, axis=-1)
        prior1 = self.conf.prior
        prior = 1 - prior1
        class0, class1 = tf.split(raw_output, 2, -1)
        # Compute the shared denominator once, before either channel is
        # overwritten; the original reassigned class0 first, so class1
        # was normalized against an already-modified denominator.
        denom = class0 * prior + class1 * prior1
        class0 = (class0 * prior) / denom
        class1 = (class1 * prior1) / denom
        raw_output = tf.concat([class0, class1], -1)

        raw_output_ = raw_output
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

        # Create directory
        if not os.path.exists(self.conf.out_dir + '/prediction' + '/' +
                              str(prior1)):
            #os.makedirs(self.conf.out_dir)
            os.makedirs(self.conf.out_dir + '/prediction' + '/' + str(prior1))
            if self.conf.visual:
                os.makedirs(self.conf.out_dir + '/visual_prediction' + '/' +
                            str(prior1))

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
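The prior adjustment above rescales the two softmax channels and renormalizes with a shared denominator. A minimal NumPy sketch of the same arithmetic:

import numpy as np

def reweight_binary_softmax(probs, prior1):
    # probs: array of shape [..., 2] (softmax output); prior1 is the
    # class-1 prior. Both channels are scaled from the original values
    # before renormalizing, so the denominator stays shared.
    scaled = probs * np.array([1.0 - prior1, prior1])
    return scaled / scaled.sum(axis=-1, keepdims=True)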
Example #5
    def test_setup(self):
        # Create queue coordinator.
        num_layers = 50
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.valid_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)

        # Create network
        net, end_points = deeplabv3(self.image_batch,
                                    num_classes=self.conf.num_classes,
                                    depth=num_layers,
                                    is_training=True)
        raw_output = end_points['resnet{}/logits'.format(num_layers)]

        # predictions
        #raw_output = net.o  # [batch_size, 41, 41, 21]
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [
            -1,
        ])
        # labels
        gt = tf.reshape(self.label_batch, [
            -1,
        ])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # fix for tf 1.3.0
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #6
 def embed_url_image(self, image_url, id=None):
     try:
         image = ImageReader.read_from_url(image_url)
         return self.embed_pil_image(image, id)
     except Exception as e:
         print(f'Failed to create embeddings for id {id}')
         print(e)
         return None
Example #7
	def predict_setup(self):
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.test_data_list,
				None, # the images have different sizes
				False, # no data-aug
				False, # no data-aug
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			image, label = reader.image, reader.label # [h, w, 3 or 1]
		# Add one batch dimension [1, h, w, 3 or 1]
		image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
		h_orig, w_orig = tf.to_float(tf.shape(image_batch)[1]), tf.to_float(tf.shape(image_batch)[2])
		image_batch_075 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.75)), tf.to_int32(tf.multiply(w_orig, 0.75))]))
		image_batch_05 = tf.image.resize_images(image_batch, tf.stack([tf.to_int32(tf.multiply(h_orig, 0.5)), tf.to_int32(tf.multiply(w_orig, 0.5))]))
		

		# Create network
		if self.conf.encoder_name not in ['res101', 'res50']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50")
			sys.exit(-1)
		else:
			with tf.variable_scope('', reuse=False):
				net = ResNet_segmentation(image_batch, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net075 = ResNet_segmentation(image_batch_075, self.conf.num_classes, False, self.conf.encoder_name)
			with tf.variable_scope('', reuse=True):
				net05 = ResNet_segmentation(image_batch_05, self.conf.num_classes, False, self.conf.encoder_name)

		# predictions
		# Network raw output
		raw_output100 = net.outputs
		raw_output075 = net075.outputs
		raw_output05 = net05.outputs
		raw_output = tf.reduce_max(tf.stack([raw_output100,
									tf.image.resize_images(raw_output075, tf.shape(raw_output100)[1:3,]),
									tf.image.resize_images(raw_output05, tf.shape(raw_output100)[1:3,])]), axis=0)
		raw_output = tf.image.resize_bilinear(raw_output, tf.shape(image_batch)[1:3,])
		raw_output = tf.argmax(raw_output, axis=3)
		self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

		# Create directory
		if not os.path.exists(self.conf.out_dir):
			os.makedirs(self.conf.out_dir)
			os.makedirs(self.conf.out_dir + '/prediction')
			if self.conf.visual:
				os.makedirs(self.conf.out_dir + '/visual_prediction')

		# Loader for loading the checkpoint
		self.loader = tf.train.Saver(var_list=tf.global_variables())
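The multi-scale trick above runs the network at 100%, 75% and 50% scale, upsamples the logits to a common size, and fuses them with an element-wise maximum. A PyTorch sketch of the fusion step only (an illustration, not the repo's TensorFlow code):

import torch
import torch.nn.functional as F

def multiscale_max_fusion(logits_by_scale, out_hw):
    # Upsample each scale's [N, C, h, w] logits to out_hw, then take
    # the element-wise maximum across scales.
    resized = [F.interpolate(l, size=out_hw, mode='bilinear',
                             align_corners=False)
               for l in logits_by_scale]
    return torch.stack(resized).max(dim=0).values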
Example #8
    def predict_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.test_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        print("Before the argmax: ", type(raw_output))
        # array = raw_output.eval(session=self.sess)
        # print("Array is:", array)
        # raw_output = tf.argmax(raw_output, axis=3)
        print("after the argmax", raw_output)
        raw_output_sm = tf.nn.softmax(raw_output)
        self.pred = tf.cast(tf.expand_dims(raw_output_sm, dim=3), tf.float32)
        print("Prediction is: ", self.pred)

        # Create directory
        if not os.path.exists(self.conf.out_dir):
            os.makedirs(self.conf.out_dir)
            os.makedirs(self.conf.out_dir + '/prediction')
            if self.conf.visual:
                os.makedirs(self.conf.out_dir + '/visual_prediction')

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #9
    def __init__(self, images, config, norm=None, shuffle=True):

        self.images = images

        # self.true_box_buffer = config['true_box_buffer']  # Maximum objects per box
        self.batch_size = config['batch_size']
        self.anchors = config['anchors']
        self.nb_anchors = len(config['anchors'])

        self.img_w, self.img_h = config['image_shape']
        self.grid = config['grid']
        self.img_encoder = ImageReader(img_width=self.img_w,
                                       img_height=self.img_h,
                                       norm=norm,
                                       grid=self.grid)

        self.labels = np.array(config['labels'])

        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.images)
Example #10
def get_data_queue(args, coord, is_training=True):
    h, w = map(int, args.input_size.split(','))
    input_size_img = (h, w)
    input_size_label = (h // FEATSTRIDE, w // FEATSTRIDE)  # integer division: sizes must be ints

    # Load reader.
    if is_training:
        with tf.name_scope("create_train_inputs"):
            reader_train = ImageReader(
                args.data_dir,
                args.data_train_list,
                input_size_img,
                input_size_label,
                RANDOM_SCALE,
                IMG_MEAN,
                coord)
            image_batch_train, label_batch_train = reader_train.dequeue(args.batch_size)
            return image_batch_train, label_batch_train
    else:
        with tf.name_scope("create_val_inputs"):
            reader_val = ImageReader(
                args.data_dir,
                args.data_val_list,
                input_size_img,
                input_size_label,
                False,
                IMG_MEAN,
                coord)
            image_batch_val, label_batch_val = reader_val.dequeue(args.batch_size, is_training=False)
            return image_batch_val, label_batch_val
Example #11
def query_upload_image():
    image_file = request.files['file']

    radius = json.loads(request.form['radius'])
    entity_types = json.loads(request.form['type'])
    entity_types = [ent_type['value'] for ent_type in entity_types]
    query_types = [
        typ for ent_type in entity_types
        for typ in list(ENTITY_TYPES[ent_type].keys())
    ]

    pil_image = ImageReader.read_and_resize(image_file.stream)

    return geowine.retrieve_entities_with_pil_image(image=pil_image,
                                                    radius=radius,
                                                    entity_type=query_types)
Example #12
    def predict_setup(self):
        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.test_data_list,
                                 None, False, False, self.conf.ignore_label,
                                 IMG_MEAN, self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        image_batch, label_batch = tf.expand_dims(image,
                                                  dim=0), tf.expand_dims(label,
                                                                         dim=0)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(image_batch, self.conf.num_classes, False)
        else:
            net = ResNet_segmentation(image_batch, self.conf.num_classes,
                                      False, self.conf.encoder_name)

        # Predictions.
        raw_output = net.outputs
        raw_output = tf.image.resize_bilinear(raw_output,
                                              tf.shape(image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        self.pred = tf.cast(tf.expand_dims(raw_output, dim=3), tf.uint8)

        # Create directory
        if not os.path.exists(self.conf.out_dir):
            os.makedirs(self.conf.out_dir)
            os.makedirs(self.conf.out_dir + '/prediction')
            if self.conf.visual:
                os.makedirs(self.conf.out_dir + '/visual_prediction')

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #13
def train(net, data_dict, optim):
    net.train()
    data_set = ImageReader(data_dict, get_transform(DATA_NAME, 'train'))
    data_loader = DataLoader(data_set,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=8)

    l_data, t_data, n_data = 0.0, 0, 0
    for inputs, labels in data_loader:
        optim.zero_grad()
        out = net(inputs.to(DEVICE))
        loss = criterion(out, labels.to(DEVICE))
        print('loss:{:.4f}'.format(loss.item()), end='\r')
        loss.backward()
        optim.step()
        _, pred = torch.max(out, 1)
        l_data += loss.item()
        t_data += torch.sum(pred.cpu() == labels).item()
        n_data += len(labels)

    return l_data / n_data, t_data / n_data
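Note that `l_data` accumulates per-batch mean losses but is divided by the sample count, so the reported loss is a true average only when every batch has the same size. A sketch of sample-weighted bookkeeping (an alternative formulation, not the snippet's code):

def running_averages(batch_stats):
    # batch_stats: iterable of (mean_batch_loss, n_correct, batch_size).
    # Weighting each batch's mean loss by its size yields a per-sample
    # average even when the last batch is smaller.
    total_loss, total_correct, total_seen = 0.0, 0, 0
    for loss, correct, n in batch_stats:
        total_loss += loss * n
        total_correct += correct
        total_seen += n
    return total_loss / total_seen, total_correct / total_seen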
Example #14
	def train_setup(self):
		tf.set_random_seed(self.conf.random_seed)
		
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Input size
		input_size = (self.conf.input_height, self.conf.input_width)
		
		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.data_list,
				input_size,
				self.conf.random_scale,
				self.conf.random_mirror,
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
		
		# Create network
		if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50, or deeplab")
			sys.exit(-1)
		elif self.conf.encoder_name == 'deeplab':
			net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
			# Variables that load from pre-trained model.
			restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
			# Trainable Variables
			all_trainable = tf.trainable_variables()
			# Fine-tune part
			encoder_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
			# Decoder part
			decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
		else:
			net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name)
			# Variables that load from pre-trained model.
			restore_var = [v for v in tf.global_variables() if 'resnet_v1' in v.name]
			# Trainable Variables
			all_trainable = tf.trainable_variables()
			# Fine-tune part
			encoder_trainable = [v for v in all_trainable if 'resnet_v1' in v.name] # lr * 1.0
			# Decoder part
			decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]
		
		decoder_w_trainable = [v for v in decoder_trainable if 'weights' in v.name or 'gamma' in v.name] # lr * 10.0
		decoder_b_trainable = [v for v in decoder_trainable if 'biases' in v.name or 'beta' in v.name] # lr * 20.0
		# Check
		assert(len(all_trainable) == len(decoder_trainable) + len(encoder_trainable))
		assert(len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable))

		# Network raw output
		raw_output = net.outputs # [batch_size, h, w, 21]

		# Output size
		output_shape = tf.shape(raw_output)
		output_size = (output_shape[1], output_shape[2])

		# Ground Truth: ignoring all labels greater than or equal to n_classes
		label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False)
		raw_gt = tf.reshape(label_proc, [-1,])
		indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
		gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
		raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
		prediction = tf.gather(raw_prediction, indices)

		# Pixel-wise softmax_cross_entropy loss
		loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
		# L2 regularization
		l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name]
		# Loss function
		self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

		# Define optimizers
		# 'poly' learning rate
		base_lr = tf.constant(self.conf.learning_rate)
		self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
		learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
		# We use several optimizers here to handle the different lr_mult values
		# (a per-layer learning-rate multiplier, as in Caffe), which control the
		# actual lr of each layer.
		opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
		opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
		opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
		# To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
		# Instead, we separate the steps compute_grads+update_params.
		# Compute grads
		grads = tf.gradients(self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable)
		grads_encoder = grads[:len(encoder_trainable)]
		grads_decoder_w = grads[len(encoder_trainable) : (len(encoder_trainable) + len(decoder_w_trainable))]
		grads_decoder_b = grads[(len(encoder_trainable) + len(decoder_w_trainable)):]
		# Update params
		train_op_conv = opt_encoder.apply_gradients(zip(grads_encoder, encoder_trainable))
		train_op_fc_w = opt_decoder_w.apply_gradients(zip(grads_decoder_w, decoder_w_trainable))
		train_op_fc_b = opt_decoder_b.apply_gradients(zip(grads_decoder_b, decoder_b_trainable))
		# Finally, get the train_op!
		update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # for collecting moving_mean and moving_variance
		with tf.control_dependencies(update_ops):
			self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

		# Saver for storing checkpoints of the model
		self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)

		# Loader for loading the pre-trained model
		self.loader = tf.train.Saver(var_list=restore_var)

		# Training summary
		# Processed predictions: for visualisation.
		raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
		raw_output_up = tf.argmax(raw_output_up, axis=3)
		self.pred = tf.expand_dims(raw_output_up, dim=3)
		# Image summary.
		images_summary = tf.py_func(inv_preprocess, [self.image_batch, 2, IMG_MEAN], tf.uint8)
		labels_summary = tf.py_func(decode_labels, [self.label_batch, 2, self.conf.num_classes], tf.uint8)
		preds_summary = tf.py_func(decode_labels, [self.pred, 2, self.conf.num_classes], tf.uint8)
		self.total_summary = tf.summary.image('images',
			tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
			max_outputs=2) # Concatenate row-wise.
		if not os.path.exists(self.conf.logdir):
			os.makedirs(self.conf.logdir)
		self.summary_writer = tf.summary.FileWriter(self.conf.logdir, graph=tf.get_default_graph())
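The 'poly' learning-rate schedule built above is easy to state on its own; a one-line sketch with an example call:

def poly_lr(base_lr, step, num_steps, power):
    # 'poly' decay: falls from base_lr at step 0 to 0 at num_steps.
    return base_lr * (1 - step / num_steps) ** power

# e.g. poly_lr(2.5e-4, step=10000, num_steps=20000, power=0.9)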
Example #15
    data_path, data_name, crop_type, backbone_type = opt.data_path, opt.data_name, opt.crop_type, opt.backbone_type
    gd_config, feature_dim, smoothing, temperature = opt.gd_config, opt.feature_dim, opt.smoothing, opt.temperature
    margin, recalls, batch_size = opt.margin, [
        int(k) for k in opt.recalls.split(',')
    ], opt.batch_size
    num_epochs = opt.num_epochs
    save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}_{}'.format(
        data_name, crop_type, backbone_type, gd_config, feature_dim, smoothing,
        temperature, margin, batch_size)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train', crop_type)
    train_sample = MPerClassSampler(train_data_set.labels, batch_size)
    train_data_loader = DataLoader(train_data_set,
                                   batch_sampler=train_sample,
                                   num_workers=8)
    test_data_set = ImageReader(data_path, data_name,
                                'query' if data_name == 'isc' else 'test',
                                crop_type)
    test_data_loader = DataLoader(test_data_set,
                                  batch_size,
                                  shuffle=False,
                                  num_workers=8)
    eval_dict = {'test': {'data_loader': test_data_loader}}
    if data_name == 'isc':
        gallery_data_set = ImageReader(data_path, data_name, 'gallery',
                                       crop_type)
Example #16
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch = reader.dequeue(
                self.conf.batch_size)
            image_batch_075 = tf.image.resize_images(
                self.image_batch, [int(h * 0.75), int(w * 0.75)])
            image_batch_05 = tf.image.resize_images(
                self.image_batch, [int(h * 0.5), int(w * 0.5)])
            # #testWWang
            # image = self.image_batch[0]
            # label = self.label_batch[0]
            # utils.save_image(image, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name = image ,mean = IMG_MEAN)
            # utils.save_image(label, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name=label, mean=IMG_MEAN)
            #
            # #end

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    True)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            with tf.variable_scope('', reuse=False):
                net = ResNet_segmentation(self.image_batch,
                                          self.conf.num_classes, True,
                                          self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net075 = ResNet_segmentation(image_batch_075,
                                             self.conf.num_classes, True,
                                             self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net05 = ResNet_segmentation(image_batch_05,
                                            self.conf.num_classes, True,
                                            self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output100 = net.outputs
        raw_output075 = net075.outputs
        raw_output05 = net05.outputs
        raw_output = tf.reduce_max(tf.stack([
            raw_output100,
            tf.image.resize_images(raw_output075,
                                   tf.shape(raw_output100)[1:3, ]),
            tf.image.resize_images(raw_output05,
                                   tf.shape(raw_output100)[1:3, ])
        ]),
                                   axis=0)

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        label_proc = prepare_label(self.label_batch,
                                   tf.stack(raw_output.get_shape()[1:3]),
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)  # [batch_size, h, w]
        label_proc075 = prepare_label(self.label_batch,
                                      tf.stack(raw_output075.get_shape()[1:3]),
                                      num_classes=self.conf.num_classes,
                                      one_hot=False)
        label_proc05 = prepare_label(self.label_batch,
                                     tf.stack(raw_output05.get_shape()[1:3]),
                                     num_classes=self.conf.num_classes,
                                     one_hot=False)

        raw_gt = tf.reshape(label_proc, [
            -1,
        ])
        raw_gt075 = tf.reshape(label_proc075, [
            -1,
        ])
        raw_gt05 = tf.reshape(label_proc05, [
            -1,
        ])

        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        indices075 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1)
        indices05 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1)

        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
        gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        raw_prediction100 = tf.reshape(raw_output100,
                                       [-1, self.conf.num_classes])
        raw_prediction075 = tf.reshape(raw_output075,
                                       [-1, self.conf.num_classes])
        raw_prediction05 = tf.reshape(raw_output05,
                                      [-1, self.conf.num_classes])

        prediction = tf.gather(raw_prediction, indices)
        prediction100 = tf.gather(raw_prediction100, indices)
        prediction075 = tf.gather(raw_prediction075, indices075)
        prediction05 = tf.gather(raw_prediction05, indices05)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction100, labels=gt)
        loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction075, labels=gt075)
        loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction05, labels=gt05)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
            loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                loss05) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
        # We use several optimizers here to handle the different lr_mult values
        # (a per-layer learning-rate multiplier, as in Caffe), which control the
        # actual lr of each layer.
        opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)
        opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   self.conf.momentum)
        opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   self.conf.momentum)

        # Gradient accumulation
        # Define a variable to accumulate gradients.
        accum_grads = [
            tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
            for v in encoder_trainable + decoder_w_trainable +
            decoder_b_trainable
        ]
        # Define an operation to clear the accumulated gradients for the next batch.
        self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        # Accumulate and normalise the gradients.
        self.accum_grads_op = [
            accum_grads[i].assign_add(grad / self.conf.grad_update_every)
            for i, grad in enumerate(grads)
        ]

        grads_encoder = accum_grads[:len(encoder_trainable)]
        grads_decoder_w = accum_grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = accum_grads[(len(encoder_trainable) +
                                       len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, dim=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=20)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
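The zero_op / accum_grads_op / apply_gradients triple above implements gradient accumulation: gradients are averaged over grad_update_every sub-batches before a single optimizer step. The same idiom restated in PyTorch terms, as an illustration only:

def accumulate_and_step(model, optimizer, loss_fn, sub_batches):
    # Average gradients over all sub-batches, then update once.
    # Dividing each loss by len(sub_batches) mirrors the
    # grad / grad_update_every normalisation in the TF code above.
    optimizer.zero_grad()
    k = len(sub_batches)
    for inputs, targets in sub_batches:
        loss = loss_fn(model(inputs), targets) / k
        loss.backward()  # gradients accumulate in .grad
    optimizer.step()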
Example #17
    def test_setup(self):
        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(
                self.conf.data_dir,
                self.conf.valid_data_list,
                None,  # the images have different sizes
                False,  # no data-aug
                False,  # no data-aug
                self.conf.ignore_label,
                IMG_MEAN,
                self.coord)
            image, label = reader.image, reader.label  # [h, w, 3 or 1]
        # Add one batch dimension [1, h, w, 3 or 1]
        self.image_batch, self.label_batch = tf.expand_dims(
            image, dim=0), tf.expand_dims(label, dim=0)
        h_orig, w_orig = tf.to_float(tf.shape(
            self.image_batch)[1]), tf.to_float(tf.shape(self.image_batch)[2])
        image_batch_075 = tf.image.resize_images(
            self.image_batch,
            tf.stack([
                tf.to_int32(tf.multiply(h_orig, 0.75)),
                tf.to_int32(tf.multiply(w_orig, 0.75))
            ]))
        image_batch_05 = tf.image.resize_images(
            self.image_batch,
            tf.stack([
                tf.to_int32(tf.multiply(h_orig, 0.5)),
                tf.to_int32(tf.multiply(w_orig, 0.5))
            ]))

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes,
                                 False)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    False)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes,
                                   False)
        else:
            with tf.variable_scope('', reuse=False):
                net = ResNet_segmentation(self.image_batch,
                                          self.conf.num_classes, False,
                                          self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net075 = ResNet_segmentation(image_batch_075,
                                             self.conf.num_classes, False,
                                             self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net05 = ResNet_segmentation(image_batch_05,
                                            self.conf.num_classes, False,
                                            self.conf.encoder_name)

        # predictions
        # Network raw output
        raw_output100 = net.outputs
        raw_output075 = net075.outputs
        raw_output05 = net05.outputs
        raw_output = tf.reduce_max(tf.stack([
            raw_output100,
            tf.image.resize_images(raw_output075,
                                   tf.shape(raw_output100)[1:3, ]),
            tf.image.resize_images(raw_output05,
                                   tf.shape(raw_output100)[1:3, ])
        ]),
                                   axis=0)
        raw_output = tf.image.resize_bilinear(
            raw_output,
            tf.shape(self.image_batch)[1:3, ])
        raw_output = tf.argmax(raw_output, axis=3)
        pred = tf.expand_dims(raw_output, dim=3)
        self.pred = tf.reshape(pred, [
            -1,
        ])
        # labels
        gt = tf.reshape(self.label_batch, [
            -1,
        ])
        # Ignoring all labels greater than or equal to n_classes.
        temp = tf.less_equal(gt, self.conf.num_classes - 1)
        weights = tf.cast(temp, tf.int32)

        # fix for tf 1.3.0
        gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

        # Pixel accuracy
        self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
            self.pred, gt, weights=weights)

        # mIoU
        self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # confusion matrix
        self.confusion_matrix = tf.contrib.metrics.confusion_matrix(
            self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

        # Loader for loading the checkpoint
        self.loader = tf.train.Saver(var_list=tf.global_variables())
Example #18
    def train_setup(self, reuse=False):
        tf.set_random_seed(self.conf.random_seed)
        num_layers = 50

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()
        self.n_gpu = self.conf.n_gpu

        # Input size
        self.input_size = (self.conf.input_height, self.conf.input_width)
        j_step = 0
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 self.input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            # print "1"*22
            # print reader
        image_data, image_label = reader.dequeue(self.conf.batch_size)
        self.image_data = image_data
        if tf.__version__.startswith('1.'):
            split_train_data_node = tf.split(image_data, self.n_gpu)
            split_train_labels_node = tf.split(image_label, self.n_gpu)
        else:
            split_train_data_node = tf.split(0, self.n_gpu, image_data)
            split_train_labels_node = tf.split(0, self.n_gpu, image_label)
        with tf.variable_scope(tf.get_variable_scope()):
            all_loss = []
            for device_index, (i, self.image_batch,
                               self.label_batch) in enumerate(
                                   zip([1], split_train_data_node,
                                       split_train_labels_node)):
                with tf.device('/gpu:%d' % i):
                    #print i
                    with tf.name_scope('%s_%d' % ("gpu", i)) as scope:
                        if j_step == 0:
                            j_step = 1
                            pass
                        else:
                            reuse = True
                        # net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
                        #                          is_training=self.conf.is_training ,reuse=reuse)
                        net, end_points = deeplabv3(
                            self.image_batch,
                            num_classes=self.conf.num_classes,
                            depth=num_layers,
                            is_training=True,
                            reuse=reuse)
                        self.raw_output = end_points[
                            'gpu_{}/resnet{}/logits'.format(i, num_layers)]
                        # Network raw output
                        # [batch_size, 41, 41, 21]
                        output_size = (self.raw_output.shape[1].value,
                                       self.raw_output.shape[2].value)

                        label_proc = prepare_label(
                            self.label_batch,
                            output_size,
                            num_classes=self.conf.num_classes,
                            one_hot=False)  # [batch_size, 41, 41]
                        raw_gt = tf.reshape(label_proc, [
                            -1,
                        ])
                        indices = tf.squeeze(
                            tf.where(
                                tf.less_equal(raw_gt,
                                              self.conf.num_classes - 1)), 1)
                        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                        raw_prediction = tf.reshape(
                            self.raw_output, [-1, self.conf.num_classes])
                        # print raw_prediction
                        # print gt
                        prediction = raw_prediction
                        # prediction = tf.expand_dims(raw_prediction,  3)
                        # prediction = tl.act.pixel_wise_softmax(prediction)
                        # print prediction
                        # print label_proc
                        # loss = 1 - tl.cost.dice_coe(prediction, label_proc, axis=[1, 2, 3, 4])
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=prediction, labels=gt)
                        l2_losses = [
                            self.conf.weight_decay * tf.nn.l2_loss(v)
                            for v in tf.trainable_variables()
                            if 'weights' in v.name
                        ]
                        # Loss function
                        all_loss.append(
                            tf.reduce_mean(loss) + tf.add_n(l2_losses))
                        tf.get_variable_scope().reuse_variables()

        # Output size
        #output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)

        # Variables that load from pre-trained model.
        # For training, the last few layers should not be loaded.
        if self.conf.pretrain_file is not None:
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            original_step = int(self.conf.pretrain_file.split("-")[-1])
        else:
            original_step = 0
        num_steps = self.conf.num_steps + original_step
        # Trainable Variables
        # Note that is_training=False still updates the BN parameters gamma
        # (scale) and beta (offset) if they are present in the var_list of the
        # optimiser definition. So we remove them from the list.
        all_trainable = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]
        # Fine-tune part
        conv_trainable = [v for v in all_trainable
                          if 'fc' not in v.name]  # lr * 1.0
        # ASPP part
        fc_trainable = [v for v in all_trainable if 'fc' in v.name]
        # fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
        # fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
        # check
        #assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
        #assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        #label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes,
        #one_hot=False)  # [batch_size, 41, 41]
        #raw_gt = tf.reshape(label_proc, [-1, ])
        #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        #gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        #raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
        #prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
        # L2 regularization
        #l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
        # Loss function
        self.reduced_loss = tf.add_n(all_loss) / self.n_gpu
        #self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        self.loss_trans = tf.placeholder(dtype=tf.float32, shape=())
        self.final_loss = (self.reduced_loss + self.loss_trans) / 2

        learning_rate = tf.scalar_mul(
            base_lr, tf.pow((1 - self.curr_step / num_steps), self.conf.power))
        #print self.conf.power
        self.learning_rate = learning_rate
        #print  learning_rate
        # We have several optimizers here in order to handle the different lr_mult
        # which is a kind of parameters in Caffe. This controls the actual lr for each
        # layer.
        opt = tf.train.AdamOptimizer(learning_rate, self.conf.momentum, 0.98)
        #opt= tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_fc_b = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
        #opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads_conv = tf.gradients(self.final_loss, conv_trainable)
        # train_op = opt.apply_gradients(zip(grads_conv, conv_trainable))
        #grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads_conv[:len(conv_trainable)]
        # grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))]
        # grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
        # Update params
        train_op_conv = opt.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
        # train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
        # Finally, get the train_op!
        self.train_op = train_op_conv
        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        if self.conf.pretrain_file is not None:
            self.loader = tf.train.Saver(var_list=restore_var)
Example #19
def main():
    """Create the model and start the evaluation process."""
    args = get_arguments()

    # Create queue coordinator.
    coord = tf.train.Coordinator()

    # Load reader.
    with tf.name_scope("create_inputs"):
        reader = ImageReader(
            args.data_dir,
            args.data_list,
            None,  # No defined input size.
            False,  # No random scale.
            False,  # No random mirror.
            args.ignore_label,
            IMG_MEAN,
            coord)
        image, label = reader.image, reader.label
    image_batch, label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(
        label, dim=0)  # Add one batch dimension.

    # Create network.
    if args.encoder_name not in ['res101', 'res50']:
        print('encoder_name ERROR!')
        print("Please input: res101, res50")
        sys.exit(-1)
    else:
        net = ResNet_segmentation(image_batch, args.num_classes, False,
                                  args.encoder_name)

    # Which variables to load.
    restore_var = tf.global_variables()

    # Predictions.
    raw_output = net.outputs
    raw_output = tf.image.resize_bilinear(raw_output,
                                          tf.shape(image_batch)[1:3, ])
    raw_output = tf.argmax(raw_output, axis=3)
    pred = tf.expand_dims(raw_output, dim=3)  # Create 4-d tensor.
    pred = tf.reshape(pred, [
        -1,
    ])

    # Ground truth.
    gt = tf.reshape(label_batch, [
        -1,
    ])
    # Ignoring all labels greater than or equal to n_classes.
    temp = tf.less_equal(gt, args.num_classes - 1)
    weights = tf.cast(temp, tf.int32)

    # fix for tf 1.3.0
    gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))
    # mIoU
    mIoU, update_op = tf.contrib.metrics.streaming_mean_iou(
        pred, gt, num_classes=args.num_classes, weights=weights)

    # Pixel accuracy
    accu, accu_update_op = tf.contrib.metrics.streaming_accuracy(
        pred, gt, weights=weights)

    # Set up tf session and initialize variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.global_variables_initializer()

    sess.run(init)
    sess.run(tf.local_variables_initializer())

    # Load weights.
    loader = tf.train.Saver(var_list=restore_var)
    if args.restore_from is not None:
        load(loader, sess, args.restore_from)

    # Start queue threads.
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    # Iterate over evaluation steps.
    for step in range(args.num_steps):
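        # Each run of the update ops folds this image's statistics into the
        # streaming mIoU/accuracy accumulators; the final metric values are
        # read once after the loop.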
        preds, _, _ = sess.run([pred, update_op, accu_update_op])
        if step % 100 == 0:
            print('step {:d}'.format(step))
    print('Mean IoU: {:.3f}'.format(mIoU.eval(session=sess)))
    print('Pixel Accuracy: {:.3f}'.format(accu.eval(session=sess)))
    coord.request_stop()
    coord.join(threads)
Beispiel #20
0
        int(k) for k in opt.recalls.split(',')
    ], opt.batch_size
    num_epochs = opt.num_epochs
    save_name_pre = '{}_{}_{}_{}_{}_{}_{}_{}'.format(data_name, backbone_type,
                                                     gd_config, feature_dim,
                                                     smoothing, temperature,
                                                     margin, batch_size)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    process_sop_data(opt.data_dir, opt.df_path)

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train')
    train_sample = MPerClassSampler(train_data_set.labels, batch_size)
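    # MPerClassSampler appears to build class-balanced batches (a fixed number
    # of samples per class), which pair- and proxy-based metric-learning
    # losses rely on; it is passed as a batch_sampler below.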
    train_data_loader = DataLoader(train_data_set,
                                   batch_sampler=train_sample,
                                   num_workers=opt.workers,
                                   pin_memory=True)
    test_data_set = ImageReader(data_path, data_name, 'test')
    test_data_loader = DataLoader(test_data_set,
                                  batch_size,
                                  shuffle=False,
                                  num_workers=opt.workers,
                                  pin_memory=True)
    eval_dict = {'test': {'data_loader': test_data_loader}}

    # model setup, model profile, optimizer config and loss definition
    model = Model(backbone_type,
Beispiel #21
0
	def test_setup(self):
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.valid_data_list,
				None, # the images have different sizes
				False, # no data-aug
				False, # no data-aug
				self.conf.ignore_label,
				self.coord)
			image, label = reader.image, reader.label # [h, w, 3 or 1]
		# Add one batch dimension [1, h, w, 3 or 1]
		self.image_batch, self.label_batch = tf.expand_dims(image, dim=0), tf.expand_dims(label, dim=0)
		self.image_batch = tf.identity( self.image_batch, name='image_batch')
		self.image_batch -= IMG_MEAN
		# Create network
		if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50, or deeplab")
			sys.exit(-1)
		elif self.conf.encoder_name == 'deeplab':
			net = Deeplab_v2(self.image_batch, self.conf.num_classes, False)
		else:
			net = ResNet_segmentation(self.image_batch, self.conf.num_classes, False, self.conf.encoder_name)
		
		# predictions
		raw_output = net.outputs
		raw_output = tf.image.resize_bilinear(raw_output, tf.shape(self.image_batch)[1:3,])
		raw_output = tf.argmax(raw_output, axis=3)
		pred = tf.expand_dims(raw_output, dim=3)
		self.pred = tf.reshape(pred, [-1,], name="predictions")
		# labels
		gt = tf.reshape(self.label_batch, [-1,])
		# Ignoring all labels greater than or equal to n_classes.
		temp = tf.less_equal(gt, self.conf.num_classes - 1)
		weights = tf.cast(temp, tf.int32)

		# fix for tf 1.3.0
		gt = tf.where(temp, gt, tf.cast(temp, tf.uint8))

		# Pixel accuracy
		self.accu, self.accu_update_op = tf.contrib.metrics.streaming_accuracy(
			self.pred, gt, weights=weights)

		# mIoU
		self.mIoU, self.mIou_update_op = tf.contrib.metrics.streaming_mean_iou(
			self.pred, gt, num_classes=self.conf.num_classes, weights=weights)

		# F1-score components
		pred = tf.cast(self.pred, tf.int32)
		gt = tf.cast(gt, tf.int32)

		self.areaOverlap = tf.count_nonzero(pred * gt)
		self.areaGTObj = tf.count_nonzero(gt)
		self.areaPredicted = tf.count_nonzero(pred)

		# Loader for loading the checkpoint
		self.loader = tf.train.Saver(var_list=tf.global_variables())
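		# The three area counts above are enough to derive the binary-foreground
		# F1 score outside the graph. A minimal sketch, assuming the counts are
		# fetched with sess.run (the variable names here are illustrative):
		#   overlap, gt_area, pred_area = sess.run(
		#       [self.areaOverlap, self.areaGTObj, self.areaPredicted])
		#   precision = overlap / max(pred_area, 1)
		#   recall = overlap / max(gt_area, 1)
		#   f1 = 2 * precision * recall / max(precision + recall, 1e-12)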
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        input_size = (self.conf.input_height, self.conf.input_width)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch = reader.dequeue(
                self.conf.batch_size)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                      True, self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output = net.outputs  # [batch_size, h, w, 21]

        # Output size
        output_shape = tf.shape(raw_output)
        output_size = (output_shape[1], output_shape[2])

        # Ground Truth: ignoring all labels greater than or equal to num_classes
        label_proc = prepare_label(self.label_batch,
                                   output_size,
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)
        raw_gt = tf.reshape(label_proc, [
            -1,
        ])
        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
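        # The 'poly' policy decays the learning rate smoothly to zero:
        #   lr(step) = base_lr * (1 - step / num_steps) ** power
        # For example (illustrative values), with base_lr=2.5e-4, power=0.9
        # and num_steps=20000: step 0 -> 2.5e-4, step 10000 -> ~1.34e-4
        # (0.5 ** 0.9 ~= 0.536), step 20000 -> 0.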
        # We use several optimizers here in order to handle the different lr_mult
        # values (a per-layer parameter in Caffe that controls the actual
        # learning rate of each layer).
        opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)
        opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   self.conf.momentum)
        opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        grads_encoder = grads[:len(encoder_trainable)]
        grads_decoder_w = grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = grads[(len(encoder_trainable) +
                                 len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, dim=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 2, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 2, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 2, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=2)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
Beispiel #23
0
    def _read_image_from_path(self, path):
        return ImageReader.read_from_path(path)
Beispiel #24
0
class BatchGenerator(Sequence):
    def __init__(self, images, config, norm=None, shuffle=True):

        self.images = images

        # self.true_box_buffer = config['true_box_buffer']  # Maximum objects per image
        self.batch_size = config['batch_size']
        self.anchors = config['anchors']
        self.nb_anchors = len(config['anchors'])

        self.img_w, self.img_h = config['image_shape']
        self.grid = config['grid']
        self.img_encoder = ImageReader(img_width=self.img_w,
                                       img_height=self.img_h,
                                       norm=norm,
                                       grid=self.grid)

        self.labels = np.array(config['labels'])

        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.images)

    def __getitem__(self, idx):
        '''
        Arguments:
        ---------
        idx : [int] non-negative integer, e.g. 0

        Return:
        ------
        x_batch: [np.array] Array of shape (BATCH_SIZE, IMAGE_H, IMAGE_W, N channels).

            x_batch[iframe,:,:,:] contains the iframe-th frame of size (IMAGE_H, IMAGE_W).

        y_batch: [np.array] Array of shape (BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes).
                            BOX = the number of anchor boxes.

            y_batch[iframe,igrid_h,igrid_w,ianchor,:4] contains (center_x, center_y, center_w, center_h)
            of the ianchor-th anchor at grid cell (igrid_h, igrid_w) if an object exists in
            this (grid cell, anchor) pair, else it contains 0.
            The bbox's center coordinates (x, y) are given between 0 and 1 relative to the
            cell's origin (i.e. 0.4 means 40% from the cell's origin), and its dimensions are
            given relative to the cell's size (i.e. 3.4 means 3.4 times the cell's size).

            y_batch[iframe,igrid_h,igrid_w,ianchor,4] contains 1 if an object exists in this
            (grid cell, anchor) pair, else it contains 0.

            y_batch[iframe,igrid_h,igrid_w,ianchor,5 + iclass] contains 1 if an object of the
            iclass-th class exists in this (grid cell, anchor) pair, else it contains 0.

        b_batch: [np.array] Array of shape (BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4).

            b_batch[iframe,0,0,0,ibuffer,:] contains the ibuffer-th object's
            (center_x, center_y, center_w, center_h) in the iframe-th frame.

            If ibuffer > the number of objects in the iframe-th frame, the values are simply 0.

            TRUE_BOX_BUFFER has to be large enough that the frame with the most objects
            can still record all of them.

            The order of the objects does not matter.

            This is just a hack to easily calculate the loss.
        '''
        l_bound = idx * self.batch_size
        r_bound = (idx + 1) * self.batch_size

        if r_bound > len(self.images):
            r_bound = len(self.images)
            l_bound = r_bound - self.batch_size

        instance_count = 0

        # Prepare storage for outputs
        x_batch = np.zeros(
            (r_bound - l_bound, self.img_h, self.img_w, 3))  # Input images
        y_batch = np.zeros((r_bound - l_bound, self.grid[1], self.grid[0],
                            self.nb_anchors, 5 + len(self.labels)))
        # b_batch = np.zeros((r_bound - l_bound, 1, 1, 1, self.true_box_buffer, 4))

        grid_width = float(self.img_w) / self.grid[0]
        grid_height = float(self.img_h) / self.grid[1]

        iou_vfunc = np.frompyfunc(
            lambda w1, h1, w2, h2: calculate_IOU(np.array([w1, h1]),
                                                 np.array([w2, h2])), 4, 1)

        for train_instance in self.images[l_bound:r_bound]:
            # Resize image
            img, all_objs = self.img_encoder.fit_data(train_instance)

            # Construct output from object's x, y, w, h
            true_box_index = 0
            for obj in all_objs:
                if (obj['xmax'] > obj['xmin']) and (
                        obj['ymax'] > obj['ymin']) and (obj['name']
                                                        in self.labels):
                    center_x, center_y, center_w, center_h = self.img_encoder.abs2grid(
                        obj)

                    grid_x = int(np.floor(center_x))
                    grid_y = int(np.floor(center_y))

                    # Now we save in y_batch, center position relative to cell's origin
                    center_x -= grid_x
                    center_y -= grid_y

                    if (grid_x < self.grid[0]) and (grid_y < self.grid[1]):
                        obj_idx = self.labels.tolist().index(obj['name'])

                        ious = iou_vfunc(self.anchors[:, 0], self.anchors[:,
                                                                          1],
                                         center_w, center_h)
                        best_anchor_id = np.argmax(ious)

                        # Assign ground-truth x, y, w, h, confidence and class probs to y_batch.
                        # The same grid cell may contain two similarly shaped objects; in that
                        # case the same anchor box is selected as the best anchor by multiple
                        # objects, and the earlier object is overwritten.

                        # As stated in the paper, width and height are predicted
                        # relative to the anchor's dimensions.

                        center_w = center_w * (grid_width /
                                               self.anchors[best_anchor_id, 0])
                        center_h = center_h * (grid_height /
                                               self.anchors[best_anchor_id, 1])

                        bbox = [center_x, center_y, center_w, center_h]

                        # center_x, center_y, w, h and 1 because ground truth confidence is always 1
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id,
                                0:4] = bbox
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id,
                                4] = 1
                        # Class' probability for detected object
                        y_batch[instance_count, grid_y, grid_x, best_anchor_id,
                                5 + obj_idx] = 1

                        # Assign the true bbox to b_batch
                        # b_batch[instance_count, 0, 0, 0, true_box_index] = bbox
                        # true_box_index = (true_box_index + 1) % self.true_box_buffer

                else:
                    print(
                        "Omitting an object in image {} due to inconsistent labeling...".
                        format(train_instance['filename']))

            x_batch[instance_count] = img
            instance_count += 1

        # return [x_batch, b_batch], y_batch
        return x_batch, y_batch

    def __len__(self):
        return int(np.ceil(float(len(self.images)) / self.batch_size))

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.images)
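
# A minimal usage sketch for BatchGenerator (the config keys and values below
# are illustrative assumptions; the real anchors, grid and labels come from
# the surrounding project's config, and `images` is its list of annotation
# dicts, each with a 'filename' and labelled objects):
#
#   import numpy as np
#   config = {
#       'batch_size': 8,
#       'anchors': np.array([[1.0, 1.5], [2.5, 2.0]]),
#       'image_shape': (416, 416),
#       'grid': (13, 13),
#       'labels': ['car', 'person'],
#   }
#   gen = BatchGenerator(images, config, norm=lambda im: im / 255.0)
#   x_batch, y_batch = gen[0]
#   # x_batch: (8, 416, 416, 3); y_batch: (8, 13, 13, 2, 5 + 2)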
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Devices
        gpu_list = get_available_gpus()
        zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], []
        previous_crf_names = []
        restore_vars = []
        self.loaders = []

        self.im_list = []

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Load reader
                with tf.name_scope("create_inputs"):
                    reader = ImageReader(self.conf.data_dir,
                                         self.conf.data_list, input_size,
                                         self.conf.random_scale,
                                         self.conf.random_mirror,
                                         self.conf.ignore_label, IMG_MEAN,
                                         self.coord)
                    self.image_batch, self.label_batch = reader.dequeue(
                        self.conf.batch_size)
                    self.im_list.append(self.image_batch)
                    image_batch_075 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.75), int(w * 0.75)])
                    image_batch_05 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.5), int(w * 0.5)])

                # Create network
                with tf.variable_scope('', reuse=False):
                    net = Deeplab_v2(self.image_batch,
                                     self.conf.num_classes,
                                     True,
                                     rescale075=False,
                                     rescale05=False,
                                     crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net075 = Deeplab_v2(image_batch_075,
                                        self.conf.num_classes,
                                        True,
                                        rescale075=True,
                                        rescale05=False,
                                        crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net05 = Deeplab_v2(image_batch_05,
                                       self.conf.num_classes,
                                       True,
                                       rescale075=False,
                                       rescale05=True,
                                       crf_type=self.conf.crf_type)

                # Variables that load from pre-trained model.
                restore_var = [
                    v for v in tf.global_variables()
                    if ('fc' not in v.name and 'crfrnn' not in v.name)
                ]
                restore_vars.append(restore_var)

                # Trainable Variables
                all_trainable = tf.trainable_variables()
                # Fine-tune part
                # Rebuild the list rather than calling remove() while
                # iterating, which can skip elements.
                all_trainable = [
                    v for v in all_trainable
                    if v.name not in previous_crf_names
                ]

                crf_trainable = [
                    v for v in all_trainable
                    if ('crfrnn' in v.name and v.name not in previous_crf_names
                        )
                ]
                previous_crf_names.extend(v.name for v in crf_trainable)
                encoder_trainable = [
                    v for v in all_trainable
                    if 'fc' not in v.name and 'crfrnn' not in v.name
                ]  # lr * 1.0

                # Remove encoder_trainable from all_trainable
                #all_trainable = [v for v in all_trainable if v not in encoder_trainable]

                # Decoder part
                decoder_trainable = [
                    v for v in all_trainable
                    if 'fc' in v.name and 'crfrnn' not in v.name
                ]

                decoder_w_trainable = [
                    v for v in decoder_trainable
                    if ('weights' in v.name or 'gamma' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 10.0
                decoder_b_trainable = [
                    v for v in decoder_trainable
                    if ('biases' in v.name or 'beta' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 20.0
                # Check
                assert (len(all_trainable) == len(decoder_trainable) +
                        len(crf_trainable) + len(encoder_trainable))
                assert (len(decoder_trainable) == len(decoder_w_trainable) +
                        len(decoder_b_trainable))

                # Network raw output
                raw_output100 = net.outputs

                raw_output075 = net075.outputs
                raw_output05 = net05.outputs
                raw_output = tf.reduce_max(tf.stack([
                    raw_output100,
                    tf.image.resize_images(raw_output075,
                                           tf.shape(raw_output100)[1:3, ]),
                    tf.image.resize_images(raw_output05,
                                           tf.shape(raw_output100)[1:3, ])
                ]),
                                           axis=0)
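                # Multi-scale fusion: the 0.75x and 0.5x logits are upsampled
                # back to the full-resolution shape and merged with an
                # element-wise max across scales.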

                # Ground Truth: ignoring all labels greater than or equal to num_classes
                label_proc = prepare_label(self.label_batch,
                                           tf.stack(
                                               raw_output.get_shape()[1:3]),
                                           num_classes=self.conf.num_classes,
                                           one_hot=True)  # [batch_size, h, w]
                label_proc075 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output075.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)
                label_proc05 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output05.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)

                raw_gt = tf.reshape(label_proc, [
                    -1,
                ])
                raw_gt075 = tf.reshape(label_proc075, [
                    -1,
                ])
                raw_gt05 = tf.reshape(label_proc05, [
                    -1,
                ])

                indices = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)),
                    1)
                indices075 = tf.squeeze(
                    tf.where(
                        tf.less_equal(raw_gt075, self.conf.num_classes - 1)),
                    1)
                indices05 = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt05,
                                           self.conf.num_classes - 1)), 1)

                gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
                gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

                raw_prediction = tf.reshape(raw_output,
                                            [-1, self.conf.num_classes])
                raw_prediction100 = tf.reshape(raw_output100,
                                               [-1, self.conf.num_classes])
                raw_prediction075 = tf.reshape(raw_output075,
                                               [-1, self.conf.num_classes])
                raw_prediction05 = tf.reshape(raw_output05,
                                              [-1, self.conf.num_classes])

                prediction = tf.gather(raw_prediction, indices)
                prediction100 = tf.gather(raw_prediction100, indices)
                prediction075 = tf.gather(raw_prediction075, indices075)
                prediction05 = tf.gather(raw_prediction05, indices05)

                # Pixel-wise softmax_cross_entropy loss
                #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                '''
                coefficients = [0.01460247, 1.25147725, 2.88479363, 1.20348121, 1.65261654, 1.67514772,
                                0.62338799, 0.7729363,  0.42038501, 0.98557268, 1.31867536, 0.85313332,
                                0.67227604, 1.21317965, 1.        , 0.24263748, 1.80877607, 1.3082213,
                                0.79664027, 0.72543945, 1.27823374]
                '''
                #loss = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction)
                #loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
                loss100 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction100,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                #loss100 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction100)
                #loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
                loss075 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction075,
                    labels=tf.reshape(label_proc075[0],
                                      (int(h * 0.75) * int(w * 0.75),
                                       self.conf.num_classes)))
                #loss075 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes)), logits=raw_prediction075)
                #loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
                loss05 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction05,
                    labels=tf.reshape(
                        label_proc05[0],
                        (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)))
                #loss05 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)), logits=raw_prediction05)

                # L2 regularization
                l2_losses = [
                    self.conf.weight_decay * tf.nn.l2_loss(v)
                    for v in all_trainable if 'weights' in v.name
                ]

                # Loss function
                self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
                    loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                        loss05) + tf.add_n(l2_losses)

                # Define optimizers
                # 'poly' learning rate
                base_lr = tf.constant(self.conf.learning_rate)
                self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
                learning_rate = tf.scalar_mul(
                    base_lr,
                    tf.pow((1 - self.curr_step / self.conf.num_steps),
                           self.conf.power))
                # We use several optimizers here in order to handle the different lr_mult
                # values (a per-layer parameter in Caffe that controls the
                # actual learning rate of each layer).
                opt_encoder = tf.train.MomentumOptimizer(
                    learning_rate, self.conf.momentum)
                opt_decoder_w = tf.train.MomentumOptimizer(
                    learning_rate * 10.0, self.conf.momentum)
                opt_decoder_b = tf.train.MomentumOptimizer(
                    learning_rate * 20.0, self.conf.momentum)
                opt_crf = tf.train.MomentumOptimizer(learning_rate,
                                                     self.conf.momentum)

                # Gradient accumulation
                # Define a variable to accumulate gradients.
                accum_grads = [
                    tf.Variable(tf.zeros_like(v.initialized_value()),
                                trainable=False) for v in encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable
                ]

                # Define an operation to clear the accumulated gradients for next batch.
                self.zero_op = [
                    v.assign(tf.zeros_like(v)) for v in accum_grads
                ]
                # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
                # Instead, we separate the steps compute_grads+update_params.
                # Compute grads
                grads = tf.gradients(
                    self.reduced_loss, encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable)
                # Accumulate and normalise the gradients.
                self.accum_grads_op = [
                    accum_grads[i].assign_add(grad /
                                              self.conf.grad_update_every)
                    for i, grad in enumerate(grads)
                ]
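                # Presumably driven per effective batch as:
                #   sess.run(self.zero_op)
                #   for _ in range(grad_update_every):
                #       sess.run(self.accum_grads_op)   # one sub-batch each
                #   sess.run(self.train_op)             # apply averaged grads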

                grads_encoder = accum_grads[:len(encoder_trainable)]
                grads_decoder_w = accum_grads[len(encoder_trainable
                                                  ):len(encoder_trainable) +
                                              len(decoder_w_trainable)]
                grads_decoder_b = accum_grads[(
                    len(encoder_trainable) +
                    len(decoder_w_trainable)):(len(encoder_trainable) +
                                               len(decoder_w_trainable) +
                                               len(decoder_b_trainable))]
                grads_crf = accum_grads[
                    len(encoder_trainable) + len(decoder_w_trainable) +
                    len(decoder_b_trainable
                        ):]  # assuming crf gradients are appended to the end

                zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
                zip_decoder_b.append(
                    list(zip(grads_decoder_b, decoder_b_trainable)))
                zip_decoder_w.append(
                    list(zip(grads_decoder_w, decoder_w_trainable)))
                zip_crf.append(list(zip(grads_crf, crf_trainable)))

        avg_grads_encoder = average_gradients(zip_encoder)
        avg_grads_decoder_w = average_gradients(zip_decoder_w)
        avg_grads_decoder_b = average_gradients(zip_decoder_b)
        avg_grads_crf = average_gradients(zip_crf)

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Update params
                train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
                train_op_fc_w = opt_decoder_w.apply_gradients(
                    avg_grads_decoder_w)
                train_op_fc_b = opt_decoder_b.apply_gradients(
                    avg_grads_decoder_b)
                train_op_crf = opt_crf.apply_gradients(avg_grads_crf)

        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_fc_w, train_op_fc_b,
                                     train_op_crf)  # train_op_conv

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                self.loaders.append(tf.train.Saver(var_list=restore_vars[i]))
                #self.loaders.append(tf.train.Saver(var_list=tf.global_variables()))

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, axis=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=1)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
Beispiel #26
0
def main():
    args = get_arguments()

    print('SETUP TrainConfig...')
    train_cfg = TrainConfig(args)
    train_cfg.display()

    # print('SETUP EvalConfig...')
    eval_cfg = EvalConfig(args)
    # eval_cfg.display()

    train_reader = ImageReader(train_cfg)
    eval_reader = ImageReader(eval_cfg)

    train_net = ICNet(train_cfg, train_reader, eval_reader)

    _train_op, _losses, _summaries, _Preds, _IoUs, _Images = \
        train_net.optimizer()

    vis = Visualizer(eval_cfg)

    global_step = train_net.start_step
    epoch_step = int(
        len(train_reader.attribute_list) / train_cfg.BATCH_SIZE + 0.5)
    start_epoch = int(global_step / epoch_step)
    save_step = int(epoch_step * train_cfg.SAVE_PERIOD)

    all_steps = int(len(eval_reader.attribute_list) / (eval_cfg.BATCH_SIZE))
    g_eval_step = 0

    train_fd = {train_net.handle: train_net.train_handle}
    eval_fd = {train_net.handle: train_net.eval_handle}

    for epochs in range(start_epoch, train_cfg.TRAIN_EPOCHS):

        epoch_loss = None
        start_batch = global_step % epoch_step
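        # When training resumes from a checkpoint, start_batch skips the
        # batches of the current epoch that were already processed.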

        print(f'Start batch - {start_batch}')
        print(f'Epoch step - {epoch_step}')

        for steps in range(start_batch, epoch_step):
            start_time = time.time()

            _, losses = train_net.sess.run([_train_op, _losses],
                                           feed_dict=train_fd)

            if epoch_loss is None:
                epoch_loss = np.array(losses)
            else:
                epoch_loss += np.array(losses)

            if global_step % save_step == 0:
                train_net.save(global_step)

            global_step += 1

            duration = time.time() - start_time
            msg = (
                f'''step {global_step} \t total loss = {losses[3]:.3f}, sub4 = {losses[0]:.3f}, '''
                f'''sub24 = {losses[1]:.3f}, sub124 = {losses[2]:.3f}, val_loss: {losses[4]:.3f}'''
                f'''({duration:.3f} sec/step)''')
            print(msg)

        epoch_loss /= (epoch_step - start_batch)
        accuracy = None

        for steps in range(all_steps - 1):
            start_time = time.time()

            IoUs = train_net.sess.run(_IoUs, feed_dict=eval_fd)

            if accuracy is None:
                accuracy = np.array(IoUs)
            else:
                accuracy += np.array(IoUs)

            g_eval_step += 1

            duration = time.time() - start_time
            msg = (
                f'''step {steps} \t mean_IoU = {IoUs[0]:.3f}, Person_IoU = {IoUs[1]:.3f}, '''
                f'''Rider_IoU = {IoUs[2]:.3f}, ({duration:.3f} sec/step)''')
            print(msg)

        IoUs, Preds, Images = train_net.sess.run([_IoUs, _Preds, _Images],
                                                 feed_dict=eval_fd)
        accuracy += np.array(IoUs)
        accuracy /= all_steps

        g_eval_step += 1

        vis.save_and_show(Images, Preds, g_eval_step)

        feed_dict = {
            train_net.sum_loss: epoch_loss,
            train_net.sum_acc: accuracy
        }
        summaries = train_net.sess.run(_summaries, feed_dict=feed_dict)
        train_net.writer.add_summary(summaries, epochs)
Beispiel #27
0
    def _read_image_from_url(self, url):
        return ImageReader.read_from_url(url)
Beispiel #28
0
    parser.add_argument('--warm_up', default=2, type=int, help='warm up number')
    parser.add_argument('--recalls', default='1,2,4,8', type=str, help='selected recall')

    opt = parser.parse_args()
    # args parse
    data_path, data_name, backbone_type = opt.data_path, opt.data_name, opt.backbone_type
    feature_dim, batch_size, num_epochs = opt.feature_dim, opt.batch_size, opt.num_epochs
    warm_up, recalls = opt.warm_up, [int(k) for k in opt.recalls.split(',')]
    save_name_pre = '{}_{}_{}'.format(data_name, backbone_type, feature_dim)

    results = {'train_loss': [], 'train_accuracy': []}
    for recall_id in recalls:
        results['test_recall@{}'.format(recall_id)] = []

    # dataset loader
    train_data_set = ImageReader(data_path, data_name, 'train', backbone_type)
    train_data_loader = DataLoader(train_data_set, batch_size, shuffle=True, num_workers=8)
    test_data_set = ImageReader(data_path, data_name, 'test', backbone_type)
    test_data_loader = DataLoader(test_data_set, batch_size, shuffle=False, num_workers=8)

    # model setup, optimizer config and loss definition
    model = Model(backbone_type, feature_dim, len(train_data_set.class_to_idx)).cuda()
    optimizer = AdamW([{'params': model.backbone.parameters()}, {'params': model.refactor.parameters()},
                       {'params': model.fc.parameters(), 'lr': 1e-2}], lr=1e-4, weight_decay=1e-4)
    lr_scheduler = StepLR(optimizer, step_size=5, gamma=0.5)
    loss_criterion = ProxyAnchorLoss()

    data_base = {'test_images': test_data_set.images, 'test_labels': test_data_set.labels}
    best_recall = 0.0
    for epoch in range(1, num_epochs + 1):
Beispiel #29
0
def full_run_single(video_id,
                    video_dir,
                    static_dir,
                    frame_by_frame_results_dir,
                    static_results_dir,
                    crop_boxes_dir,
                    ignore_mask_dir,
                    detector_config_path,
                    detector_model_path,
                    reid_model_path,
                    reid_model_backbone,
                    crop_results_dir,
                    anomaly_results_dir,
                    bg_interval=4,
                    bg_alpha=0.05,
                    bg_start_frame=1,
                    bg_threshold=5,
                    raw_detect_interval=30,
                    crop_min_obj_size=8,
                    crop_row_capacity=3,
                    crop_box_aspect_ratio=2,
                    ignore_count_thresh=0.08,
                    ignore_area_thresh=2000,
                    ignore_score_thresh=0.1,
                    ignore_gau_sigma=3,
                    abnormal_duration_thresh=60,
                    detect_duration_thresh=6,
                    undetect_duration_thresh=8,
                    bbox_score_thresh=0.3,
                    light_thresh=0.8,
                    anomaly_thresh=0.8,
                    similarity_thresh=0.95,
                    suspicious_duration_thresh=18,
                    detector_verbose_interval=20,
                    verbose=True):
    """
    Runs the full anomaly detection pipeline on a video

    video_id: video id/name
    video_dir: folder the video is in
    static_dir: folder to put the background images in
    frame_by_frame_results_dir: folder to put the raw video detection results in
    static_results_dir: folder to put the background image detection results in
    crop_boxes_dir: folder to put the crop boxes in
    ignore_mask_dir: folder to put the ignore region mask in

    detector_config_path: path to detector configuration file
    detector_model_path: path to detector model checkpoint
    reid_model_path: path to re-ID model checkpoint
    reid_model_backbone: re-ID model backbone, e.g. "resnet50"

    bg_interval, bg_alpha, bg_start_frame, bg_threshold: see calc_bg_full_video function
    raw_detect_interval: number of frames between detection on raw video
    crop_min_obj_size, crop_row_capacity, crop_box_aspect_ratio: see create_crop_boxes function
    ignore_count_thresh, ignore_area_thresh, ignore_score_thresh, ignore_gau_sigma: see create_ignore_mask function
    abnormal_duration_thresh, detect_duration_thresh, undetect_duration_thresh, bbox_score_thresh,
        light_thresh, anomaly_thresh, similarity_thresh, suspicious_duration_thresh:
            See get_anomalies function

    detector_verbose_interval: detector progress printing interval
    verbose: verbose printing


    """

    # Set up file paths
    video_path = os.path.join(video_dir, f"{video_id}.mp4")
    static_images_folder = os.path.join(static_dir, f"{video_id}")
    fbf_results_path = os.path.join(frame_by_frame_results_dir,
                                    f"{video_id}.csv")
    static_results_path = os.path.join(static_results_dir, f"{video_id}.csv")
    crop_boxes_path = os.path.join(crop_boxes_dir, f"{video_id}.csv")
    crop_results_path = os.path.join(crop_results_dir, f"{video_id}.csv")
    ignore_mask_path = os.path.join(ignore_mask_dir, f"{video_id}.npy")
    anomaly_results_path = os.path.join(anomaly_results_dir, f"{video_id}.csv")

    # Create folders
    os.makedirs(static_images_folder, exist_ok=True)
    os.makedirs(frame_by_frame_results_dir, exist_ok=True)
    os.makedirs(static_results_dir, exist_ok=True)
    os.makedirs(crop_boxes_dir, exist_ok=True)
    os.makedirs(crop_results_dir, exist_ok=True)
    os.makedirs(ignore_mask_dir, exist_ok=True)
    os.makedirs(anomaly_results_dir, exist_ok=True)

    # Read Video
    raw_video = VideoReader(video_path)

    # bg modeling
    print("Creating background...")
    calc_bg_full_video(video_path, static_images_folder, bg_interval, bg_alpha,
                       bg_start_frame, bg_threshold, verbose)

    # Detection
    detector = Detector(detector_config_path,
                        detector_model_path,
                        detector_verbose_interval,
                        class_restrictions=None)
    # class_names = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
    #                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
    #                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
    #                'tvmonitor')
    # detector.model.CLASSES = class_names
    # detector.class_labels = class_names
    ## Raw Video
    print("Detecting raw video...")
    raw_images, raw_frame_nums = raw_video.load_video(raw_detect_interval)
    fbf_results = detector.detect_images(raw_images, raw_frame_nums)
    fbf_results.to_csv(fbf_results_path, index=False)

    ## Static Images
    static_reader = ImageReader(static_images_folder)
    static_frame_names = list(
        map(lambda f: int(f[:-4]),
            static_reader.filenames))  # "123.jpg" -> 123

    print("Detecting background...")
    static_results = detector.detect_images(static_reader.load_images(),
                                            static_frame_names)
    static_results.to_csv(static_results_path, index=False)

    # Perspective Cropping
    print("Creating crop boxes...")
    create_crop_boxes(
        fbf_results_path, crop_boxes_path, raw_video.img_shape,
        crop_min_obj_size, crop_row_capacity,
        crop_box_aspect_ratio)  # either static/fbf results should work

    # Should be usable in place of the normal static images, but it doesn't look feasible at the moment: detection takes far too long.
    crop_boxes = pd.read_csv(crop_boxes_path).values
    print("Detecting cropped background...")
    crop_detect_results = detector.detect_images(static_reader.load_images(),
                                                 static_frame_names,
                                                 crop_boxes=crop_boxes)
    crop_detect_results.to_csv(crop_results_path)

    # Ignore Region
    print("Creating ingore mask...")
    create_ignore_mask(fbf_results_path, ignore_mask_path, raw_video.img_shape,
                       ignore_count_thresh, ignore_area_thresh,
                       ignore_score_thresh, ignore_gau_sigma)

    # Detect anomalies
    print("Detecting anomalies...")
    anomalies = get_anomalies_preprocessed(
        video_path, reid_model_path, fbf_results_path, static_results_path,
        ignore_mask_path, reid_model_backbone, bg_start_frame, bg_interval,
        abnormal_duration_thresh, detect_duration_thresh,
        undetect_duration_thresh, bbox_score_thresh, light_thresh,
        anomaly_thresh, similarity_thresh, suspicious_duration_thresh, verbose)

    if anomalies is not None:
        anomaly_event_times = get_overlapping_time(anomalies)

        # Save results
        print("Saving Results...")
        anomalies.to_csv(anomaly_results_path, index=False)

        return anomalies, anomaly_event_times

    else:
        return [], []
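
# For reference, a minimal invocation sketch of the pipeline above (not from
# the original source); every path and id below is a hypothetical placeholder:
anomalies, anomaly_event_times = full_run_single(
    video_id="001",
    video_dir="data/videos",
    static_dir="work/static",
    frame_by_frame_results_dir="work/fbf_results",
    static_results_dir="work/static_results",
    crop_boxes_dir="work/crop_boxes",
    ignore_mask_dir="work/ignore_masks",
    detector_config_path="configs/detector_config.py",
    detector_model_path="checkpoints/detector.pth",
    reid_model_path="checkpoints/reid.pth",
    reid_model_backbone="resnet50",
    crop_results_dir="work/crop_results",
    anomaly_results_dir="work/anomalies",
)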
Beispiel #30
0
	def train_setup(self):
		tf.set_random_seed(self.conf.random_seed)
		
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Input size
		self.input_size = (self.conf.input_height, self.conf.input_width)
		
		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.data_list,
				self.input_size,
				self.conf.random_scale,
				self.conf.random_mirror,
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
		
		# Create network
		net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
			is_training=self.conf.is_training)
		#net = DeepLabVGGModel(self.image_batch, num_classes=self.conf.num_classes,
		#												 is_training=self.conf.is_training)
		
		# Network raw output
		self.raw_output = net.o # [batch_size, 41, 41, 21]

		self.raw_output = tf.image.resize_bilinear(self.raw_output, [350, 350])
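		# NOTE: tf.shape returns a symbolic tensor, so the print below shows
		# the tensor object, not the actual shape values.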
		print(tf.shape(self.image_batch))
		# Output size
		output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)
		
		# Variables that load from pre-trained model.
		# For training, last few layers should not be loaded.
		#restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]   # used when initialising from the INIT pre-trained model
		restore_var = [v for v in tf.global_variables()]    # restore all parameters
		# Trainable Variables
		# Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
		# if they are presented in var_list of the optimiser definition.
		# So we remove them from the list.
		all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
		# Fine-tune part
		conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
		# ASPP part
		fc_trainable = [v for v in all_trainable if 'fc' in v.name]
		fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
		fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
		# check
		print(len(fc_trainable))
		print(len(fc_w_trainable) + len(fc_b_trainable))
		assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
		assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

		# Ground Truth: ignoring all labels greater than or equal to num_classes
		label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False) # [batch_size, 41, 41]
		raw_gt = tf.reshape(label_proc, [-1,])
		indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
		gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
		raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
		prediction = tf.gather(raw_prediction, indices)

		# Pixel-wise softmax_cross_entropy loss
		loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
		
		# L2 regularization
		l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
		# Loss function
		self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

		# Define optimizers
		# 'poly' learning rate
		base_lr = tf.constant(self.conf.learning_rate)
		self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
		#learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (15000+self.curr_step) /(15000+ self.conf.num_steps)), self.conf.power))
		learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (self.curr_step) /(self.conf.num_steps)), self.conf.power))

		# We use several optimizers here in order to handle the different lr_mult
		# values (a per-layer parameter in Caffe that controls the actual
		# learning rate of each layer).
		opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
		opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
		opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
		# To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
		# Instead, we separate the steps compute_grads+update_params.
		# Compute grads
		grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
		grads_conv = grads[:len(conv_trainable)]
		grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
		grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
		# Update params
		train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
		train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
		train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
		# Finally, get the train_op!
		self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
		#self.train_op = tf.group(train_op_fc_w, train_op_fc_b) # only optimise the fully-connected part

		# Saver for storing checkpoints of the model
		self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

		# Loader for loading the pre-trained model
		self.loader = tf.train.Saver(var_list=restore_var)