Code Example #1
import os

import cv2
import numpy as np


def get_validation_batch(dataset, labels, batch_size, starting_index):
    ''' Load the next validation batch of images and labels. '''
    batch = {'images': [], 'labels': []}
    for index in range(starting_index, starting_index + batch_size):
        label = labels[index]
        image = cv2.imread(os.path.join(dataset, str(index).zfill(5) + '.png'))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = np.pad(image, ((2, 2), (2, 2)), 'constant')
        image = image / 255.
        image = np.expand_dims(image, axis=-1)
        label = prepare_label(label)
        batch['images'].append(image)
        batch['labels'].append(label)
    # stacking elements in a single tensor
    batch['images'] = np.array(batch['images'])
    batch['labels'] = np.array(batch['labels'])

    return batch
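
A minimal usage sketch for the function above. The directory layout (zero-padded files 00000.png, 00001.png, ...), the label file, and the batch size are illustrative assumptions, and prepare_label is expected to be provided elsewhere in the same project.

import numpy as np

validation_dir = 'data/validation'             # placeholder path to the PNG files
validation_labels = np.load('val_labels.npy')  # placeholder array, one label per image

batch_size = 32
for start in range(0, len(validation_labels) - batch_size + 1, batch_size):
    batch = get_validation_batch(validation_dir, validation_labels, batch_size, start)
    # Images are scaled to [0, 1] and padded by 2 pixels on each side,
    # so e.g. 28x28 inputs come out as (batch_size, 32, 32, 1).
    print(batch['images'].shape, batch['labels'].shape)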
Code Example #2
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        input_size = (self.conf.input_height, self.conf.input_width)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = RGBDReader(self.conf.data_dir, self.conf.data_list,
                                input_size, self.conf.random_scale,
                                self.conf.random_mirror,
                                self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch, self.depth_batch = reader.dequeue(
                self.conf.batch_size)

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            net = Deeplab_RGBD(self.image_batch, self.depth_batch,
                               self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            net = ResNet_segmentation(self.image_batch, self.conf.num_classes,
                                      True, self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output = net.outputs  # [batch_size, h, w, 21]

        # Output size
        output_shape = tf.shape(raw_output)
        output_size = (output_shape[1], output_shape[2])

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        label_proc = prepare_label(self.label_batch,
                                   output_size,
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)
        raw_gt = tf.reshape(label_proc, [
            -1,
        ])
        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
        # We use several optimizers to emulate Caffe's lr_mult parameter,
        # which controls the effective learning rate for each layer.
        opt_encoder = tf.train.AdamOptimizer(learning_rate, self.conf.momentum)
        opt_decoder_w = tf.train.AdamOptimizer(learning_rate * 10.0,
                                               self.conf.momentum)
        opt_decoder_b = tf.train.AdamOptimizer(learning_rate * 20.0,
                                               self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        grads_encoder = grads[:len(encoder_trainable)]
        grads_decoder_w = grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = grads[(len(encoder_trainable) +
                                 len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, axis=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 2, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 2, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 2, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=2)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
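
The three-optimizer pattern above mimics Caffe's lr_mult: the encoder trains at the base rate while decoder weights and biases get 10x and 20x multipliers. Below is a minimal, self-contained sketch of that idea with toy variables and illustrative rates (MomentumOptimizer is used here for brevity; the snippet above uses Adam); it is not the repository's code.

import tensorflow as tf  # TF 1.x API, as in the snippets in this section

# Toy stand-ins for the encoder / decoder-weight / decoder-bias groups.
enc = tf.Variable(1.0, name='encoder_weights')
dec_w = tf.Variable(1.0, name='decoder_weights')
dec_b = tf.Variable(0.0, name='decoder_biases')
loss = tf.square(enc + dec_w + dec_b - 3.0)

lr = tf.constant(1e-3)
groups = [(tf.train.MomentumOptimizer(lr, 0.9), [enc]),           # lr * 1.0
          (tf.train.MomentumOptimizer(lr * 10.0, 0.9), [dec_w]),  # lr * 10.0
          (tf.train.MomentumOptimizer(lr * 20.0, 0.9), [dec_b])]  # lr * 20.0

# Compute all gradients once, then hand each slice to its own optimizer,
# mirroring the compute_grads + apply_gradients split above.
all_vars = [v for _, vs in groups for v in vs]
grads = tf.gradients(loss, all_vars)
train_ops, i = [], 0
for opt, vs in groups:
    train_ops.append(opt.apply_gradients(zip(grads[i:i + len(vs)], vs)))
    i += len(vs)
train_op = tf.group(*train_ops)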
Code Example #3
	def train_setup(self):
		tf.set_random_seed(self.conf.random_seed)
		
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Input size
		input_size = (self.conf.input_height, self.conf.input_width)
		
		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.data_list,
				input_size,
				self.conf.random_scale,
				self.conf.random_mirror,
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
		
		# Create network
		if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
			print('encoder_name ERROR!')
			print("Please input: res101, res50, or deeplab")
			sys.exit(-1)
		elif self.conf.encoder_name == 'deeplab':
			net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
			# Variables that load from pre-trained model.
			restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]
			# Trainable Variables
			all_trainable = tf.trainable_variables()
			# Fine-tune part
			encoder_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
			# Decoder part
			decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
		else:
			net = ResNet_segmentation(self.image_batch, self.conf.num_classes, True, self.conf.encoder_name)
			# Variables that load from pre-trained model.
			restore_var = [v for v in tf.global_variables() if 'resnet_v1' in v.name]
			# Trainable Variables
			all_trainable = tf.trainable_variables()
			# Fine-tune part
			encoder_trainable = [v for v in all_trainable if 'resnet_v1' in v.name] # lr * 1.0
			# Decoder part
			decoder_trainable = [v for v in all_trainable if 'decoder' in v.name]
		
		decoder_w_trainable = [v for v in decoder_trainable if 'weights' in v.name or 'gamma' in v.name] # lr * 10.0
		decoder_b_trainable = [v for v in decoder_trainable if 'biases' in v.name or 'beta' in v.name] # lr * 20.0
		# Check
		assert(len(all_trainable) == len(decoder_trainable) + len(encoder_trainable))
		assert(len(decoder_trainable) == len(decoder_w_trainable) + len(decoder_b_trainable))

		# Network raw output
		raw_output = net.outputs # [batch_size, h, w, 21]

		# Output size
		output_shape = tf.shape(raw_output)
		output_size = (output_shape[1], output_shape[2])

		# Ground Truth: ignoring all labels greater than or equal to n_classes
		label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False)
		raw_gt = tf.reshape(label_proc, [-1,])
		indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
		gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
		raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
		prediction = tf.gather(raw_prediction, indices)

		# Pixel-wise softmax_cross_entropy loss
		loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
		# L2 regularization
		l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable if 'weights' in v.name]
		# Loss function
		self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

		# Define optimizers
		# 'poly' learning rate
		base_lr = tf.constant(self.conf.learning_rate)
		self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
		learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - self.curr_step / self.conf.num_steps), self.conf.power))
		# We use several optimizers to emulate Caffe's lr_mult parameter,
		# which controls the effective learning rate for each layer.
		opt_encoder = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
		opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
		opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
		# To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
		# Instead, we separate the steps compute_grads+update_params.
		# Compute grads
		grads = tf.gradients(self.reduced_loss, encoder_trainable + decoder_w_trainable + decoder_b_trainable)
		grads_encoder = grads[:len(encoder_trainable)]
		grads_decoder_w = grads[len(encoder_trainable) : (len(encoder_trainable) + len(decoder_w_trainable))]
		grads_decoder_b = grads[(len(encoder_trainable) + len(decoder_w_trainable)):]
		# Update params
		train_op_conv = opt_encoder.apply_gradients(zip(grads_encoder, encoder_trainable))
		train_op_fc_w = opt_decoder_w.apply_gradients(zip(grads_decoder_w, decoder_w_trainable))
		train_op_fc_b = opt_decoder_b.apply_gradients(zip(grads_decoder_b, decoder_b_trainable))
		# Finally, get the train_op!
		update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) # for collecting moving_mean and moving_variance
		with tf.control_dependencies(update_ops):
			self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)

		# Saver for storing checkpoints of the model
		self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=0)

		# Loader for loading the pre-trained model
		self.loader = tf.train.Saver(var_list=restore_var)

		# Training summary
		# Processed predictions: for visualisation.
		raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
		raw_output_up = tf.argmax(raw_output_up, axis=3)
		self.pred = tf.expand_dims(raw_output_up, axis=3)
		# Image summary.
		images_summary = tf.py_func(inv_preprocess, [self.image_batch, 2, IMG_MEAN], tf.uint8)
		labels_summary = tf.py_func(decode_labels, [self.label_batch, 2, self.conf.num_classes], tf.uint8)
		preds_summary = tf.py_func(decode_labels, [self.pred, 2, self.conf.num_classes], tf.uint8)
		self.total_summary = tf.summary.image('images',
			tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]),
			max_outputs=2) # Concatenate row-wise.
		if not os.path.exists(self.conf.logdir):
			os.makedirs(self.conf.logdir)
		self.summary_writer = tf.summary.FileWriter(self.conf.logdir, graph=tf.get_default_graph())
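
All of these train_setup variants build the same 'poly' schedule out of tf.scalar_mul and tf.pow; numerically it is just base_lr * (1 - step / num_steps) ** power. A tiny standalone sketch with illustrative values:

def poly_lr(base_lr, step, num_steps, power=0.9):
    """Polynomial ('poly') learning-rate decay, as constructed above."""
    return base_lr * (1.0 - step / float(num_steps)) ** power

# Illustrative values only: a 2.5e-4 base rate decayed over 20,000 steps.
for s in (0, 5000, 10000, 15000, 19999):
    print(s, poly_lr(2.5e-4, s, 20000))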
Code Example #4
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Load reader
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            self.image_batch, self.label_batch = reader.dequeue(
                self.conf.batch_size)
            image_batch_075 = tf.image.resize_images(
                self.image_batch, [int(h * 0.75), int(w * 0.75)])
            image_batch_05 = tf.image.resize_images(
                self.image_batch, [int(h * 0.5), int(w * 0.5)])
            # #testWWang
            # image = self.image_batch[0]
            # label = self.label_batch[0]
            # utils.save_image(image, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name = image ,mean = IMG_MEAN)
            # utils.save_image(label, "/home/py36tf14/wangyichao/Deeplab-v2--ResNet-101--Tensorflow-master/images",
            #                  name=label, mean=IMG_MEAN)
            #
            # #end

        # Create network
        if self.conf.encoder_name not in ['res101', 'res50', 'deeplab']:
            print('encoder_name ERROR!')
            print("Please input: res101, res50, or deeplab")
            sys.exit(-1)
        elif self.conf.encoder_name == 'deeplab':
            with tf.variable_scope('', reuse=False):
                net = Deeplab_v2(self.image_batch, self.conf.num_classes, True)
            with tf.variable_scope('', reuse=True):
                net075 = Deeplab_v2(image_batch_075, self.conf.num_classes,
                                    True)
            with tf.variable_scope('', reuse=True):
                net05 = Deeplab_v2(image_batch_05, self.conf.num_classes, True)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'fc' not in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [v for v in all_trainable if 'fc' in v.name]
        else:
            with tf.variable_scope('', reuse=False):
                net = ResNet_segmentation(self.image_batch,
                                          self.conf.num_classes, True,
                                          self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net075 = ResNet_segmentation(image_batch_075,
                                             self.conf.num_classes, True,
                                             self.conf.encoder_name)
            with tf.variable_scope('', reuse=True):
                net05 = ResNet_segmentation(image_batch_05,
                                            self.conf.num_classes, True,
                                            self.conf.encoder_name)
            # Variables that load from pre-trained model.
            restore_var = [
                v for v in tf.global_variables() if 'resnet_v1' in v.name
            ]
            # Trainable Variables
            all_trainable = tf.trainable_variables()
            # Fine-tune part
            encoder_trainable = [
                v for v in all_trainable if 'resnet_v1' in v.name
            ]  # lr * 1.0
            # Decoder part
            decoder_trainable = [
                v for v in all_trainable if 'decoder' in v.name
            ]

        decoder_w_trainable = [
            v for v in decoder_trainable
            if 'weights' in v.name or 'gamma' in v.name
        ]  # lr * 10.0
        decoder_b_trainable = [
            v for v in decoder_trainable
            if 'biases' in v.name or 'beta' in v.name
        ]  # lr * 20.0
        # Check
        assert (len(all_trainable) == len(decoder_trainable) +
                len(encoder_trainable))
        assert (len(decoder_trainable) == len(decoder_w_trainable) +
                len(decoder_b_trainable))

        # Network raw output
        raw_output100 = net.outputs
        raw_output075 = net075.outputs
        raw_output05 = net05.outputs
        raw_output = tf.reduce_max(tf.stack([
            raw_output100,
            tf.image.resize_images(raw_output075,
                                   tf.shape(raw_output100)[1:3, ]),
            tf.image.resize_images(raw_output05,
                                   tf.shape(raw_output100)[1:3, ])
        ]),
                                   axis=0)

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        label_proc = prepare_label(self.label_batch,
                                   tf.stack(raw_output.get_shape()[1:3]),
                                   num_classes=self.conf.num_classes,
                                   one_hot=False)  # [batch_size, h, w]
        label_proc075 = prepare_label(self.label_batch,
                                      tf.stack(raw_output075.get_shape()[1:3]),
                                      num_classes=self.conf.num_classes,
                                      one_hot=False)
        label_proc05 = prepare_label(self.label_batch,
                                     tf.stack(raw_output05.get_shape()[1:3]),
                                     num_classes=self.conf.num_classes,
                                     one_hot=False)

        raw_gt = tf.reshape(label_proc, [
            -1,
        ])
        raw_gt075 = tf.reshape(label_proc075, [
            -1,
        ])
        raw_gt05 = tf.reshape(label_proc05, [
            -1,
        ])

        indices = tf.squeeze(
            tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        indices075 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt075, self.conf.num_classes - 1)), 1)
        indices05 = tf.squeeze(
            tf.where(tf.less_equal(raw_gt05, self.conf.num_classes - 1)), 1)

        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
        gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

        raw_prediction = tf.reshape(raw_output, [-1, self.conf.num_classes])
        raw_prediction100 = tf.reshape(raw_output100,
                                       [-1, self.conf.num_classes])
        raw_prediction075 = tf.reshape(raw_output075,
                                       [-1, self.conf.num_classes])
        raw_prediction05 = tf.reshape(raw_output05,
                                      [-1, self.conf.num_classes])

        prediction = tf.gather(raw_prediction, indices)
        prediction100 = tf.gather(raw_prediction100, indices)
        prediction075 = tf.gather(raw_prediction075, indices075)
        prediction05 = tf.gather(raw_prediction05, indices05)

        # Pixel-wise softmax_cross_entropy loss
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction, labels=gt)
        loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction100, labels=gt)
        loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction075, labels=gt075)
        loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=prediction05, labels=gt05)
        # L2 regularization
        l2_losses = [
            self.conf.weight_decay * tf.nn.l2_loss(v) for v in all_trainable
            if 'weights' in v.name
        ]
        # Loss function
        self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
            loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                loss05) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        learning_rate = tf.scalar_mul(
            base_lr,
            tf.pow((1 - self.curr_step / self.conf.num_steps),
                   self.conf.power))
        # We use several optimizers to emulate Caffe's lr_mult parameter,
        # which controls the effective learning rate for each layer.
        opt_encoder = tf.train.MomentumOptimizer(learning_rate,
                                                 self.conf.momentum)
        opt_decoder_w = tf.train.MomentumOptimizer(learning_rate * 10.0,
                                                   self.conf.momentum)
        opt_decoder_b = tf.train.MomentumOptimizer(learning_rate * 20.0,
                                                   self.conf.momentum)

        # Gradient accumulation
        # Define a variable to accumulate gradients.
        accum_grads = [
            tf.Variable(tf.zeros_like(v.initialized_value()), trainable=False)
            for v in encoder_trainable + decoder_w_trainable +
            decoder_b_trainable
        ]
        # Define an operation to clear the accumulated gradients for next batch.
        self.zero_op = [v.assign(tf.zeros_like(v)) for v in accum_grads]
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads = tf.gradients(
            self.reduced_loss,
            encoder_trainable + decoder_w_trainable + decoder_b_trainable)
        # Accumulate and normalise the gradients.
        self.accum_grads_op = [
            accum_grads[i].assign_add(grad / self.conf.grad_update_every)
            for i, grad in enumerate(grads)
        ]

        # Slice the accumulated gradients back into the three parameter groups.
        grads_encoder = accum_grads[:len(encoder_trainable)]
        grads_decoder_w = accum_grads[len(encoder_trainable):(
            len(encoder_trainable) + len(decoder_w_trainable))]
        grads_decoder_b = accum_grads[(len(encoder_trainable) +
                                       len(decoder_w_trainable)):]
        # Update params
        train_op_conv = opt_encoder.apply_gradients(
            zip(grads_encoder, encoder_trainable))
        train_op_fc_w = opt_decoder_w.apply_gradients(
            zip(grads_decoder_w, decoder_w_trainable))
        train_op_fc_b = opt_decoder_b.apply_gradients(
            zip(grads_decoder_b, decoder_b_trainable))
        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_conv, train_op_fc_w,
                                     train_op_fc_b)

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        self.loader = tf.train.Saver(var_list=restore_var)

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, axis=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=20)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
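
The accumulation ops above are meant to be driven from an outer loop: clear the buffers, run accum_grads_op for grad_update_every mini-batches, then apply train_op once. A hedged sketch of that loop follows; 'trainer' is assumed to be an instance of the class containing train_setup(), and checkpoint/summary plumbing is omitted.

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    threads = tf.train.start_queue_runners(coord=trainer.coord, sess=sess)
    for step in range(trainer.conf.num_steps):
        feed = {trainer.curr_step: step}
        sess.run(trainer.zero_op)                     # clear the gradient buffers
        for _ in range(trainer.conf.grad_update_every):
            sess.run(trainer.accum_grads_op, feed_dict=feed)  # accumulate
        sess.run(trainer.train_op, feed_dict=feed)    # one parameter update
    trainer.coord.request_stop()
    trainer.coord.join(threads)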
Code Example #5
    def train_setup(self, reuse=False):
        tf.set_random_seed(self.conf.random_seed)
        num_layers = 50  # depth of the ResNet backbone passed to deeplabv3

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()
        self.n_gpu = self.conf.n_gpu

        # Input size
        self.input_size = (self.conf.input_height, self.conf.input_width)
        j_step = 0
        with tf.name_scope("create_inputs"):
            reader = ImageReader(self.conf.data_dir, self.conf.data_list,
                                 self.input_size, self.conf.random_scale,
                                 self.conf.random_mirror,
                                 self.conf.ignore_label, IMG_MEAN, self.coord)
            # print "1"*22
            # print reader
        image_data, image_label = reader.dequeue(self.conf.batch_size)
        self.image_data = image_data
        if tf.__version__.startswith('1.'):
            split_train_data_node = tf.split(image_data, self.n_gpu)
            split_train_labels_node = tf.split(image_label, self.n_gpu)
        else:
            split_train_data_node = tf.split(0, self.n_gpu, image_data)
            split_train_labels_node = tf.split(0, self.n_gpu, image_label)
        with tf.variable_scope(tf.get_variable_scope()):
            all_loss = []
            for device_index, (i, self.image_batch,
                               self.label_batch) in enumerate(
                                   zip([1], split_train_data_node,
                                       split_train_labels_node)):
                with tf.device('/gpu:%d' % i):
                    #print i
                    with tf.name_scope('%s_%d' % ("gpu", i)) as scope:
                        if j_step == 0:
                            j_step = 1
                            pass
                        else:
                            reuse = True
                        # net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
                        #                          is_training=self.conf.is_training ,reuse=reuse)
                        net, end_points = deeplabv3(
                            self.image_batch,
                            num_classes=self.conf.num_classes,
                            depth=num_layers,
                            is_training=True,
                            reuse=reuse)
                        self.raw_output = end_points[
                            'gpu_{}/resnet{}/logits'.format(i, num_layers)]
                        # Network raw output
                        # [batch_size, 41, 41, 21]
                        output_size = (self.raw_output.shape[1].value,
                                       self.raw_output.shape[2].value)

                        label_proc = prepare_label(
                            self.label_batch,
                            output_size,
                            num_classes=self.conf.num_classes,
                            one_hot=False)  # [batch_size, 41, 41]
                        raw_gt = tf.reshape(label_proc, [
                            -1,
                        ])
                        indices = tf.squeeze(
                            tf.where(
                                tf.less_equal(raw_gt,
                                              self.conf.num_classes - 1)), 1)
                        gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                        raw_prediction = tf.reshape(
                            self.raw_output, [-1, self.conf.num_classes])
                        # print raw_prediction
                        # print gt
                        prediction = raw_prediction
                        # prediction = tf.expand_dims(raw_prediction,  3)
                        # prediction = tl.act.pixel_wise_softmax(prediction)
                        # print prediction
                        # print label_proc
                        # loss = 1 - tl.cost.dice_coe(prediction, label_proc, axis=[1, 2, 3, 4])
                        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                            logits=prediction, labels=gt)
                        l2_losses = [
                            self.conf.weight_decay * tf.nn.l2_loss(v)
                            for v in tf.trainable_variables()
                            if 'weights' in v.name
                        ]
                        # Loss function
                        all_loss.append(
                            tf.reduce_mean(loss) + tf.add_n(l2_losses))
                        tf.get_variable_scope().reuse_variables()

        # Output size
        #output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)

        # Variables that load from pre-trained model.
        # For training, last few layers should not be loaded.
        if self.conf.pretrain_file is not None:
            restore_var = [
                v for v in tf.global_variables() if 'fc' not in v.name
            ]
            original_step = int(self.conf.pretrain_file.split("-")[-1])
        else:
            original_step = 0
        num_steps = self.conf.num_steps + original_step
        # Trainable Variables
        # Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
        # if they are presented in var_list of the optimiser definition.
        # So we remove them from the list.
        all_trainable = [
            v for v in tf.trainable_variables()
            if 'beta' not in v.name and 'gamma' not in v.name
        ]
        # Fine-tune part
        conv_trainable = [v for v in all_trainable
                          if 'fc' not in v.name]  # lr * 1.0
        # ASPP part
        fc_trainable = [v for v in all_trainable if 'fc' in v.name]
        # fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name]  # lr * 10.0
        # fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name]  # lr * 20.0
        # check
        #assert (len(all_trainable) == len(fc_trainable) + len(conv_trainable))
        #assert (len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

        # Ground Truth: ignoring all labels greater than or equal to n_classes
        #label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes,
        #one_hot=False)  # [batch_size, 41, 41]
        #raw_gt = tf.reshape(label_proc, [-1, ])
        #indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
        #gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
        #raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
        #prediction = tf.gather(raw_prediction, indices)

        # Pixel-wise softmax_cross_entropy loss
        #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
        # L2 regularization
        #l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
        # Loss function
        self.reduced_loss = tf.add_n(all_loss) / self.n_gpu
        #self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

        # Define optimizers
        # 'poly' learning rate
        base_lr = tf.constant(self.conf.learning_rate)
        self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
        self.loss_trans = tf.placeholder(dtype=tf.float32, shape=())
        self.final_loss = (self.reduced_loss + self.loss_trans) / 2

        learning_rate = tf.scalar_mul(
            base_lr, tf.pow((1 - self.curr_step / num_steps), self.conf.power))
        #print self.conf.power
        self.learning_rate = learning_rate
        #print  learning_rate
        # We use several optimizers to emulate Caffe's lr_mult parameter,
        # which controls the effective learning rate for each layer.
        opt = tf.train.AdamOptimizer(learning_rate, self.conf.momentum, 0.98)
        #opt= tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_fc_b = tf.train.AdamOptimizer(learning_rate , self.conf.momentum,0.98)
        #opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
        #opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
        #opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
        # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
        # Instead, we separate the steps compute_grads+update_params.
        # Compute grads
        grads_conv = tf.gradients(self.final_loss, conv_trainable)
        # train_op = opt.apply_gradients(zip(grads_conv, conv_trainable))
        #grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
        grads_conv = grads_conv[:len(conv_trainable)]
        # grads_fc_w = grads[len(conv_trainable): (len(conv_trainable) + len(fc_w_trainable))]
        # grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
        # Update params
        train_op_conv = opt.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
        # train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
        # train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
        # Finally, get the train_op!
        self.train_op = train_op_conv
        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        if self.conf.pretrain_file is not None:
            self.loader = tf.train.Saver(var_list=restore_var)
Code Example #6
	def train_setup(self):
		tf.set_random_seed(self.conf.random_seed)
		
		# Create queue coordinator.
		self.coord = tf.train.Coordinator()

		# Input size
		self.input_size = (self.conf.input_height, self.conf.input_width)
		
		# Load reader
		with tf.name_scope("create_inputs"):
			reader = ImageReader(
				self.conf.data_dir,
				self.conf.data_list,
				self.input_size,
				self.conf.random_scale,
				self.conf.random_mirror,
				self.conf.ignore_label,
				IMG_MEAN,
				self.coord)
			self.image_batch, self.label_batch = reader.dequeue(self.conf.batch_size)
		
		# Create network
		net = DeepLab_v2_Network(self.image_batch, num_classes=self.conf.num_classes,
			is_training=self.conf.is_training)
		#net = DeepLabVGGModel(self.image_batch, num_classes=self.conf.num_classes,
		#												 is_training=self.conf.is_training)
		
		# Network raw output
		self.raw_output = net.o # [batch_size, 41, 41, 21]

		self.raw_output=tf.image.resize_bilinear(self.raw_output, [350,350])
		print(tf.shape(self.image_batch))
		# Output size
		output_size = (self.raw_output.shape[1].value, self.raw_output.shape[2].value)
		
		# Variables that load from pre-trained model.
		# For training, last few layers should not be loaded.
		#restore_var = [v for v in tf.global_variables() if 'fc' not in v.name]   # used only when initialising from the INIT pre-trained model
		restore_var = [v for v in tf.global_variables()]    # restore all parameters
		# Trainable Variables
		# Note that is_training=False still updates BN parameters gamma (scale) and beta (offset)
		# if they are presented in var_list of the optimiser definition.
		# So we remove them from the list.
		all_trainable = [v for v in tf.trainable_variables() if 'beta' not in v.name and 'gamma' not in v.name]
		# Fine-tune part
		conv_trainable = [v for v in all_trainable if 'fc' not in v.name] # lr * 1.0
		# ASPP part
		fc_trainable = [v for v in all_trainable if 'fc' in v.name]
		fc_w_trainable = [v for v in fc_trainable if 'weights' in v.name] # lr * 10.0
		fc_b_trainable = [v for v in fc_trainable if 'biases' in v.name] # lr * 20.0
		# check
		print(len(fc_trainable))
		print(len(fc_w_trainable) + len(fc_b_trainable))
		assert(len(all_trainable) == len(fc_trainable) + len(conv_trainable))
		assert(len(fc_trainable) == len(fc_w_trainable) + len(fc_b_trainable))

		# Ground Truth: ignoring all labels greater than or equal to n_classes
		label_proc = prepare_label(self.label_batch, output_size, num_classes=self.conf.num_classes, one_hot=False) # [batch_size, 41, 41]
		raw_gt = tf.reshape(label_proc, [-1,])
		indices = tf.squeeze(tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)), 1)
		gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
		raw_prediction = tf.reshape(self.raw_output, [-1, self.conf.num_classes])
		prediction = tf.gather(raw_prediction, indices)

		# Pixel-wise softmax_cross_entropy loss
		loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
		
		# L2 regularization
		l2_losses = [self.conf.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
		# Loss function
		self.reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)

		# Define optimizers
		# 'poly' learning rate
		base_lr = tf.constant(self.conf.learning_rate)
		self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
		#learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (15000+self.curr_step) /(15000+ self.conf.num_steps)), self.conf.power))
		learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - (self.curr_step) /(self.conf.num_steps)), self.conf.power))

		# We use several optimizers to emulate Caffe's lr_mult parameter,
		# which controls the effective learning rate for each layer.
		opt_conv = tf.train.MomentumOptimizer(learning_rate, self.conf.momentum)
		opt_fc_w = tf.train.MomentumOptimizer(learning_rate * 10.0, self.conf.momentum)
		opt_fc_b = tf.train.MomentumOptimizer(learning_rate * 20.0, self.conf.momentum)
		# To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
		# Instead, we separate the steps compute_grads+update_params.
		# Compute grads
		grads = tf.gradients(self.reduced_loss, conv_trainable + fc_w_trainable + fc_b_trainable)
		grads_conv = grads[:len(conv_trainable)]
		grads_fc_w = grads[len(conv_trainable) : (len(conv_trainable) + len(fc_w_trainable))]
		grads_fc_b = grads[(len(conv_trainable) + len(fc_w_trainable)):]
		# Update params
		train_op_conv = opt_conv.apply_gradients(zip(grads_conv, conv_trainable))
		train_op_fc_w = opt_fc_w.apply_gradients(zip(grads_fc_w, fc_w_trainable))
		train_op_fc_b = opt_fc_b.apply_gradients(zip(grads_fc_b, fc_b_trainable))
		# Finally, get the train_op!
		self.train_op = tf.group(train_op_conv, train_op_fc_w, train_op_fc_b)
		#self.train_op = tf.group(train_op_fc_w, train_op_fc_b) # optimise only the fully-connected part

		# Saver for storing checkpoints of the model
		self.saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=5)

		# Loader for loading the pre-trained model
		self.loader = tf.train.Saver(var_list=restore_var)
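
These classes keep two tf.train.Saver objects: self.loader restores only restore_var from a pre-trained checkpoint, while self.saver snapshots every variable during training. A hedged driver sketch for the non-accumulating variants; 'trainer' and both checkpoint paths are placeholders, not the repository's actual scripts.

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    trainer.loader.restore(sess, 'pretrained/deeplab_resnet_init.ckpt')  # backbone weights
    threads = tf.train.start_queue_runners(coord=trainer.coord, sess=sess)
    for step in range(trainer.conf.num_steps):
        loss_value, _ = sess.run([trainer.reduced_loss, trainer.train_op],
                                 feed_dict={trainer.curr_step: step})
        if step % 1000 == 0:
            trainer.saver.save(sess, 'snapshots/model.ckpt', global_step=step)
    trainer.coord.request_stop()
    trainer.coord.join(threads)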
Code Example #7
    def train_setup(self):
        tf.set_random_seed(self.conf.random_seed)

        # Create queue coordinator.
        self.coord = tf.train.Coordinator()

        # Input size
        h, w = (self.conf.input_height, self.conf.input_width)
        input_size = (h, w)

        # Devices
        gpu_list = get_available_gpus()
        zip_encoder, zip_decoder_b, zip_decoder_w, zip_crf = [], [], [], []
        previous_crf_names = []
        restore_vars = []
        self.loaders = []

        self.im_list = []

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Load reader
                with tf.name_scope("create_inputs"):
                    reader = ImageReader(self.conf.data_dir,
                                         self.conf.data_list, input_size,
                                         self.conf.random_scale,
                                         self.conf.random_mirror,
                                         self.conf.ignore_label, IMG_MEAN,
                                         self.coord)
                    self.image_batch, self.label_batch = reader.dequeue(
                        self.conf.batch_size)
                    self.im_list.append(self.image_batch)
                    image_batch_075 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.75), int(w * 0.75)])
                    image_batch_05 = tf.image.resize_images(
                        self.image_batch,
                        [int(h * 0.5), int(w * 0.5)])

                # Create network
                with tf.variable_scope('', reuse=False):
                    net = Deeplab_v2(self.image_batch,
                                     self.conf.num_classes,
                                     True,
                                     rescale075=False,
                                     rescale05=False,
                                     crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net075 = Deeplab_v2(image_batch_075,
                                        self.conf.num_classes,
                                        True,
                                        rescale075=True,
                                        rescale05=False,
                                        crf_type=self.conf.crf_type)

                with tf.variable_scope('', reuse=True):
                    net05 = Deeplab_v2(image_batch_05,
                                       self.conf.num_classes,
                                       True,
                                       rescale075=False,
                                       rescale05=True,
                                       crf_type=self.conf.crf_type)

                # Variables that load from pre-trained model.
                restore_var = [
                    v for v in tf.global_variables()
                    if ('fc' not in v.name and 'crfrnn' not in v.name)
                ]
                restore_vars.append(restore_var)

                # Trainable Variables
                all_trainable = tf.trainable_variables()
                # Fine-tune part
                # Drop CRF variables already claimed by a previous GPU
                # (avoids mutating the list while iterating over it).
                all_trainable = [
                    v for v in all_trainable
                    if v.name not in previous_crf_names
                ]

                crf_trainable = [
                    v for v in all_trainable
                    if ('crfrnn' in v.name and v.name not in previous_crf_names
                        )
                ]
                previous_crf_names.extend(v.name for v in crf_trainable)
                encoder_trainable = [
                    v for v in all_trainable
                    if 'fc' not in v.name and 'crfrnn' not in v.name
                ]  # lr * 1.0

                # Remove encoder_trainable from all_trainable
                #all_trainable = [v for v in all_trainable if v not in encoder_trainable]

                # Decoder part
                decoder_trainable = [
                    v for v in all_trainable
                    if 'fc' in v.name and 'crfrnn' not in v.name
                ]

                decoder_w_trainable = [
                    v for v in decoder_trainable
                    if ('weights' in v.name or 'gamma' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 10.0
                decoder_b_trainable = [
                    v for v in decoder_trainable
                    if ('biases' in v.name or 'beta' in v.name)
                    and 'crfrnn' not in v.name
                ]  # lr * 20.0
                # Check
                assert (len(all_trainable) == len(decoder_trainable) +
                        len(crf_trainable) + len(encoder_trainable))
                assert (len(decoder_trainable) == len(decoder_w_trainable) +
                        len(decoder_b_trainable))

                # Network raw output
                raw_output100 = net.outputs

                raw_output075 = net075.outputs
                raw_output05 = net05.outputs
                raw_output = tf.reduce_max(tf.stack([
                    raw_output100,
                    tf.image.resize_images(raw_output075,
                                           tf.shape(raw_output100)[1:3, ]),
                    tf.image.resize_images(raw_output05,
                                           tf.shape(raw_output100)[1:3, ])
                ]),
                                           axis=0)

                # Ground Truth: ignoring all labels greater than or equal to n_classes
                label_proc = prepare_label(self.label_batch,
                                           tf.stack(
                                               raw_output.get_shape()[1:3]),
                                           num_classes=self.conf.num_classes,
                                           one_hot=True)  # [batch_size, h, w, num_classes]
                label_proc075 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output075.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)
                label_proc05 = prepare_label(
                    self.label_batch,
                    tf.stack(raw_output05.get_shape()[1:3]),
                    num_classes=self.conf.num_classes,
                    one_hot=True)

                raw_gt = tf.reshape(label_proc, [
                    -1,
                ])
                raw_gt075 = tf.reshape(label_proc075, [
                    -1,
                ])
                raw_gt05 = tf.reshape(label_proc05, [
                    -1,
                ])

                indices = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt, self.conf.num_classes - 1)),
                    1)
                indices075 = tf.squeeze(
                    tf.where(
                        tf.less_equal(raw_gt075, self.conf.num_classes - 1)),
                    1)
                indices05 = tf.squeeze(
                    tf.where(tf.less_equal(raw_gt05,
                                           self.conf.num_classes - 1)), 1)

                gt = tf.cast(tf.gather(raw_gt, indices), tf.int32)
                gt075 = tf.cast(tf.gather(raw_gt075, indices075), tf.int32)
                gt05 = tf.cast(tf.gather(raw_gt05, indices05), tf.int32)

                raw_prediction = tf.reshape(raw_output,
                                            [-1, self.conf.num_classes])
                raw_prediction100 = tf.reshape(raw_output100,
                                               [-1, self.conf.num_classes])
                raw_prediction075 = tf.reshape(raw_output075,
                                               [-1, self.conf.num_classes])
                raw_prediction05 = tf.reshape(raw_output05,
                                              [-1, self.conf.num_classes])

                prediction = tf.gather(raw_prediction, indices)
                prediction100 = tf.gather(raw_prediction100, indices)
                prediction075 = tf.gather(raw_prediction075, indices075)
                prediction05 = tf.gather(raw_prediction05, indices05)

                # Pixel-wise softmax_cross_entropy loss
                #loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction, labels=gt)
                loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                '''
                coefficients = [0.01460247, 1.25147725, 2.88479363, 1.20348121, 1.65261654, 1.67514772,
                                0.62338799, 0.7729363,  0.42038501, 0.98557268, 1.31867536, 0.85313332,
                                0.67227604, 1.21317965, 1.        , 0.24263748, 1.80877607, 1.3082213,
                                0.79664027, 0.72543945, 1.27823374]
                '''
                #loss = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction)
                #loss100 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction100, labels=gt)
                loss100 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction100,
                    labels=tf.reshape(label_proc[0],
                                      (h * w, self.conf.num_classes)))
                #loss100 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc[0], (h*w, self.conf.num_classes)), logits=raw_prediction100)
                #loss075 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction075, labels=gt075)
                loss075 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction075,
                    labels=tf.reshape(label_proc075[0],
                                      (int(h * 0.75) * int(w * 0.75),
                                       self.conf.num_classes)))
                #loss075 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc075[0], (int(h * 0.75) * int(w * 0.75), self.conf.num_classes)), logits=raw_prediction075)
                #loss05 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=prediction05, labels=gt05)
                loss05 = tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=raw_prediction05,
                    labels=tf.reshape(
                        label_proc05[0],
                        (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)))
                #loss05 = weighted_loss(self.conf.num_classes, coefficients, labels=tf.reshape(label_proc05[0], (int(h * 0.5) * int(w * 0.5), self.conf.num_classes)), logits=raw_prediction05)

                # L2 regularization
                l2_losses = [
                    self.conf.weight_decay * tf.nn.l2_loss(v)
                    for v in all_trainable if 'weights' in v.name
                ]

                # Loss function
                self.reduced_loss = tf.reduce_mean(loss) + tf.reduce_mean(
                    loss100) + tf.reduce_mean(loss075) + tf.reduce_mean(
                        loss05) + tf.add_n(l2_losses)

                # Define optimizers
                # 'poly' learning rate
                base_lr = tf.constant(self.conf.learning_rate)
                self.curr_step = tf.placeholder(dtype=tf.float32, shape=())
                learning_rate = tf.scalar_mul(
                    base_lr,
                    tf.pow((1 - self.curr_step / self.conf.num_steps),
                           self.conf.power))
                # We use several optimizers to emulate Caffe's lr_mult parameter,
                # which controls the effective learning rate for each layer.
                opt_encoder = tf.train.MomentumOptimizer(
                    learning_rate, self.conf.momentum)
                opt_decoder_w = tf.train.MomentumOptimizer(
                    learning_rate * 10.0, self.conf.momentum)
                opt_decoder_b = tf.train.MomentumOptimizer(
                    learning_rate * 20.0, self.conf.momentum)
                opt_crf = tf.train.MomentumOptimizer(learning_rate,
                                                     self.conf.momentum)

                # Gradient accumulation
                # Define a variable to accumulate gradients.
                accum_grads = [
                    tf.Variable(tf.zeros_like(v.initialized_value()),
                                trainable=False) for v in encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable
                ]

                # Define an operation to clear the accumulated gradients for next batch.
                self.zero_op = [
                    v.assign(tf.zeros_like(v)) for v in accum_grads
                ]
                # To make sure each layer gets updated by different lr's, we do not use 'minimize' here.
                # Instead, we separate the steps compute_grads+update_params.
                # Compute grads
                grads = tf.gradients(
                    self.reduced_loss, encoder_trainable +
                    decoder_w_trainable + decoder_b_trainable + crf_trainable)
                # Accumulate and normalise the gradients.
                self.accum_grads_op = [
                    accum_grads[i].assign_add(grad /
                                              self.conf.grad_update_every)
                    for i, grad in enumerate(grads)
                ]

                grads_encoder = accum_grads[:len(encoder_trainable)]
                grads_decoder_w = accum_grads[len(encoder_trainable
                                                  ):len(encoder_trainable) +
                                              len(decoder_w_trainable)]
                grads_decoder_b = accum_grads[(
                    len(encoder_trainable) +
                    len(decoder_w_trainable)):(len(encoder_trainable) +
                                               len(decoder_w_trainable) +
                                               len(decoder_b_trainable))]
                grads_crf = accum_grads[
                    len(encoder_trainable) + len(decoder_w_trainable) +
                    len(decoder_b_trainable
                        ):]  # assuming crf gradients are appended to the end

                zip_encoder.append(list(zip(grads_encoder, encoder_trainable)))
                zip_decoder_b.append(
                    list(zip(grads_decoder_b, decoder_b_trainable)))
                zip_decoder_w.append(
                    list(zip(grads_decoder_w, decoder_w_trainable)))
                zip_crf.append(list(zip(grads_crf, crf_trainable)))

        avg_grads_encoder = average_gradients(zip_encoder)
        avg_grads_decoder_w = average_gradients(zip_decoder_w)
        avg_grads_decoder_b = average_gradients(zip_decoder_b)
        avg_grads_crf = average_gradients(zip_crf)

        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                # Update params
                train_op_conv = opt_encoder.apply_gradients(avg_grads_encoder)
                train_op_fc_w = opt_decoder_w.apply_gradients(
                    avg_grads_decoder_w)
                train_op_fc_b = opt_decoder_b.apply_gradients(
                    avg_grads_decoder_b)
                train_op_crf = opt_crf.apply_gradients(avg_grads_crf)

        # Finally, get the train_op!
        update_ops = tf.get_collection(
            tf.GraphKeys.UPDATE_OPS
        )  # for collecting moving_mean and moving_variance
        with tf.control_dependencies(update_ops):
            self.train_op = tf.group(train_op_fc_w, train_op_fc_b,
                                     train_op_crf)  # train_op_conv is left out here, so encoder weights are not updated

        # Saver for storing checkpoints of the model
        self.saver = tf.train.Saver(var_list=tf.global_variables(),
                                    max_to_keep=0)

        # Loader for loading the pre-trained model
        for i in range(len(gpu_list)):
            with tf.device(gpu_list[i]):
                self.loaders.append(tf.train.Saver(var_list=restore_vars[i]))
                #self.loaders.append(tf.train.Saver(var_list=tf.global_variables()))

        # Training summary
        # Processed predictions: for visualisation.
        raw_output_up = tf.image.resize_bilinear(raw_output, input_size)
        raw_output_up = tf.argmax(raw_output_up, axis=3)
        self.pred = tf.expand_dims(raw_output_up, axis=3)
        # Image summary.
        images_summary = tf.py_func(inv_preprocess,
                                    [self.image_batch, 1, IMG_MEAN], tf.uint8)
        labels_summary = tf.py_func(
            decode_labels, [self.label_batch, 1, self.conf.num_classes],
            tf.uint8)
        preds_summary = tf.py_func(decode_labels,
                                   [self.pred, 1, self.conf.num_classes],
                                   tf.uint8)
        self.total_summary = tf.summary.image(
            'images',
            tf.concat(axis=2,
                      values=[images_summary, labels_summary, preds_summary]),
            max_outputs=1)  # Concatenate row-wise.
        if not os.path.exists(self.conf.logdir):
            os.makedirs(self.conf.logdir)
        self.summary_writer = tf.summary.FileWriter(
            self.conf.logdir, graph=tf.get_default_graph())
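
The average_gradients helper called above is not included in this snippet. A minimal sketch of what it is usually expected to do, following the classic TF-1.x multi-tower pattern, is shown below; the details are assumptions, not the original implementation. It receives one list of (grad, var) pairs per GPU and returns a single list of (averaged_grad, var) pairs that can be passed to apply_gradients().

import tensorflow as tf

def average_gradients(tower_grads):
    ''' Average gradients across GPU towers (sketch, assumed behaviour).

    tower_grads: list with one entry per GPU, each entry being a list of
    (gradient, variable) pairs such as the zip_* lists built above.
    '''
    average_grads = []
    # zip(*tower_grads) groups together the (grad, var) pairs that refer to
    # the same variable on the different GPUs.
    for grad_and_vars in zip(*tower_grads):
        grads = [tf.expand_dims(g, 0) for g, _ in grad_and_vars]
        grad = tf.reduce_mean(tf.concat(grads, axis=0), axis=0)
        # The variable is shared across towers, so take it from the first pair.
        average_grads.append((grad, grad_and_vars[0][1]))
    return average_grads
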
コード例 #8
0
def main():
    """
        Create the model and start the training.
    """

    args = get_arguments()

    if (args.dataset not in DATASET_LIST):
        print ("The dataset is not supported")
        return False
    if (args.segmentation_model not in SEGMENTATION_MODEL_LIST):
        print ("The segmentation model is not supported")
        return False
    
    h, w = map(int, args.input_size.split(','))
    input_size = (h, w)

    num_classes = DATASET_TO_CLASSES[args.dataset]
    
    tf.set_random_seed(args.random_seed)    

    print ("load data...")
    
    # Load training batch
    train_samples=seg_dataset.get_sample( 
                                    dataset_name = args.dataset, 
                                    split = args.train_split, 
                                    dataset_dir = args.data_dir,
                                    train_crop_size = input_size,
                                    num_classes = num_classes,
                                    num_samples = args.num_train_samples,
                                    ignore_label = args.ignore_label,
                                    min_scale_factor = args.min_scale_factor,
                                    max_scale_factor = args.max_scale_factor,
                                    scale_factor_step_size = args.scale_factor_step_size,
                                    is_training = True,
                                    batch_size = args.batch_size,
                                    mean_pixel = IMG_MEAN)
    # Load validation batch
    val_samples=seg_dataset.get_sample( 
                                    dataset_name = args.dataset, 
                                    split = args.val_split, 
                                    dataset_dir = args.data_dir,
                                    train_crop_size = input_size,
                                    num_classes = num_classes,
                                    num_samples = args.num_val_samples,
                                    ignore_label = args.ignore_label,                                    
                                    min_scale_factor = 1,
                                    max_scale_factor = 1,
                                    scale_factor_step_size = 0,
                                    is_training = False,
                                    batch_size = args.batch_size,
                                    mean_pixel = IMG_MEAN)
    
    print ("create network...")

    image_batch = tf.placeholder(dtype=tf.float32, shape = [args.batch_size,input_size[0], input_size[1], 3])
    label_batch = tf.placeholder(dtype=tf.uint8, shape = [args.batch_size,input_size[0], input_size[1], 1])    
    output = build_model(inputs = image_batch, num_classes = num_classes, segmentation_model = args.segmentation_model, is_training = args.is_training)

    # define the variables that need to be restored from the pre-trained model
    restore_var = [v for v in tf.global_variables() if args.pretrained_model in v.name]

    # compute cross-entropy loss over the valid (non-ignored) pixels
    pred_vector = tf.reshape(output, [-1, num_classes])
    label_proc = prepare_label(label_batch, output, num_classes=num_classes, one_hot=False) 
    
    label_vector = tf.reshape(label_proc, [-1,])
    indices = tf.squeeze(tf.where(tf.less_equal(label_vector, num_classes - 1)), 1)
    label_vector = tf.cast(tf.gather(label_vector, indices), tf.int32)
    pred_vector = tf.gather(pred_vector, indices)     
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits = pred_vector, labels = label_vector)
    l2_losses = [args.weight_decay * tf.nn.l2_loss(v) for v in tf.trainable_variables() if 'weights' in v.name]
    reduced_loss = tf.reduce_mean(loss) + tf.add_n(l2_losses)
    
    # obtain the final segmentation result via bilinear interpolation
    input_shape = tf.shape(image_batch)[1:3]
    pred_img = tf.image.resize_bilinear(output, size = input_shape)     
    pred_img = tf.argmax(pred_img, axis = 3)    
    pred_img = tf.expand_dims(pred_img, axis = 3)    
    pred_img = tf.cast(pred_img, tf.uint8)
    
    # Add summaries for images, labels, semantic predictions and loss
    images_summary = tf.py_func(inv_preprocess, [image_batch, args.save_num_images, IMG_MEAN], tf.uint8)
    labels_summary = tf.py_func(decode_labels, [label_batch, args.dataset, args.save_num_images], tf.uint8)
    preds_summary = tf.py_func(decode_labels, [pred_img, args.dataset, args.save_num_images], tf.uint8)
    image_summary = tf.summary.image('images', 
                                     tf.concat(axis=2, values=[images_summary, labels_summary, preds_summary]), 
                                     max_outputs=args.save_num_images) # Concatenate row-wise.
    loss_summary = tf.summary.scalar('loss',reduced_loss)
    total_summary = tf.summary.merge_all()
    train_summary_writer = tf.summary.FileWriter(args.snapshot_dir + '/train', graph=tf.get_default_graph())
    val_summary_writer = tf.summary.FileWriter(args.snapshot_dir + '/val', graph=tf.get_default_graph())
   
    # define learning rate strategy
    base_lr = tf.constant(args.learning_rate)
    step_ph = tf.placeholder(dtype=tf.float32, shape=())
    global_step = tf.get_variable(name="global_step",initializer=0,trainable = False)
    learning_rate = tf.scalar_mul(base_lr, tf.pow((1 - step_ph / args.num_steps), args.power))
    
    # Build the optimizer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)   
    train_op = tf.train.MomentumOptimizer(learning_rate, args.momentum).minimize(reduced_loss)
        
    # start session and initialize global variables and local variables.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    init = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()) 
    sess.run(init)
    
    # create a saver to save or restore model 
    saver = tf.train.Saver(var_list = tf.global_variables(), max_to_keep=5)
    if args.restore_from is not None:
        loader = tf.train.Saver(var_list = restore_var)
        load(loader, sess, args.restore_from)
    
    print ("start training...")    
    
    # Before loading data, create a coordinator and start the queue runners.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord, sess=sess)

    for step in range(args.num_steps):  
        
        print ("global step:", step)  
        train_batch = sess.run(train_samples)        
        feed_dict = { step_ph : step , 
                      image_batch : train_batch['image'],
                      label_batch : train_batch['label'] }
        sess.run([train_op,update_ops], feed_dict = feed_dict)
        
        # save summaries periodically.
        if step % args.save_pred_every == 0:
            summary = sess.run(loss_summary, feed_dict = feed_dict)
            train_summary_writer.add_summary(summary, step)
            
            val_batch = sess.run(val_samples)
            feed_dict = { step_ph : step , 
                          image_batch:val_batch['image'], 
                          label_batch : val_batch['label'] }
            summary = sess.run(total_summary, feed_dict = feed_dict)
            val_summary_writer.add_summary(summary, step)
        
        # save a checkpoint periodically.
        if step % args.save_model_every == 0:
            save(saver, sess, args.snapshot_dir, step)
       
    print ("train over !!!")  

    coord.request_stop()
    coord.join(threads)
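
The save and load helpers used in the training loop above are also not shown in this example. Under the usual convention for TF-1.x training scripts they are thin wrappers around tf.train.Saver; the sketch below illustrates the assumed behaviour and is not the original implementation.

import os
import tensorflow as tf

def save(saver, sess, logdir, step):
    ''' Write a checkpoint for the current step into logdir (sketch). '''
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    checkpoint_path = os.path.join(logdir, 'model.ckpt')  # assumed file name
    saver.save(sess, checkpoint_path, global_step=step)
    print('Checkpoint saved to {}'.format(logdir))

def load(saver, sess, ckpt_path):
    ''' Restore the variables held by the saver from a saved checkpoint (sketch). '''
    saver.restore(sess, ckpt_path)
    print('Restored model parameters from {}'.format(ckpt_path))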