Esempio n. 1
0
# --- Optimisation schedule -------------------------------------------------
# num_epochs is forwarded to the filename queue as its epoch limit.
num_epochs = None
learning_rate = 1e-6
training_rounds = 50000

# --- Input normalisation ---------------------------------------------------
# Per-channel (R, G, B) means taken from the VGG-16 data preprocessing.
_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94

# --- Locations on disk -----------------------------------------------------
# Training records, the FCN-32s checkpoint path and the VGG-16 checkpoint path.
filename = '/home/kris/PycharmProjects/eecs545/pascal_train.tfrecords'
fcn32s_parameters_dir = '/home/kris/PycharmProjects/eecs545/fcn_32s/fcn32s.ckpt'
vgg_16_parameters_dir = '/home/kris/PycharmProjects/eecs545/vgg_16.ckpt'
""" End of hyperparameters definition """

# Build a queue that yields the TFRecord file name(s) to read from.
filename_queue = tf.train.string_input_producer(
    [filename],
    num_epochs=num_epochs,
)

# Decode batches of (image, label) tensors from the queued TFRecord file.
images, labels = read_data_from_tfrecord.read_data_from_tfrecords(
    filename_queue,
    batch_size,
    is_training=True,
)
# Drop the trailing size-1 axis of the label tensor.
labels = tf.squeeze(labels, axis=-1)

# Zero-centre the images: convert to float32 and subtract the per-channel
# RGB means. No further scaling is applied.
channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
images = tf.cast(images, tf.float32) - channel_means

# Forward pass of the FCN-32s model in training mode; the call also returns
# the model's VGG variables, used below for checkpoint initialisation.
logits, vgg_variables = model.fcn_32s(images, num_classes, is_training=True)

# NOTE(review): presumably filters out pixels whose label is ambiguous so
# they do not contribute to the loss -- confirm against utils.remove_ambiguous.
valid_labels, valid_logits = utils.remove_ambiguous(labels, logits)

# Mean softmax cross-entropy over the retained labels/logits.
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(
        labels=valid_labels,
        logits=valid_logits,
    )
)

# Despite its name, `optimizer` holds the result of Adam's minimize(), not
# the optimizer object itself.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

# Most parameters can be initialized from the pre-trained weights of the
# original VGG-16 network.
# NOTE(review): judging by the name, the helper leaves out the fc8 layer --
# confirm against utils.extract_vgg_variables.
vgg_variables_without_fc8 = utils.extract_vgg_variables(vgg_variables)
# --- Evaluation settings ---------------------------------------------------
# Number of classes handed to the model, and the epoch limit for the
# filename queue.
num_classes, num_epochs = 21, 1

# --- Input normalisation ---------------------------------------------------
# Per-channel (R, G, B) means taken from the VGG-16 data preprocessing.
_R_MEAN, _G_MEAN, _B_MEAN = 123.68, 116.78, 103.94

# --- Locations on disk -----------------------------------------------------
# Validation records and the FCN-32s checkpoint path.
filename = '/home/kris/PycharmProjects/eecs545/pascal_val.tfrecords'
fcn32s_parameters_dir = '/home/kris/PycharmProjects/eecs545/fcn_32s/fcn32s.ckpt'
""" end of hyperparameters definition """

# Build a queue that yields the TFRecord file name(s) to read from.
filename_queue = tf.train.string_input_producer(
    [filename], num_epochs=num_epochs)

# Decode one (image, label) pair at a time in non-training mode; no batch
# size is given to the reader.
image, label = read_data_from_tfrecord.read_data_from_tfrecords(
    filename_queue,
    None,
    is_training=False,
)
# Add a leading axis of size 1 to both tensors.
# NOTE(review): presumably so the single example looks like a batch of one
# to the model -- confirm against model.fcn_32s.
image = tf.expand_dims(image, axis=0)
label = tf.expand_dims(label, axis=0)

# Zero-centre the image: convert to float32 and subtract the per-channel
# RGB means. No further scaling is applied.
channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
image = tf.cast(image, tf.float32) - channel_means

# Snap the image's two middle dimensions (axes 1 and 2) to the nearest
# multiple of 32 and resize the image to that size.
# NOTE(review): presumably required because FCN-32s downsamples by a factor
# of 32 -- confirm against model.fcn_32s.
input_size = tf.shape(image)
# tf.cast replaces the deprecated tf.to_float / tf.to_int32 helpers and
# matches the casts used elsewhere in this script.
round_size = tf.round(tf.cast(input_size[1:3], tf.float32) / 32) * 32
# A dimension smaller than 16 px would round down to 0 and produce an empty
# resize target; clamp to the smallest valid size instead.
round_size = tf.maximum(round_size, 32.0)
resized_image = tf.image.resize_images(image, tf.cast(round_size, tf.int32))

# Predict the segmentation
logits, vgg_variables = model.fcn_32s(resized_image,
                                      num_classes,