def __init__(self, conf_file="config2.json"): with open(conf_file) as f: self.config = json.load(f) self.num_classes = self.config["NUM_CLASSES"] self.use_vgg = self.config["USE_VGG"] if self.use_vgg is False: self.vgg_param_dict = None print("No VGG path in config, so learning from scratch") else: self.vgg16_npy_path = self.config["VGG_FILE"] self.vgg_param_dict = np.load(self.vgg16_npy_path, encoding='latin1').item() print("VGG parameter loaded") self.train_file = self.config["TRAIN_FILE"] self.val_file = self.config["VAL_FILE"] self.test_file = self.config["TEST_FILE"] self.img_prefix = self.config["IMG_PREFIX"] self.label_prefix = self.config["LABEL_PREFIX"] self.bayes = self.config["BAYES"] self.opt = self.config["OPT"] self.saved_dir = self.config["SAVE_MODEL_DIR"] self.input_w = self.config["INPUT_WIDTH"] self.input_h = self.config["INPUT_HEIGHT"] self.input_c = self.config["INPUT_CHANNELS"] self.tb_logs = self.config["TB_LOGS"] self.batch_size = self.config["BATCH_SIZE"] self.train_loss, self.train_accuracy = [], [] self.val_loss, self.val_acc = [], [] self.model_version = 0 # used for saving the model self.saver = None self.images_tr, self.labels_tr = None, None self.images_val, self.labels_val = None, None self.graph = tf.Graph() with self.graph.as_default(): self.sess = tf.Session() self.batch_size_pl = tf.placeholder(tf.int64, shape=[], name="batch_size") self.is_training_pl = tf.placeholder(tf.bool, name="is_training") self.with_dropout_pl = tf.placeholder(tf.bool, name="with_dropout") self.keep_prob_pl = tf.placeholder(tf.float32, shape=None, name="keep_rate") self.inputs_pl = tf.placeholder(tf.float32, [None, self.input_h, self.input_w, self.input_c]) self.labels_pl = tf.placeholder(tf.int64, [None, self.input_h, self.input_w, 1]) # Before enter the images into the architecture, we need to do Local Contrast Normalization # But it seems a bit complicated, so we use Local Response Normalization which implement in Tensorflow # Reference page:https://www.tensorflow.org/api_docs/python/tf/nn/local_response_normalization self.norm1 = tf.nn.lrn(self.inputs_pl, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75, name='norm1') # first box of convolution layer,each part we do convolution two times, so we have conv1_1, and conv1_2 self.conv1_1 = conv_layer(self.norm1, "conv1_1", [3, 3, 3, 64], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv1_2 = conv_layer(self.conv1_1, "conv1_2", [3, 3, 64, 64], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.pool1, self.pool1_index, self.shape_1 = max_pool(self.conv1_2, 'pool1') # Second box of convolution layer(4) self.conv2_1 = conv_layer(self.pool1, "conv2_1", [3, 3, 64, 128], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv2_2 = conv_layer(self.conv2_1, "conv2_2", [3, 3, 128, 128], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.pool2, self.pool2_index, self.shape_2 = max_pool(self.conv2_2, 'pool2') # Third box of convolution layer(7) self.conv3_1 = conv_layer(self.pool2, "conv3_1", [3, 3, 128, 256], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv3_2 = conv_layer(self.conv3_1, "conv3_2", [3, 3, 256, 256], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv3_3 = conv_layer(self.conv3_2, "conv3_3", [3, 3, 256, 256], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.pool3, self.pool3_index, self.shape_3 = max_pool(self.conv3_3, 'pool3') # Fourth box of convolution layer(10) if self.bayes: self.dropout1 = tf.layers.dropout(self.pool3, rate=(1 - 
self.keep_prob_pl), training=self.with_dropout_pl, name="dropout1") self.conv4_1 = conv_layer(self.dropout1, "conv4_1", [3, 3, 256, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) else: self.conv4_1 = conv_layer(self.pool3, "conv4_1", [3, 3, 256, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv4_2 = conv_layer(self.conv4_1, "conv4_2", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv4_3 = conv_layer(self.conv4_2, "conv4_3", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.pool4, self.pool4_index, self.shape_4 = max_pool(self.conv4_3, 'pool4') # Fifth box of convolution layers(13) if self.bayes: self.dropout2 = tf.layers.dropout(self.pool4, rate=(1 - self.keep_prob_pl), training=self.with_dropout_pl, name="dropout2") self.conv5_1 = conv_layer(self.dropout2, "conv5_1", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) else: self.conv5_1 = conv_layer(self.pool4, "conv5_1", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv5_2 = conv_layer(self.conv5_1, "conv5_2", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.conv5_3 = conv_layer(self.conv5_2, "conv5_3", [3, 3, 512, 512], self.is_training_pl, self.use_vgg, self.vgg_param_dict) self.pool5, self.pool5_index, self.shape_5 = max_pool(self.conv5_3, 'pool5') # ---------------------So Now the encoder process has been Finished--------------------------------------# # ------------------Then Let's start Decoder Process-----------------------------------------------------# # First box of deconvolution layers(3) if self.bayes: self.dropout3 = tf.layers.dropout(self.pool5, rate=(1 - self.keep_prob_pl), training=self.with_dropout_pl, name="dropout3") self.deconv5_1 = up_sampling(self.dropout3, self.pool5_index, self.shape_5, self.batch_size_pl, name="unpool_5") else: self.deconv5_1 = up_sampling(self.pool5, self.pool5_index, self.shape_5, self.batch_size_pl, name="unpool_5") self.deconv5_2 = conv_layer(self.deconv5_1, "deconv5_2", [3, 3, 512, 512], self.is_training_pl) self.deconv5_3 = conv_layer(self.deconv5_2, "deconv5_3", [3, 3, 512, 512], self.is_training_pl) self.deconv5_4 = conv_layer(self.deconv5_3, "deconv5_4", [3, 3, 512, 512], self.is_training_pl) # Second box of deconvolution layers(6) if self.bayes: self.dropout4 = tf.layers.dropout(self.deconv5_4, rate=(1 - self.keep_prob_pl), training=self.with_dropout_pl, name="dropout4") self.deconv4_1 = up_sampling(self.dropout4, self.pool4_index, self.shape_4, self.batch_size_pl, name="unpool_4") else: self.deconv4_1 = up_sampling(self.deconv5_4, self.pool4_index, self.shape_4, self.batch_size_pl, name="unpool_4") self.deconv4_2 = conv_layer(self.deconv4_1, "deconv4_2", [3, 3, 512, 512], self.is_training_pl) self.deconv4_3 = conv_layer(self.deconv4_2, "deconv4_3", [3, 3, 512, 512], self.is_training_pl) self.deconv4_4 = conv_layer(self.deconv4_3, "deconv4_4", [3, 3, 512, 256], self.is_training_pl) # Third box of deconvolution layers(9) if self.bayes: self.dropout5 = tf.layers.dropout(self.deconv4_4, rate=(1 - self.keep_prob_pl), training=self.with_dropout_pl, name="dropout5") self.deconv3_1 = up_sampling(self.dropout5, self.pool3_index, self.shape_3, self.batch_size_pl, name="unpool_3") else: self.deconv3_1 = up_sampling(self.deconv4_4, self.pool3_index, self.shape_3, self.batch_size_pl, name="unpool_3") self.deconv3_2 = conv_layer(self.deconv3_1, "deconv3_2", [3, 3, 256, 256], self.is_training_pl) self.deconv3_3 = 
conv_layer(self.deconv3_2, "deconv3_3", [3, 3, 256, 256], self.is_training_pl) self.deconv3_4 = conv_layer(self.deconv3_3, "deconv3_4", [3, 3, 256, 128], self.is_training_pl) # Fourth box of deconvolution layers(11) if self.bayes: self.dropout6 = tf.layers.dropout(self.deconv3_4, rate=(1 - self.keep_prob_pl), training=self.with_dropout_pl, name="dropout6") self.deconv2_1 = up_sampling(self.dropout6, self.pool2_index, self.shape_2, self.batch_size_pl, name="unpool_2") else: self.deconv2_1 = up_sampling(self.deconv3_4, self.pool2_index, self.shape_2, self.batch_size_pl, name="unpool_2") self.deconv2_2 = conv_layer(self.deconv2_1, "deconv2_2", [3, 3, 128, 128], self.is_training_pl) self.deconv2_3 = conv_layer(self.deconv2_2, "deconv2_3", [3, 3, 128, 64], self.is_training_pl) # Fifth box of deconvolution layers(13) self.deconv1_1 = up_sampling(self.deconv2_3, self.pool1_index, self.shape_1, self.batch_size_pl, name="unpool_1") self.deconv1_2 = conv_layer(self.deconv1_1, "deconv1_2", [3, 3, 64, 64], self.is_training_pl) self.deconv1_3 = conv_layer(self.deconv1_2, "deconv1_3", [3, 3, 64, 64], self.is_training_pl) with tf.variable_scope('conv_classifier') as scope: self.kernel = variable_with_weight_decay('weights', initializer=initialization(1, 64), shape=[1, 1, 64, self.num_classes], wd=False) self.conv = tf.nn.conv2d(self.deconv1_3, self.kernel, [1, 1, 1, 1], padding='SAME') self.biases = variable_with_weight_decay('biases', tf.constant_initializer(0.0), shape=[self.num_classes], wd=False) self.logits = tf.nn.bias_add(self.conv, self.biases, name=scope.name)
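
# NOTE: the helpers below are illustrative sketches only, not the repository's actual
# definitions of max_pool / up_sampling. They show the SegNet-style unpooling that the
# constructors here rely on: max_pool records the argmax index of each 2x2 pooling window,
# and up_sampling scatters the pooled activations back to exactly those positions.
def max_pool(inputs, name):
    """2x2 max pooling that also returns the argmax indices and the pre-pool shape."""
    with tf.variable_scope(name):
        value, index = tf.nn.max_pool_with_argmax(inputs,
                                                  ksize=[1, 2, 2, 1],
                                                  strides=[1, 2, 2, 1],
                                                  padding='SAME',
                                                  name=name)
        return value, index, inputs.get_shape().as_list()


def up_sampling(pool, ind, output_shape, batch_size, name=None):
    """Unpool by scattering `pool` values back to the positions stored in `ind`."""
    with tf.variable_scope(name):
        pool_ = tf.reshape(pool, [-1])
        # Build (batch, flat_index) pairs so scatter_nd can place every value
        # into the right example of the batch.
        batch_range = tf.reshape(tf.range(batch_size, dtype=ind.dtype),
                                 [tf.shape(pool)[0], 1, 1, 1])
        b = tf.reshape(tf.ones_like(ind) * batch_range, [-1, 1])
        ind_ = tf.concat([b, tf.reshape(ind, [-1, 1])], 1)
        flat_size = output_shape[1] * output_shape[2] * output_shape[3]
        ret = tf.scatter_nd(ind_, pool_, shape=[batch_size, flat_size])
        # Restore the spatial layout recorded before pooling.
        return tf.reshape(ret, [batch_size, output_shape[1], output_shape[2], output_shape[3]])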
def __init__(self, conf_file="config.json"): with open(conf_file) as f: self.config = json.load(f) self.num_classes = self.config["NUM_CLASSES"] self.use_vgg = self.config["USE_VGG"] if self.use_vgg is False: self.vgg_param_dict = None print("No VGG path in config, so learning from scratch") else: self.vgg16_npy_path = self.config["VGG_FILE"] self.vgg_param_dict = np.load(self.vgg16_npy_path, encoding='latin1').item() print("VGG parameter loaded") self.train_file = self.config["TRAIN_FILE"] self.val_file = self.config["VAL_FILE"] self.test_file = self.config["TEST_FILE"] self.img_prefix = self.config["IMG_PREFIX"] self.label_prefix = self.config["LABEL_PREFIX"] self.opt = self.config["OPT"] self.input_w = self.config["INPUT_WIDTH"] self.input_h = self.config["INPUT_HEIGHT"] self.input_c = self.config["INPUT_CHANNELS"] self.tb_logs = self.config["TB_LOGS"] self.saved_dir = self.config["SAVE_MODEL_DIR"] self.images_tr, self.labels_tr = None, None self.images_val, self.labels_val = None, None self.train_loss, self.train_accuracy = [], [] self.val_loss, self.val_acc = [], [] self.model_version = 0 # used for saving the model self.saver = None self.graph = tf.Graph() with self.graph.as_default(): self.sess = tf.Session() self.batch_size_pl = tf.placeholder(tf.int64, shape=[], name="batch_size") #self.is_training_pl = tf.placeholder(tf.bool, name="is_training") self.inputs_pl = tf.placeholder( tf.float32, [None, self.input_h, self.input_w, self.input_c]) self.labels_pl = tf.placeholder( tf.int64, [None, self.input_h, self.input_w, 1]) # perform local response normalization self.norm1 = tf.nn.lrn(self.inputs_pl, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75, name='norm1') # first box of convolution layer,each part we do convolution two times, so we have conv1_1, and conv1_2 self.conv1_1 = conv_layer(self.norm1, "conv1_1", [3, 3, 3, 64], self.use_vgg, self.vgg_param_dict) self.conv1_2 = conv_layer(self.conv1_1, "conv1_2", [3, 3, 64, 64], self.use_vgg, self.vgg_param_dict) self.pool1, self.pool1_index, self.shape_1 = max_pool( self.conv1_2, 'pool1') # Second box of convolution layer(4) self.conv2_1 = conv_layer(self.pool1, "conv2_1", [3, 3, 64, 128], self.use_vgg, self.vgg_param_dict) self.conv2_2 = conv_layer(self.conv2_1, "conv2_2", [3, 3, 128, 128], self.use_vgg, self.vgg_param_dict) self.pool2, self.pool2_index, self.shape_2 = max_pool( self.conv2_2, 'pool2') # Third box of convolution layer(7) self.conv3_1 = conv_layer(self.pool2, "conv3_1", [3, 3, 128, 256], self.use_vgg, self.vgg_param_dict) self.conv3_2 = conv_layer(self.conv3_1, "conv3_2", [3, 3, 256, 256], self.use_vgg, self.vgg_param_dict) self.conv3_3 = conv_layer(self.conv3_2, "conv3_3", [3, 3, 256, 256], self.use_vgg, self.vgg_param_dict) self.pool3, self.pool3_index, self.shape_3 = max_pool( self.conv3_3, 'pool3') # Fourth box of convolution layer(10) self.conv4_1 = conv_layer(self.pool3, "conv4_1", [3, 3, 256, 512], self.use_vgg, self.vgg_param_dict) self.conv4_2 = conv_layer(self.conv4_1, "conv4_2", [3, 3, 512, 512], self.use_vgg, self.vgg_param_dict) self.conv4_3 = conv_layer(self.conv4_2, "conv4_3", [3, 3, 512, 512], self.use_vgg, self.vgg_param_dict) self.pool4, self.pool4_index, self.shape_4 = max_pool( self.conv4_3, 'pool4') # Fifth box of convolution layers(13) self.conv5_1 = conv_layer(self.pool4, "conv5_1", [3, 3, 512, 512], self.use_vgg, self.vgg_param_dict) self.conv5_2 = conv_layer(self.conv5_1, "conv5_2", [3, 3, 512, 512], self.use_vgg, self.vgg_param_dict) self.conv5_3 = conv_layer(self.conv5_2, "conv5_3", [3, 3, 
512, 512], self.use_vgg, self.vgg_param_dict) self.pool5, self.pool5_index, self.shape_5 = max_pool( self.conv5_3, 'pool5') # ---------------------So Now the encoder process has been Finished--------------------------------------# # ------------------Then Let's start Decoder Process-----------------------------------------------------# # First box of deconvolution layers(3) self.deconv5_1 = up_sampling(self.pool5, self.pool5_index, self.shape_5, self.batch_size_pl, name="unpool_5") self.deconv5_2 = conv_layer(self.deconv5_1, "deconv5_2", [3, 3, 512, 512]) self.deconv5_3 = conv_layer(self.deconv5_2, "deconv5_3", [3, 3, 512, 512]) self.deconv5_4 = conv_layer(self.deconv5_3, "deconv5_4", [3, 3, 512, 512]) # Second box of deconvolution layers(6) self.deconv4_1 = up_sampling(self.deconv5_4, self.pool4_index, self.shape_4, self.batch_size_pl, name="unpool_4") self.deconv4_2 = conv_layer(self.deconv4_1, "deconv4_2", [3, 3, 512, 512]) self.deconv4_3 = conv_layer(self.deconv4_2, "deconv4_3", [3, 3, 512, 512]) self.deconv4_4 = conv_layer(self.deconv4_3, "deconv4_4", [3, 3, 512, 256]) # Third box of deconvolution layers(9) self.deconv3_1 = up_sampling(self.deconv4_4, self.pool3_index, self.shape_3, self.batch_size_pl, name="unpool_3") self.deconv3_2 = conv_layer(self.deconv3_1, "deconv3_2", [3, 3, 256, 256]) self.deconv3_3 = conv_layer(self.deconv3_2, "deconv3_3", [3, 3, 256, 256]) self.deconv3_4 = conv_layer(self.deconv3_3, "deconv3_4", [3, 3, 256, 128]) # Fourth box of deconvolution layers(11) self.deconv2_1 = up_sampling(self.deconv3_4, self.pool2_index, self.shape_2, self.batch_size_pl, name="unpool_2") self.deconv2_2 = conv_layer(self.deconv2_1, "deconv2_2", [3, 3, 128, 128]) self.deconv2_3 = conv_layer(self.deconv2_2, "deconv2_3", [3, 3, 128, 64]) # Fifth box of deconvolution layers(13) self.deconv1_1 = up_sampling(self.deconv2_3, self.pool1_index, self.shape_1, self.batch_size_pl, name="unpool_1") self.deconv1_2 = conv_layer(self.deconv1_1, "deconv1_2", [3, 3, 64, 64]) self.deconv1_3 = conv_layer(self.deconv1_2, "deconv1_3", [3, 3, 64, 64]) with tf.variable_scope('conv_classifier') as scope: self.kernel = tf.get_variable( 'weights', shape=[1, 1, 64, self.num_classes], initializer=initialization(1, 64)) self.conv = tf.nn.conv2d(self.deconv1_3, self.kernel, [1, 1, 1, 1], padding='SAME') self.biases = tf.get_variable( 'biases', shape=[self.num_classes], initializer=tf.constant_initializer(0.0)) self.logits = tf.nn.bias_add(self.conv, self.biases, name=scope.name)
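
# NOTE: illustrative sketch only. The conv_layer helper used above is assumed to build a
# 3x3 convolution + bias + ReLU, optionally initialized from the pre-trained VGG16
# parameter dictionary; the repository's real definition may differ (for example it may
# also apply batch normalization driven by the is_training placeholder).
def conv_layer(bottom, name, shape, use_vgg=False, vgg_param_dict=None):
    with tf.variable_scope(name):
        if use_vgg and vgg_param_dict is not None and name in vgg_param_dict:
            # Encoder layers that share VGG16 names reuse the pre-trained weights/biases.
            init_w = tf.constant_initializer(vgg_param_dict[name][0])
            init_b = tf.constant_initializer(vgg_param_dict[name][1])
        else:
            init_w = tf.variance_scaling_initializer()
            init_b = tf.constant_initializer(0.0)
        filt = tf.get_variable('weights', shape=shape, initializer=init_w)
        bias = tf.get_variable('biases', shape=[shape[3]], initializer=init_b)
        conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
        return tf.nn.relu(tf.nn.bias_add(conv, bias))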
def __init__(self):
    # Number of possible classes per pixel
    self.n_classes = FLAGS.n_classes

    # Paths to the dataset (training/validation/test) summary files
    self.train_file = os.path.join(FLAGS.data_dir, 'train.txt')
    self.val_file = os.path.join(FLAGS.data_dir, 'validate.txt')
    self.test_file = os.path.join(FLAGS.data_dir, 'test.txt')

    self.input_w, self.input_h, self.input_c = FLAGS.input_dims
    self.images_tr, self.labels_tr = None, None
    self.images_val, self.labels_val = None, None

    # Create placeholders
    self.batch_size_pl = tf.placeholder(tf.int64, shape=[], name="batch_size")
    self.is_training_pl = tf.placeholder(tf.bool, name="is_training")
    self.with_dropout_pl = tf.placeholder(tf.bool, name="with_dropout")
    self.keep_prob_pl = tf.placeholder(tf.float32, shape=None, name="keep_rate")
    self.inputs_pl = tf.placeholder(tf.float32, [None, self.input_h, self.input_w, self.input_c])
    self.labels_pl = tf.placeholder(tf.int64, [None, self.input_h, self.input_w, 1])

    ##################
    # SegNet Encoder #
    ##################

    # SegNet specifies Local Contrast Normalization; Local Response Normalization is substituted here.
    self.norm1 = tf.nn.lrn(self.inputs_pl, depth_radius=5, bias=1.0, alpha=0.0001, beta=0.75,
                           name='norm1')

    # First set of convolution layers
    self.conv1_1 = conv_layer(self.norm1, "conv1_1", [3, 3, 3, 64], self.is_training_pl)
    self.conv1_2 = conv_layer(self.conv1_1, "conv1_2", [3, 3, 64, 64], self.is_training_pl)
    self.pool1, self.pool1_index, self.shape_1 = max_pool(self.conv1_2, 'pool1')

    # Second set of convolution layers
    self.conv2_1 = conv_layer(self.pool1, "conv2_1", [3, 3, 64, 128], self.is_training_pl)
    self.conv2_2 = conv_layer(self.conv2_1, "conv2_2", [3, 3, 128, 128], self.is_training_pl)
    self.pool2, self.pool2_index, self.shape_2 = max_pool(self.conv2_2, 'pool2')

    # Third set of convolution layers
    self.conv3_1 = conv_layer(self.pool2, "conv3_1", [3, 3, 128, 256], self.is_training_pl)
    self.conv3_2 = conv_layer(self.conv3_1, "conv3_2", [3, 3, 256, 256], self.is_training_pl)
    self.conv3_3 = conv_layer(self.conv3_2, "conv3_3", [3, 3, 256, 256], self.is_training_pl)
    self.pool3, self.pool3_index, self.shape_3 = max_pool(self.conv3_3, 'pool3')

    # Fourth set of convolution layers
    self.conv4_1 = conv_layer(self.pool3, "conv4_1", [3, 3, 256, 512], self.is_training_pl)
    self.conv4_2 = conv_layer(self.conv4_1, "conv4_2", [3, 3, 512, 512], self.is_training_pl)
    self.conv4_3 = conv_layer(self.conv4_2, "conv4_3", [3, 3, 512, 512], self.is_training_pl)
    self.pool4, self.pool4_index, self.shape_4 = max_pool(self.conv4_3, 'pool4')

    # Fifth set of convolution layers
    self.conv5_1 = conv_layer(self.pool4, "conv5_1", [3, 3, 512, 512], self.is_training_pl)
    self.conv5_2 = conv_layer(self.conv5_1, "conv5_2", [3, 3, 512, 512], self.is_training_pl)
    self.conv5_3 = conv_layer(self.conv5_2, "conv5_3", [3, 3, 512, 512], self.is_training_pl)
    self.pool5, self.pool5_index, self.shape_5 = max_pool(self.conv5_3, 'pool5')

    ##################
    # SegNet Decoder #
    ##################

    # First set of deconvolution layers
    self.deconv5_1 = up_sampling(self.pool5, self.pool5_index, self.shape_5, self.batch_size_pl,
                                 name="unpool_5")
    self.deconv5_2 = conv_layer(self.deconv5_1, "deconv5_2", [3, 3, 512, 512], self.is_training_pl)
    self.deconv5_3 = conv_layer(self.deconv5_2, "deconv5_3", [3, 3, 512, 512], self.is_training_pl)
    self.deconv5_4 = conv_layer(self.deconv5_3, "deconv5_4", [3, 3, 512, 512], self.is_training_pl)

    # Second set of deconvolution layers
    self.deconv4_1 = up_sampling(self.deconv5_4, self.pool4_index, self.shape_4, self.batch_size_pl,
                                 name="unpool_4")
    self.deconv4_2 = conv_layer(self.deconv4_1, "deconv4_2", [3, 3, 512, 512], self.is_training_pl)
    self.deconv4_3 = conv_layer(self.deconv4_2, "deconv4_3", [3, 3, 512, 512], self.is_training_pl)
    self.deconv4_4 = conv_layer(self.deconv4_3, "deconv4_4", [3, 3, 512, 256], self.is_training_pl)

    # Third set of deconvolution layers
    self.deconv3_1 = up_sampling(self.deconv4_4, self.pool3_index, self.shape_3, self.batch_size_pl,
                                 name="unpool_3")
    self.deconv3_2 = conv_layer(self.deconv3_1, "deconv3_2", [3, 3, 256, 256], self.is_training_pl)
    self.deconv3_3 = conv_layer(self.deconv3_2, "deconv3_3", [3, 3, 256, 256], self.is_training_pl)
    self.deconv3_4 = conv_layer(self.deconv3_3, "deconv3_4", [3, 3, 256, 128], self.is_training_pl)

    # Fourth set of deconvolution layers
    self.deconv2_1 = up_sampling(self.deconv3_4, self.pool2_index, self.shape_2, self.batch_size_pl,
                                 name="unpool_2")
    self.deconv2_2 = conv_layer(self.deconv2_1, "deconv2_2", [3, 3, 128, 128], self.is_training_pl)
    self.deconv2_3 = conv_layer(self.deconv2_2, "deconv2_3", [3, 3, 128, 64], self.is_training_pl)

    # Fifth set of deconvolution layers
    self.deconv1_1 = up_sampling(self.deconv2_3, self.pool1_index, self.shape_1, self.batch_size_pl,
                                 name="unpool_1")
    self.deconv1_2 = conv_layer(self.deconv1_1, "deconv1_2", [3, 3, 64, 64], self.is_training_pl)
    self.deconv1_3 = conv_layer(self.deconv1_2, "deconv1_3", [3, 3, 64, 64], self.is_training_pl)

    with tf.variable_scope('conv_classifier') as scope:
        self.kernel = variable_with_weight_decay('weights', initializer=initialization(1, 64),
                                                 shape=[1, 1, 64, self.n_classes], wd=False)
        self.conv = tf.nn.conv2d(self.deconv1_3, self.kernel, [1, 1, 1, 1], padding='SAME')
        self.biases = variable_with_weight_decay('biases', tf.constant_initializer(0.0),
                                                 shape=[self.n_classes], wd=False)
        self.logits = tf.nn.bias_add(self.conv, self.biases, name=scope.name)