    return target_labels, target_scores, target_loc


if __name__ == "__main__":

    from anchors import ssd_anchor_all_layers
    import matplotlib.pyplot as plt
    from read_data import Reader
    import cv2

    reader = Reader(is_training=True)

    while True:

        value = reader.generate()

        image = value['image'].astype(np.int)
        image_copy = image.copy()
        true_labels = value['classes']
        true_boxes = value['boxes']
        image_shape = image.shape

        layers_anchors = ssd_anchor_all_layers(image)

        target_labels = []
        target_scores = []
        target_loc = []
        t = 0
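The encoding step exercised above matches each anchor against the ground-truth boxes by IoU. A minimal NumPy sketch of that overlap computation (a standalone illustration; the project's own ssd_bboxes_encode is defined elsewhere):

import numpy as np

def iou(anchor, boxes):
    # One anchor vs. N ground-truth boxes, all as [ymin, xmin, ymax, xmax].
    ymin = np.maximum(anchor[0], boxes[:, 0])
    xmin = np.maximum(anchor[1], boxes[:, 1])
    ymax = np.minimum(anchor[2], boxes[:, 2])
    xmax = np.minimum(anchor[3], boxes[:, 3])
    inter = np.maximum(ymax - ymin, 0.) * np.maximum(xmax - xmin, 0.)
    union = ((anchor[2] - anchor[0]) * (anchor[3] - anchor[1])
             + (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
             - inter)
    return inter / union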
# Assumed imports for this snippet (the `config` and `read_data` module names
# follow the rest of this project; adjust to your own layout):
import os

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

import config as cfg
from read_data import Reader


class Net(object):

    def __init__(self, is_training=True):

        self.size = cfg.TARGET_SIZE
        self.data_path = cfg.DATA_PATH
        self.model_path = cfg.MODEL_PATH
        self.epoches = cfg.EPOCHES
        self.batches = cfg.BATCHES
        self.lr = cfg.LEARNING_RATE
        self.batch_size = cfg.BATCH_SIZE
        self.cls_num = cfg.N_CLASSES
        self.reader = Reader()
        self.keep_rate = cfg.KEEP_RATE
        self.is_training = is_training
        self.model_name = cfg.MODEL_NAME

        self.x = tf.placeholder(tf.float32, [None, self.size, self.size, 3])
        self.y = tf.placeholder(tf.float32, [None, self.cls_num])

        self.y_hat = self.resnet(self.x)

        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=self.y, logits=self.y_hat))

        self.saver = tf.train.Saver()

        self.acc = self.calculate_acc(self.y, self.y_hat)

    def calculate_acc(self, labels, logits):

        right_pred = tf.equal(tf.argmax(labels, axis=-1),
                              tf.argmax(logits, axis=-1))
        accuracy = tf.reduce_mean(tf.cast(right_pred, tf.float32))

        return accuracy

    def resnet(self, inputs):

        with tf.variable_scope('RESNET'):

            net = slim.conv2d(inputs, 64, [7, 7], 2,
                              scope='conv7x7', padding='SAME')
            net = slim.max_pool2d(net, [2, 2], scope='pool1', padding='SAME')
            res = net

            # block1
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block2
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv2', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block3
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv3', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            # projection shortcut (stride 2) so the residual shapes match
            res = slim.conv2d(net, 128, [3, 3], 2,
                              scope='reshape1', padding='SAME')

            # block4
            net = slim.conv2d(net, 128, [3, 3], 2,
                              scope='conv4_3x3', padding='SAME')
            net = slim.conv2d(net, 128, [3, 3], 1,
                              scope='conv4_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block5
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv5', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block6
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv6', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block7
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv7', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = slim.conv2d(net, 256, [3, 3], 2,
                              scope='reshape2', padding='SAME')

            # block8
            net = slim.conv2d(net, 256, [3, 3], 2,
                              scope='conv8_3x3', padding='SAME')
            net = slim.conv2d(net, 256, [3, 3], 1,
                              scope='conv8_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block9
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv9', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block10
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv10', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block11
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv11', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = slim.conv2d(net, 512, [3, 3], 2,
                              scope='reshape3', padding='SAME')

            # block12
            net = slim.conv2d(net, 512, [3, 3], 2,
                              scope='conv12_3x3', padding='SAME')
            net = slim.conv2d(net, 512, [3, 3], 1,
                              scope='conv12_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block13
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                              scope='conv13', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block14
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                              scope='conv14', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)

            avg_pool = slim.avg_pool2d(net, [7, 7], scope='avg_pool')
            avg_pool = tf.layers.flatten(avg_pool)

            logits = tf.layers.dense(avg_pool, 1000)
            if self.is_training:
                logits = tf.nn.dropout(logits, keep_prob=self.keep_rate)
            logits = tf.layers.dense(logits, self.cls_num)

            # Return raw logits: softmax_cross_entropy_with_logits_v2 applies
            # softmax internally, so the extra tf.nn.softmax the original code
            # returned here would have squashed the gradients (argmax-based
            # accuracy is unaffected either way).
            return logits

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        self.optimizer = tf.compat.v1.train.AdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(self.model_path)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for epoch in range(self.epoches):

                loss_list = []

                for batch in range(self.batches):

                    data = self.reader.generate(self.batch_size)

                    feed_dict = {
                        self.x: data['images'],
                        self.y: data['labels']
                    }

                    _, loss = sess.run([self.train_step, self.loss], feed_dict)
                    loss_list.append(loss)

                mean_loss = np.mean(np.array(loss_list))

                acc_list = []
                for _ in range(10):
                    test_data = self.reader.generate_test(batch_size=32)
                    test_dict = {
                        self.x: test_data['images'],
                        self.y: test_data['labels']
                    }
                    acc = sess.run(self.acc, test_dict)
                    acc_list.append(acc)
                acc = np.mean(np.array(acc_list))

                print('Epoch:{} Loss:{} Acc:{}'.format(epoch, mean_loss, acc))
                self.saver.save(sess, self.model_name)
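A hypothetical usage sketch for the classifier above (the `config` values and the `Reader` dataset come from the project's other files; train_net resumes automatically when a checkpoint exists):

if __name__ == '__main__':
    net = Net(is_training=True)
    net.train_net()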
# Assumed imports for this snippet; `config` is the project's settings module,
# and ssd_bboxes_encode / loss_layer live in its loss utilities (exact module
# path not shown in the source):
import os

import tensorflow as tf
import tensorflow.contrib.slim as slim

import config as cfg
from read_data import Reader
from anchors import ssd_anchor_all_layers


class Net(object):

    def __init__(self, is_training):

        self.reader = Reader(is_training)
        self.is_training = is_training
        self.learning_rate = cfg.LEARNING_RATE
        self.batch_size = cfg.BATCH_SIZE
        self.class_num = len(cfg.CLASSES)
        self.blocks = cfg.BLOCKS
        self.ratios = cfg.RATIOS
        self.Sk = cfg.Sk

        # A list comprehension is required here: the original
        # `[tf.placeholder(...)] * self.batch_size` repeats the *same*
        # placeholder object, so the samples could not be fed separately.
        self.x = [tf.placeholder(tf.float32, [None, None, 3])
                  for _ in range(self.batch_size)]
        self.true_labels = [tf.placeholder(tf.float32, [None])
                            for _ in range(self.batch_size)]
        self.true_boxes = [tf.placeholder(tf.float32, [None, 4])
                           for _ in range(self.batch_size)]

        self.pred_loc, self.pred_cls = self.ssd_net(self.x)

        self.saver = tf.train.Saver()

    def ssd_net(self, inputs):

        pred_loc_result = []
        pred_score_result = []

        for q in range(self.batch_size):

            layers = {}
            x = tf.expand_dims(inputs[q], axis=0)

            with tf.variable_scope('net', reuse=tf.AUTO_REUSE):

                # Block 1
                net = slim.repeat(x, 2, slim.conv2d, 64, [3, 3], scope='conv1')
                net = slim.max_pool2d(net, [2, 2], scope='pool1',
                                      padding='SAME')

                # Block 2
                net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                                  scope='conv2')
                net = slim.max_pool2d(net, [2, 2], scope='pool2',
                                      padding='SAME')

                # Block 3
                net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                                  scope='conv3')
                net = slim.max_pool2d(net, [2, 2], scope='pool3',
                                      padding='SAME')

                # Block 4
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv4')
                layers['block4'] = net
                net = slim.max_pool2d(net, [2, 2], scope='pool4',
                                      padding='SAME')

                # Block 5
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv5')

                # Block 6
                net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')

                # Block 7
                net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
                layers['block7'] = net

                # Block 8
                with tf.variable_scope('block8'):
                    net = slim.conv2d(net, 256, [1, 1], scope='conv8_1_1')
                    net = slim.conv2d(net, 512, [3, 3], 2,
                                      scope='conv8_3_3', padding='SAME')
                layers['block8'] = net

                # Block 9
                with tf.variable_scope('block9'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv9_1_1')
                    net = slim.conv2d(net, 256, [3, 3], 2,
                                      scope='conv9_3_3', padding='SAME')
                layers['block9'] = net

                # Block 10
                with tf.variable_scope('block10'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv10_1_1')
                    net = slim.conv2d(net, 256, [3, 3], 2,
                                      scope='conv10_3_3', padding='SAME')
                layers['block10'] = net

                # Block 11
                with tf.variable_scope('block11'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv11_1_1')
                    net = slim.conv2d(net, 256, [3, 3], 2,
                                      scope='conv11_3_3', padding='SAME')
                layers['block11'] = net

                # Block 12
                with tf.variable_scope('block12'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv12_1_1')
                    net = slim.conv2d(net, 256, [3, 3], 2,
                                      scope='conv12_3_3', padding='SAME')
                layers['block12'] = net

            self.layers = layers

            pred_loc = []
            pred_score = []

            for i, block in enumerate(self.blocks):
                # AUTO_REUSE added here too, so the box heads share weights
                # across the per-image loop instead of raising on the second
                # iteration.
                with tf.variable_scope(block + '_box', reuse=tf.AUTO_REUSE):
                    loc, score = self.ssd_multibox_layer(
                        layers[block], self.class_num,
                        self.ratios[i], self.Sk[i])
                pred_loc.append(loc)
                pred_score.append(score)

            pred_loc_result.append(pred_loc)
            pred_score_result.append(pred_score)

        return pred_loc_result, pred_score_result

    def ssd_multibox_layer(self, inputs, class_num, ratio, size):

        num_anchors = len(size) + len(ratio)
        num_loc = num_anchors * 4
        num_cls = num_anchors * class_num

        # loc
        loc_pred = slim.conv2d(inputs, num_loc, [3, 3],
                               activation_fn=None, scope='conv_loc')
        # cls
        cls_pred = slim.conv2d(inputs, num_cls, [3, 3],
                               activation_fn=None, scope='conv_cls')

        loc_pred = tf.reshape(loc_pred, (-1, 4))
        cls_pred = tf.reshape(cls_pred, (-1, class_num))

        # softmax
        cls_pred = slim.softmax(cls_pred, scope='softmax')

        return loc_pred, cls_pred

    def train_net(self):

        self.optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=self.learning_rate, momentum=0.9)

        self.loss = []

        for q in range(self.batch_size):

            # Anchors depend on each image's own shape, so feed the q-th
            # placeholder (the original passed the whole list `self.x` here).
            anchors = tf.numpy_function(ssd_anchor_all_layers,
                                        [self.x[q]], [tf.float32] * 7)
            # self._anchors = ssd_anchor_all_layers(self.x)

            target_labels = []
            target_scores = []
            target_loc = []

            for i in range(7):
                t_labels, t_scores, t_loc = tf.numpy_function(
                    ssd_bboxes_encode,
                    [anchors[i], self.true_boxes[q],
                     self.true_labels[q], self.class_num],
                    [tf.float32, tf.float32, tf.float32])

                target_labels.append(t_labels)
                target_scores.append(t_scores)
                target_loc.append(t_loc)

            total_cross_pos, total_cross_neg, total_loc = loss_layer(
                (self.pred_loc[q], self.pred_cls[q]),
                target_labels, target_scores, target_loc)

            loss = tf.add(tf.add(total_cross_pos, total_cross_neg), total_loc)
            self.loss.append(loss)

        self.loss = tf.reduce_mean(self.loss, axis=0)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(cfg.MODEL_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for i in range(cfg.EPOCHES):

                batch_images = []
                batch_labels = []
                batch_boxes = []

                for batch in range(self.batch_size):

                    value = self.reader.generate()

                    image = value['image']
                    true_labels = value['classes']
                    true_boxes = value['boxes']

                    batch_images.append(image)
                    batch_labels.append(true_labels)
                    batch_boxes.append(true_boxes)

                # A list of placeholders cannot itself be a feed_dict key;
                # pair each placeholder with its own sample explicitly.
                feed_dict = {}
                for ph, img in zip(self.x, batch_images):
                    feed_dict[ph] = img
                for ph, labels in zip(self.true_labels, batch_labels):
                    feed_dict[ph] = labels
                for ph, boxes in zip(self.true_boxes, batch_boxes):
                    feed_dict[ph] = boxes

                loss_value, _ = sess.run([self.loss, self.train_step],
                                         feed_dict)

                self.saver.save(sess, os.path.join(cfg.MODEL_PATH,
                                                   'model.ckpt'))
                print('epoch:{}, loss:{}'.format(self.reader.epoch,
                                                 loss_value))
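tf.numpy_function is what lets the anchor generation and target encoding above stay plain NumPy while living inside the graph. A minimal standalone demo of the pattern (the names here are illustrative only):

import numpy as np
import tensorflow as tf

def double(x):
    # Ordinary NumPy code, executed each time the wrapped op runs.
    return (x * 2).astype(np.float32)

inp = tf.placeholder(tf.float32, [None])
out = tf.numpy_function(double, [inp], tf.float32)

with tf.Session() as sess:
    print(sess.run(out, {inp: np.array([1., 2., 3.], np.float32)}))  # [2. 4. 6.]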
# Assumed imports for this snippet (`config` and `read_data` module names
# follow the rest of this project):
import os
from math import ceil

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.slim as slim

import config as cfg
from read_data import Reader


class Net(object):

    def __init__(self, is_training):

        self.is_training = is_training
        self.epoches = cfg.EPOCHES
        self.learning_rate = cfg.LEARNING_RATE
        self.num_classes = len(cfg.CLASSES)
        self.model_path = cfg.MODEL_PATH
        self.batch_size = cfg.BATCH_SIZE
        self.target_size = cfg.TARGET_SIZE
        self.keep_rate = cfg.KEEP_RATE
        self.reader = Reader(is_training=is_training)

        self.x = tf.placeholder(
            tf.float32, [None, self.target_size, self.target_size, 3])

        # One label map per output scale, coarse to fine.
        self.y0 = tf.placeholder(tf.int32, [None, 14, 14])
        self.y1 = tf.placeholder(tf.int32, [None, 28, 28])
        self.y2 = tf.placeholder(tf.int32, [None, 56, 56])
        self.y3 = tf.placeholder(
            tf.int32, [None, self.target_size, self.target_size])

        self.y = [self.y0, self.y1, self.y2, self.y3]

        self.y_hat = self.network(self.x)

        self.loss = self.sample_loss(self.y, self.y_hat)

        self.saver = tf.train.Saver()

    def sample_loss(self, labels, logits):

        losses = []
        for i in range(4):
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=labels[i], logits=logits[i])
            losses.append(tf.reduce_mean(loss))

        return tf.reduce_mean(losses)

    def network(self, inputs):

        num_classes = self.num_classes
        train = self.is_training

        with tf.variable_scope('RESNET'):

            net = slim.conv2d(inputs, 64, [7, 7], 2,
                              scope='conv7x7', padding='SAME')
            pool1 = net
            net = slim.max_pool2d(net, [2, 2], scope='pool1', padding='SAME')
            pool2 = net

            # block1
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv1', padding='SAME')
            net = tf.add(net, pool2)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block2
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv2', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block3
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                              scope='conv3', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = slim.conv2d(net, 128, [3, 3], 2,
                              scope='reshape1', padding='SAME')

            # block4
            net = slim.conv2d(net, 128, [3, 3], 2,
                              scope='conv4_3x3', padding='SAME')
            pool3 = net
            net = slim.conv2d(net, 128, [3, 3], 1,
                              scope='conv4_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block5
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv5', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block6
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv6', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block7
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                              scope='conv7', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = slim.conv2d(net, 256, [3, 3], 2,
                              scope='reshape2', padding='SAME')

            # block8
            net = slim.conv2d(net, 256, [3, 3], 2,
                              scope='conv8_3x3', padding='SAME')
            pool4 = net
            net = slim.conv2d(net, 256, [3, 3], 1,
                              scope='conv8_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block9
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv9', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block10
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv10', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block11
            net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3],
                              scope='conv11', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = slim.conv2d(net, 512, [3, 3], 2,
                              scope='reshape3', padding='SAME')

            # block12
            net = slim.conv2d(net, 512, [3, 3], 2,
                              scope='conv12_3x3', padding='SAME')
            pool5 = net
            net = slim.conv2d(net, 512, [3, 3], 1,
                              scope='conv12_1x1', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block13
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                              scope='conv13', padding='SAME')
            net = tf.add(net, res)
            net = tf.layers.batch_normalization(net, training=self.is_training)
            res = net

            # block14
            net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                              scope='conv14', padding='SAME')

            # fc1
            net = slim.conv2d(net, 4096, [1, 1], 1, scope='fc1', padding='SAME')
            # fc2
            net = slim.conv2d(net, 4096, [1, 1], 1, scope='fc2', padding='SAME')
            # fc3
            net = slim.conv2d(net, 1000, [1, 1], 1, scope='fc3', padding='SAME')

            # up_pool 1
            net = self._unpool_layer(net,
                                     shape=tf.shape(pool4),
                                     num_classes=num_classes,
                                     name='up_pool1',
                                     ksize=4, stride=2)
            self.test = tf.nn.softmax(net, axis=-1)
            score_pool1 = slim.conv2d(pool4, num_classes, [1, 1], 1,
                                      scope='score_pool1')
            net = tf.add(net, score_pool1)
            # Keep raw logits at every scale: sparse_softmax_cross_entropy_
            # with_logits in sample_loss applies softmax itself, so the
            # tf.nn.softmax calls the original code returned here would have
            # squashed the gradients (argmax at test time is unchanged).
            up_pool1 = net

            # up_pool 2
            net = self._unpool_layer(net,
                                     shape=tf.shape(pool3),
                                     num_classes=num_classes,
                                     name='up_pool2',
                                     ksize=4, stride=2)
            score_pool2 = slim.conv2d(pool3, num_classes, [1, 1], 1,
                                      scope='score_pool2')
            net = tf.add(net, score_pool2)
            up_pool2 = net

            # up_pool 3
            net = self._unpool_layer(net,
                                     shape=tf.shape(pool2),
                                     num_classes=num_classes,
                                     name='up_pool3',
                                     ksize=4, stride=2)
            score_pool3 = slim.conv2d(pool2, num_classes, [1, 1], 1,
                                      scope='score_pool3')
            net = tf.add(net, score_pool3)
            up_pool3 = net

            # up_pool 4
            logits = self._unpool_layer(net,
                                        shape=tf.shape(inputs),
                                        num_classes=num_classes,
                                        name='up_pool4',
                                        ksize=8, stride=4)

            return up_pool1, up_pool2, up_pool3, logits

    def _unpool_layer(self, bottom, shape,
                      num_classes, name,
                      ksize=4, stride=2):

        strides = [1, stride, stride, 1]

        with tf.variable_scope(name):

            in_features = bottom.get_shape()[3].value

            new_shape = [shape[0], shape[1], shape[2], num_classes]
            output_shape = tf.stack(new_shape)

            f_shape = [ksize, ksize, num_classes, in_features]

            # weights = tf.get_variable('W', f_shape, tf.float32,
            #                           xavier_initializer())
            weights = self.get_deconv_filter(f_shape)
            deconv = tf.nn.conv2d_transpose(bottom, weights, output_shape,
                                            strides=strides, padding='SAME')

        return deconv

    def get_deconv_filter(self, f_shape):
        # Bilinear-interpolation kernel.
        width = f_shape[0]
        height = f_shape[1]
        f = ceil(width / 2.0)
        c = (2 * f - 1 - f % 2) / (2.0 * f)
        bilinear = np.zeros([f_shape[0], f_shape[1]])
        for x in range(width):
            for y in range(height):
                value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
                bilinear[x, y] = value
        weights = np.zeros(f_shape)
        for i in range(f_shape[2]):
            weights[:, :, i, i] = bilinear

        init = tf.constant_initializer(value=weights + 0.1, dtype=tf.float32)
        return tf.get_variable(name="up_filter", initializer=init,
                               shape=weights.shape)

    def run_test(self):

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(cfg.MODEL_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                # Load the saved model if one exists.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            value = self.reader.generate(1)

            images = value['images']
            labels = value['labels']

            feed_dict = {self.x: images}

            v = sess.run(self.y_hat, feed_dict)

            for i in range(4):
                # Show prediction and ground truth side by side.
                a1 = np.squeeze(np.argmax(v[i], axis=-1))
                a2 = np.squeeze(labels[i])
                tmp = np.hstack((a1, a2))
                plt.imshow(tmp)
                plt.show()

            # Debug probe: inspect the learned upsampling filter.
            t = tf.get_default_graph().get_tensor_by_name(
                'RESNET/up_pool1/up_filter:0')
            t = sess.run(t)

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        # The original created a MomentumOptimizer here and immediately
        # overwrote it; only the Adam optimizer is actually used.
        # self.optimizer = tf.compat.v1.train.MomentumOptimizer(
        #     learning_rate=self.learning_rate, momentum=0.9)
        self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learning_rate)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(cfg.MODEL_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for i in range(cfg.EPOCHES):

                loss_list = []

                for batch in range(cfg.BATCHES):

                    value = self.reader.generate(self.batch_size)

                    images = value['images']
                    labels = value['labels']

                    feed_dict = {self.x: images,
                                 self.y0: labels[0],
                                 self.y1: labels[1],
                                 self.y2: labels[2],
                                 self.y3: labels[3]}

                    _, loss, pred = sess.run(
                        [self.train_step, self.loss, self.y_hat], feed_dict)
                    loss_list.append(loss)

                    print('batch:{} loss:{}'.format(batch, loss), end='\r')

                loss_values = np.array(loss_list)  # one scalar per batch
                loss_values = np.mean(loss_values)

                with open('./result.txt', 'a') as f:
                    f.write(str(loss_values) + '\n')

                self.saver.save(sess, os.path.join(cfg.MODEL_PATH,
                                                   'model.ckpt'))
                print('epoch:{} loss:{}'.format(self.reader.epoch,
                                                loss_values))
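The four label placeholders (14x14 up to the full target size) mirror the four output scales. A hedged sketch of how such multi-scale labels could be derived from one full-resolution mask, assuming a 224x224 target and OpenCV (the project's Reader may do this differently):

import cv2
import numpy as np

def multi_scale_labels(mask, sizes=(14, 28, 56, 224)):
    # Nearest-neighbor resizing keeps class ids integral (no label mixing).
    return [cv2.resize(mask.astype(np.uint8), (s, s),
                       interpolation=cv2.INTER_NEAREST).astype(np.int32)
            for s in sizes]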
# Assumed imports for this snippet; ssd_bboxes_encode and loss_layer live in
# the project's loss utilities (exact module path not shown in the source):
import os
import logging

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

import config as cfg
from read_data import Reader
from anchors import ssd_anchor_all_layers

log = logging.getLogger(__name__)


class Net(object):

    def __init__(self, is_training):

        self.reader = Reader(is_training)
        self.is_training = is_training
        self.learning_rate = cfg.LEARNING_RATE
        self.class_num = len(cfg.CLASSES)
        self.weight_decay = cfg.WEIGHT_DECAY
        self.blocks = cfg.BLOCKS
        self.ratios = cfg.RATIOS
        self.keep_rate = cfg.KEEP_RATE
        self.model_path = cfg.MODEL_PATH
        self.momentum = cfg.MOMENTUM
        self.Sk = cfg.Sk

        self.x = tf.placeholder(tf.float32, [None, None, 3])
        self.true_labels = tf.placeholder(tf.float32, [None])
        self.true_boxes = tf.placeholder(tf.float32, [None, 4])

        self.output = self.ssd_net(tf.expand_dims(self.x, axis=0))

        self.anchors = tf.numpy_function(
            ssd_anchor_all_layers, [self.x], [tf.float32] * 7)

        self.saver = tf.train.Saver()

    def ssd_net(self, inputs, scope='ssd_512_vgg'):

        layers = {}

        with tf.variable_scope(scope, 'ssd_512_vgg', [inputs], reuse=None):
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(
                        self.weight_decay)):

                # Block 1
                net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3],
                                  scope='conv1')
                net = slim.max_pool2d(net, [2, 2], scope='pool1',
                                      padding='SAME')

                # Block 2
                net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                                  scope='conv2')
                net = slim.max_pool2d(net, [2, 2], scope='pool2',
                                      padding='SAME')

                # Block 3
                net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                                  scope='conv3')
                net = slim.max_pool2d(net, [2, 2], scope='pool3',
                                      padding='SAME')
                # net = tf.layers.batch_normalization(net, training=self.is_training)

                # Block 4
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv4')
                layers['block4'] = net
                net = slim.max_pool2d(net, [2, 2], scope='pool4',
                                      padding='SAME')

                # Block 5
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv5')

                # Block 6
                net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
                # Dropout
                if self.is_training:
                    net = tf.nn.dropout(net, keep_prob=self.keep_rate)

                # Block 7
                net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
                layers['block7'] = net
                # Dropout
                if self.is_training:
                    net = tf.nn.dropout(net, keep_prob=self.keep_rate)

                # Block 8
                with tf.variable_scope('block8'):
                    net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
                    net = slim.conv2d(net, 512, [3, 3], 2, scope='conv3x3',
                                      padding='SAME')
                layers['block8'] = net

                # Block 9
                with tf.variable_scope('block9'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
                    net = slim.conv2d(net, 256, [3, 3], 2, scope='conv3x3',
                                      padding='SAME')
                layers['block9'] = net

                # Block 10
                with tf.variable_scope('block10'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
                    net = slim.conv2d(net, 256, [3, 3], 2, scope='conv3x3',
                                      padding='SAME')
                layers['block10'] = net

                # Block 11
                with tf.variable_scope('block11'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
                    net = slim.conv2d(net, 256, [3, 3], 2, scope='conv3x3',
                                      padding='SAME')
                layers['block11'] = net

                # Block 12
                with tf.variable_scope('block12'):
                    net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
                    net = slim.conv2d(net, 256, [4, 4], 2, scope='conv4x4',
                                      padding='SAME')
                layers['block12'] = net

                self.layers = layers

                pred_loc = []
                pred_score = []

                for i, block in enumerate(self.blocks):
                    with tf.variable_scope(block + '_box'):
                        loc, score = self.ssd_multibox_layer(
                            layers[block], self.class_num,
                            self.ratios[i], self.Sk[i])
                    pred_loc.append(loc)
                    pred_score.append(score)

                return pred_loc, pred_score

    def ssd_multibox_layer(self, inputs, class_num, ratio, size):

        num_anchors = len(size) + len(ratio)
        num_loc = num_anchors * 4
        num_cls = num_anchors * class_num

        # loc
        loc_pred = slim.conv2d(inputs, num_loc, [3, 3],
                               activation_fn=None, scope='conv_loc')
        # cls
        cls_pred = slim.conv2d(inputs, num_cls, [3, 3],
                               activation_fn=None, scope='conv_cls')

        loc_pred = tf.reshape(loc_pred, (-1, 4))
        cls_pred = tf.reshape(cls_pred, (-1, class_num))

        # softmax
        cls_pred = slim.softmax(cls_pred, scope='softmax')

        return loc_pred, cls_pred

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        self.target_labels = []
        self.target_scores = []
        self.target_loc = []

        for i in range(7):
            target_labels, target_scores, target_loc = tf.numpy_function(
                ssd_bboxes_encode,
                [self.anchors[i], self.true_boxes,
                 self.true_labels, self.class_num],
                [tf.float32, tf.float32, tf.float32])

            self.target_labels.append(target_labels)
            self.target_scores.append(target_scores)
            self.target_loc.append(target_loc)

        self.total_cross_pos, self.total_cross_neg, self.total_loc = loss_layer(
            self.output, self.target_labels,
            self.target_scores, self.target_loc)

        self.loss = tf.add(
            tf.add(self.total_cross_pos, self.total_cross_neg),
            self.total_loc)

        # gradients = self.optimizer.compute_gradients(self.loss)
        self.optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=self.learning_rate, momentum=self.momentum)
        # self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learning_rate)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(self.model_path)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                log.info('Model Reload Successfully!')

            for i in range(cfg.EPOCHES):

                loss_list = []

                for batch in range(cfg.BATCHES):

                    value = self.reader.generate()

                    image = value['image'] - cfg.PIXEL_MEANS
                    true_labels = value['classes']
                    true_boxes = value['boxes']

                    feed_dict = {self.x: image,
                                 self.true_labels: true_labels,
                                 self.true_boxes: true_boxes}

                    # Skip samples in which no anchor scores above the
                    # matching threshold, i.e. there are no positives.
                    test = sess.run(self.target_scores, feed_dict)
                    total_pos = 0
                    for v in test:
                        if np.max(v) > cfg.THRESHOLD:
                            total_pos += 1
                    if total_pos == 0:
                        continue

                    try:
                        sess.run(self.train_step, feed_dict)
                        loss_0, loss_1, loss_2 = sess.run(
                            [self.total_cross_pos,
                             self.total_cross_neg,
                             self.total_loc], feed_dict)
                    except KeyboardInterrupt:
                        self.saver.save(sess, os.path.join(
                            self.model_path, 'model.ckpt'))
                        print('Model Saved.')
                        raise

                    loss_list.append(np.array([loss_0, loss_1, loss_2]))

                    # print('batch:{},pos_loss:{},neg_loss:{},loc_loss:{}'.format(
                    #     batch, loss_0, loss_1, loss_2), end='\r')

                loss_values = np.array(loss_list)  # (BATCHES, 3)
                loss_values = np.mean(loss_values, axis=0)

                with open('./result.txt', 'a') as f:
                    f.write(str(loss_values) + '\n')

                self.saver.save(sess, os.path.join(
                    self.model_path, 'model.ckpt'))
                print('epoch:{},pos_loss:{},neg_loss:{},loc_loss:{}'.format(
                    self.reader.epoch,
                    loss_values[0], loss_values[1], loss_values[2]))
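loss_layer itself is not shown in this snippet. SSD's localization term is conventionally a smooth-L1 on the encoded offsets; a standalone sketch of that piece (an assumption about what loss_layer computes, not its actual code):

import tensorflow as tf

def smooth_l1(x):
    # Elementwise smooth-L1: 0.5*x^2 where |x| < 1, |x| - 0.5 elsewhere.
    absx = tf.abs(x)
    return tf.where(absx < 1.0, 0.5 * tf.square(x), absx - 0.5)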
# Assumed imports for this snippet; `random_crop` comes from the project's
# augmentation utilities (exact module path not shown in the source):
import os
from math import ceil

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.contrib.slim as slim

import config as cfg
from read_data import Reader


class Net(object):

    def __init__(self, is_training):

        self.is_training = is_training
        self.epoches = cfg.EPOCHES
        self.learning_rate = cfg.LEARNING_RATE
        self.num_classes = len(cfg.CLASSES)
        self.model_path = cfg.MODEL_PATH
        self.batch_size = cfg.BATCH_SIZE
        self.target_size = cfg.TARGET_SIZE
        self.keep_rate = cfg.KEEP_RATE
        self.reader = Reader(is_training=is_training)

        self.x = tf.placeholder(
            tf.float32, [None, self.target_size, self.target_size, 3])
        self.y = tf.placeholder(
            tf.int32, [None, self.target_size, self.target_size])
        # Per-pixel loss weights ("loss_weght" in the original; renamed).
        self.loss_weight = tf.placeholder(
            tf.float32, [None, self.target_size, self.target_size])

        self.y_hat = self.network(self.x)

        self.loss = self.sample_loss(self.y, self.y_hat, self.loss_weight)

        self.saver = tf.train.Saver()

    def sample_loss(self, labels, logits, loss_weight):

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits)
        loss = tf.losses.compute_weighted_loss(
            losses=loss, weights=loss_weight)

        return tf.reduce_mean(loss)

    def network(self, inputs):

        num_classes = self.num_classes
        train = self.is_training

        with tf.variable_scope('vgg'):

            conv1_1 = self._conv_layer(inputs, 64, 'conv1_1')
            conv1_2 = self._conv_layer(conv1_1, 64, 'conv1_2')
            pool1 = self._max_pool(conv1_2, 'pool1')

            conv2_1 = self._conv_layer(pool1, 128, 'conv2_1')
            conv2_2 = self._conv_layer(conv2_1, 128, 'conv2_2')
            pool2 = self._max_pool(conv2_2, 'pool2')

            conv3_1 = self._conv_layer(pool2, 256, 'conv3_1')
            conv3_2 = self._conv_layer(conv3_1, 256, 'conv3_2')
            conv3_3 = self._conv_layer(conv3_2, 256, 'conv3_3')
            pool3 = self._max_pool(conv3_3, 'pool3')

            conv4_1 = self._conv_layer(pool3, 512, 'conv4_1')
            conv4_2 = self._conv_layer(conv4_1, 512, 'conv4_2')
            conv4_3 = self._conv_layer(conv4_2, 512, 'conv4_3')
            pool4 = self._max_pool(conv4_3, 'pool4')

            conv5_1 = self._conv_layer(pool4, 512, 'conv5_1')
            conv5_2 = self._conv_layer(conv5_1, 512, 'conv5_2')
            conv5_3 = self._conv_layer(conv5_2, 512, 'conv5_3')
            pool5 = self._max_pool(conv5_3, 'pool5')

            fc6 = self._conv_layer(pool5, 4096, k_size=1, name='fc6')
            if train:
                fc6 = tf.nn.dropout(fc6, self.keep_rate)

            fc7 = self._conv_layer(fc6, 4096, k_size=1, name='fc7')
            if train:
                fc7 = tf.nn.dropout(fc7, self.keep_rate)

            fc8 = self._conv_layer(fc7, 1000, k_size=1, name='fc8')

            upscore1 = self._upscore_layer(fc8,
                                           shape=tf.shape(pool4),
                                           num_classes=num_classes,
                                           name='upscore1',
                                           ksize=4, stride=2)
            score_pool4 = self._conv_layer(pool4, num_classes,
                                           k_size=1, name='score_pool4')
            fuse_pool4 = tf.add(upscore1, score_pool4)
            # self.fuse_pool4 = self.score_pool4

            upscore2 = self._upscore_layer(fuse_pool4,
                                           shape=tf.shape(inputs),
                                           num_classes=num_classes,
                                           name='upscore2',
                                           ksize=32, stride=16)

            # Return raw logits: sparse_softmax_cross_entropy_with_logits in
            # sample_loss applies softmax internally, so the tf.nn.softmax the
            # original code returned here would have squashed the gradients
            # (argmax at test time is unchanged).
            return upscore2

    def _max_pool(self, bottom, name):
        return slim.max_pool2d(bottom, [2, 2], scope=name, padding='SAME')

    def _conv_layer(self, bottom, filters, name, k_size=3):
        return slim.conv2d(bottom, filters, [k_size, k_size],
                           scope=name, padding='SAME')

    def _upscore_layer(self, bottom, shape,
                       num_classes, name,
                       ksize=4, stride=2):

        strides = [1, stride, stride, 1]

        with tf.variable_scope(name):

            in_features = bottom.get_shape()[3].value

            if shape is None:
                # Compute the output shape from bottom.
                in_shape = tf.shape(bottom)
                h = ((in_shape[1] - 1) * stride) + 1
                w = ((in_shape[2] - 1) * stride) + 1
                new_shape = [in_shape[0], h, w, num_classes]
            else:
                new_shape = [shape[0], shape[1], shape[2], num_classes]

            output_shape = tf.stack(new_shape)

            f_shape = [ksize, ksize, num_classes, in_features]

            weights = self.get_deconv_filter_norm(f_shape)
            deconv = tf.nn.conv2d_transpose(bottom, weights, output_shape,
                                            strides=strides, padding='SAME')

        return deconv

    def get_deconv_filter(self, f_shape):
        # Fixed bilinear-interpolation kernel (kept for reference; the layer
        # above currently uses the random-normal variant below).
        width = f_shape[0]
        height = f_shape[1]
        f = ceil(width / 2.0)
        c = (2 * f - 1 - f % 2) / (2.0 * f)
        bilinear = np.zeros([f_shape[0], f_shape[1]])
        for x in range(width):
            for y in range(height):
                value = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
                bilinear[x, y] = value
        weights = np.zeros(f_shape)
        for i in range(f_shape[2]):
            weights[:, :, i, i] = bilinear

        init = tf.constant_initializer(value=weights, dtype=tf.float32)
        return tf.get_variable(name='up_filter', initializer=init,
                               shape=weights.shape)

    def get_deconv_filter_norm(self, f_shape):
        weights = tf.Variable(
            tf.truncated_normal(f_shape, mean=0., stddev=0.1))
        return weights

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        self.optimizer = tf.compat.v1.train.MomentumOptimizer(
            learning_rate=self.learning_rate, momentum=0.9)
        # self.optimizer = tf.compat.v1.train.AdamOptimizer(self.learning_rate)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(cfg.MODEL_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for i in range(cfg.EPOCHES):

                loss_list = []

                for batch in range(cfg.BATCHES):

                    value = self.reader.generate(self.batch_size)

                    images = value['images']
                    labels = value['labels']
                    weights = value['weights']

                    feed_dict = {
                        self.x: images,
                        self.y: labels,
                        self.loss_weight: weights
                    }

                    _, loss = sess.run([self.train_step, self.loss], feed_dict)
                    loss_list.append(loss)

                    print('batch:{} loss:{}'.format(batch, loss), end='\r')

                loss_values = np.array(loss_list)  # one scalar per batch
                loss_values = np.mean(loss_values)

                with open('./result.txt', 'a') as f:
                    f.write(str(loss_values) + '\n')

                self.saver.save(sess, os.path.join(cfg.MODEL_PATH,
                                                   'model.ckpt'))
                print('epoch:{} loss:{}'.format(self.reader.epoch,
                                                loss_values))

    def test(self, image_path):

        if not isinstance(image_path, list):
            image_path = [image_path]

        self.is_training = False

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(cfg.MODEL_PATH)
            if ckpt and ckpt.model_checkpoint_path:
                # Load the saved model for inference.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for path in image_path:

                image = self.reader.read_image(path)
                image, _ = self.reader.resize_image(image, with_label=False)
                image, _ = random_crop(image, None, with_label=False)

                image = np.expand_dims(image, axis=0)

                label_image = sess.run(
                    self.y_hat,
                    feed_dict={self.x: image - self.reader.pixel_means})

                # The original squeezed `image` here by mistake, which fed the
                # input picture into argmax instead of the prediction.
                label_image = np.squeeze(label_image)
                label_image = np.argmax(label_image, axis=-1)
                label_image = self.reader.decode_label(label_image)
                image = np.squeeze(image).astype(np.int)

                result = np.vstack([image, label_image])

                plt.imshow(result)
                plt.show()
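A quick standalone check of the bilinear kernel formula used in get_deconv_filter, for a 4x4 filter (the stride-2 case): the kernel comes out symmetric with its peak on the central 2x2 block.

import numpy as np
from math import ceil

width = height = 4
f = ceil(width / 2.0)                  # 2
c = (2 * f - 1 - f % 2) / (2.0 * f)    # 0.75
bilinear = np.zeros((width, height))
for x in range(width):
    for y in range(height):
        bilinear[x, y] = (1 - abs(x / f - c)) * (1 - abs(y / f - c))
print(bilinear)  # corners 0.0625, edges 0.1875, central 2x2 block 0.5625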
# Assumed imports for this snippet (`read_data` as in the rest of the project):
import os

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim

from read_data import Reader


class Net(object):

    def __init__(self, is_training=True):

        self.is_training = is_training

        if self.is_training:
            self.reader = Reader()

        self.batch_size = 16
        self.lr = 2e-4
        self.wd = 5e-3
        self.epoches = 100
        self.batches = 64
        self.size = 96
        self.label_num = 30

        self.x = tf.placeholder(tf.float32, [None, self.size, self.size, 1])
        self.y = tf.placeholder(tf.float32, [None, self.label_num])

        self.y_hat = self.network(self.x)

        self.model_path = './model'
        self.ckpt_path = os.path.join(self.model_path, 'model.ckpt')

        self.saver = tf.train.Saver()

    def loss_layer(self, y, y_hat):
        # Sum-of-squares regression loss over the 30 keypoint coordinates.
        return tf.reduce_sum(tf.square(y - y_hat))

    def network(self, inputs):

        with tf.variable_scope('net'):
            with slim.arg_scope(
                    [slim.conv2d],
                    activation_fn=tf.nn.relu,
                    weights_regularizer=slim.l2_regularizer(self.wd)):

                # Block init
                net = slim.conv2d(inputs, 1024, [3, 3], 2,
                                  scope='conv_init', padding='SAME')

                # Block 1
                net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3],
                                  scope='conv1', padding='SAME')
                net = slim.max_pool2d(net, [2, 2], scope='pool1',
                                      padding='SAME')
                # `training=` (not the original `trainable=`) is the flag that
                # switches batch norm between batch statistics and moving
                # averages.
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block1')

                # Block 2
                net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                                  scope='conv2')
                net = slim.max_pool2d(net, [2, 2], scope='pool2',
                                      padding='SAME')
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block2')

                # Block 3
                net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                                  scope='conv3')
                net = slim.max_pool2d(net, [2, 2], scope='pool3',
                                      padding='SAME')
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block3')

                # Block 4
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv4')
                net = slim.max_pool2d(net, [2, 2], scope='pool4',
                                      padding='SAME')
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block4')

                # Block 5
                net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                                  scope='conv5')
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block5')

                # Block 6
                net = slim.conv2d(net, 1024, [3, 3], 2, scope='conv6')
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block6')

                net = tf.layers.flatten(net)

                logits = tf.layers.dense(net, self.label_num)
                if self.is_training:
                    # Note: `rate` is the fraction dropped, so 1 - 0.2 drops
                    # 80% of the output units, as in the original code.
                    logits = tf.layers.dropout(logits, rate=1 - 0.2)
                # logits = tf.nn.tanh(logits)

                return logits

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        self.loss = self.loss_layer(self.y, self.y_hat)

        self.optimizer = tf.compat.v1.train.AdamOptimizer(self.lr)
        self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(self.model_path)
            if ckpt and ckpt.model_checkpoint_path:
                # If a checkpoint exists, resume training from it.
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for epoch in range(self.epoches):

                loss_list = []

                # The original looped over range(self.batch_size) here, which
                # ignored the separately defined self.batches.
                for batch in range(self.batches):

                    images, labels = self.reader.generate(self.batch_size)

                    feed_dict = {
                        self.x: images,
                        self.y: labels
                    }

                    _, loss_value = sess.run(
                        [self.train_step, self.loss], feed_dict)
                    loss_list.append(loss_value)

                loss = np.mean(np.array(loss_list))

                self.saver.save(sess, self.ckpt_path)
                print('epoch:{} loss:{}'.format(epoch, loss))

                with open('./losses.txt', 'a') as f:
                    f.write(str(loss) + '\n')

    def test_net(self, image, sess):

        image = image.reshape((1, self.size, self.size, 1)) - 127.5
        points = sess.run(self.y_hat, feed_dict={self.x: image})
        points = (points * self.size).astype(np.int)

        return np.squeeze(points)
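Hypothetical inference sketch for the keypoint net above (the image path and OpenCV preprocessing are assumptions; restoring follows the class's own saver):

import cv2
import tensorflow as tf

if __name__ == '__main__':
    net = Net(is_training=False)
    with tf.Session() as sess:
        net.saver.restore(sess, net.ckpt_path)
        face = cv2.imread('face.jpg', cv2.IMREAD_GRAYSCALE)  # assumed input
        face = cv2.resize(face, (net.size, net.size))
        points = net.test_net(face, sess)  # 30 values = 15 (x, y) keypoints
        print(points.reshape(-1, 2))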