def __init__(self, n_channel=3, n_classes=10, image_size=24):
    """Build the graph of a three-stage convolutional classifier.

    Creates the input placeholders, three conv/pool stages followed by two
    dense layers, the loss, the Adam training op and the accuracy metric.
    The tensors needed from outside are stored on ``self``.

    Args:
        n_channel: Channel count of the input images.
        n_classes: Number of units of the last dense layer.
        image_size: Height and width of the (square) input images.
    """
    # Spatial extent after each of the three 2x2 / stride-2 poolings.
    half = int(image_size / 2)
    quarter = int(image_size / 4)
    eighth = int(image_size / 8)
    flat_dim = eighth * eighth * 256

    # Input placeholders and the step counter.
    self.images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, image_size, image_size, n_channel],
        name='images')
    self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    # Settings shared by every conv layer and every pool layer.
    conv_common = dict(n_size=3, stride=1, activation='relu',
                       batch_normal=True, weight_decay=1e-4)
    pool_common = dict(n_size=2, stride=2, mode='max', resp_normal=True)

    # Layer objects, created in the same order as the data flows.
    feature_layers = [
        ConvLayer(input_shape=(None, image_size, image_size, n_channel),
                  n_filter=64, name='conv1', **conv_common),
        PoolLayer(name='pool1', **pool_common),
        ConvLayer(input_shape=(None, half, half, 64),
                  n_filter=128, name='conv2', **conv_common),
        PoolLayer(name='pool2', **pool_common),
        ConvLayer(input_shape=(None, quarter, quarter, 128),
                  n_filter=256, name='conv3', **conv_common),
        PoolLayer(name='pool3', **pool_common),
    ]
    hidden_dense = DenseLayer(
        input_shape=(None, flat_dim), hidden_dim=1024, activation='relu',
        dropout=True, keep_prob=self.keep_prob, batch_normal=True,
        weight_decay=1e-4, name='dense1')
    output_dense = DenseLayer(
        input_shape=(None, 1024), hidden_dim=n_classes, activation='none',
        dropout=False, keep_prob=None, batch_normal=False,
        weight_decay=1e-4, name='dense2')

    # Forward pass: conv/pool stack, flatten, then the two dense layers.
    tensor = self.images
    for layer in feature_layers:
        tensor = layer.get_output(input=tensor)
    tensor = tf.reshape(tensor, [-1, flat_dim])
    tensor = hidden_dense.get_output(input=tensor)
    logits = output_dense.get_output(input=tensor)

    # Loss: cross entropy summed over the batch, registered in the 'losses'
    # collection; avg_loss adds up everything found in that collection.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=self.labels)
    self.objective = tf.reduce_sum(cross_entropy)
    tf.add_to_collection('losses', self.objective)
    self.avg_loss = tf.add_n(tf.get_collection('losses'))

    # Step-wise learning rate: 0.01 below step 50000, 0.001 below step
    # 100000, 0.0001 afterwards.
    def late_rate():
        return tf.cond(tf.less(self.global_step, 100000),
                       lambda: tf.constant(0.001),
                       lambda: tf.constant(0.0001))

    learning_rate = tf.cond(tf.less(self.global_step, 50000),
                            lambda: tf.constant(0.01),
                            late_rate)
    adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
    self.optimizer = adam.minimize(self.avg_loss,
                                   global_step=self.global_step)

    # Metric: fraction of samples whose arg-max logit equals the label.
    hits = tf.equal(self.labels, tf.argmax(logits, 1))
    self.accuracy = tf.reduce_mean(tf.cast(hits, 'float'))
def net_2(hidden_pool1, keep_prob, image_size, n_classes):
    """Build the second classification branch under the 'net_2' scope.

    The branch consists of two conv/pool stages and two dense layers applied
    to ``hidden_pool1``.

    Args:
        hidden_pool1: Input tensor of the branch; the first conv layer is
            declared with shape (None, image_size/2, image_size/2, 64).
        keep_prob: Keep probability handed to the dropout of 'dense1'.
        image_size: Side length used to derive the layer input shapes.
        n_classes: Number of units of the last dense layer.

    Returns:
        The output tensor of the last dense layer (the logits).
    """
    with tf.variable_scope('net_2'):
        half = int(image_size / 2)
        quarter = int(image_size / 4)
        eighth = int(image_size / 8)
        flat_dim = eighth * eighth * 256

        conv_common = dict(n_size=3, stride=1, activation='relu',
                           batch_normal=True, weight_decay=1e-4)
        pool_common = dict(n_size=2, stride=2, mode='max', resp_normal=True)

        # Layer objects, created in the order in which they are applied.
        feature_layers = (
            ConvLayer(input_shape=(None, half, half, 64),
                      n_filter=128, name='conv2', **conv_common),
            PoolLayer(name='pool2', **pool_common),
            ConvLayer(input_shape=(None, quarter, quarter, 128),
                      n_filter=256, name='conv3', **conv_common),
            PoolLayer(name='pool3', **pool_common),
        )
        hidden_dense = DenseLayer(
            input_shape=(None, flat_dim), hidden_dim=1024,
            activation='relu', dropout=True, keep_prob=keep_prob,
            batch_normal=True, weight_decay=1e-4, name='dense1')
        output_dense = DenseLayer(
            input_shape=(None, 1024), hidden_dim=n_classes,
            activation='none', dropout=False, keep_prob=None,
            batch_normal=False, weight_decay=1e-4, name='dense2')

        # Data flow of this branch.
        tensor = hidden_pool1
        for layer in feature_layers:
            tensor = layer.get_output(input=tensor)
        tensor = tf.reshape(tensor, [-1, flat_dim])
        tensor = hidden_dense.get_output(input=tensor)
        logits_2 = output_dense.get_output(input=tensor)
        return logits_2
def __init__(self, n_channel=3, n_classes=10, image_size=24):
    """Assemble the classifier graph: inputs, network, loss and metrics.

    Args:
        n_channel: Channel count of the input images.
        n_classes: Number of units of the last dense layer.
        image_size: Height and width of the (square) input images.
    """
    def side(divisor):
        # Side length after the poolings that shrink the map by `divisor`.
        return int(image_size / divisor)

    def conv(name, input_shape, n_filter):
        return ConvLayer(input_shape=input_shape, n_size=3,
                         n_filter=n_filter, stride=1, activation='relu',
                         batch_normal=True, weight_decay=1e-4, name=name)

    def pool(name):
        return PoolLayer(n_size=2, stride=2, mode='max', resp_normal=True,
                         name=name)

    # Input placeholders and the step counter.
    self.images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, image_size, image_size, n_channel],
        name='images')
    self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    # Network structure: three conv/pool stages, then two dense layers.
    conv1 = conv('conv1', (None, image_size, image_size, n_channel), 64)
    pool1 = pool('pool1')
    conv2 = conv('conv2', (None, side(2), side(2), 64), 128)
    pool2 = pool('pool2')
    conv3 = conv('conv3', (None, side(4), side(4), 128), 256)
    pool3 = pool('pool3')
    flat_width = side(8) * side(8) * 256
    dense1 = DenseLayer(
        input_shape=(None, flat_width), hidden_dim=1024, activation='relu',
        dropout=True, keep_prob=self.keep_prob, batch_normal=True,
        weight_decay=1e-4, name='dense1')
    dense2 = DenseLayer(
        input_shape=(None, 1024), hidden_dim=n_classes, activation='none',
        dropout=False, keep_prob=None, batch_normal=False,
        weight_decay=1e-4, name='dense2')

    # Data flow.
    features = self.images
    for stage in (conv1, pool1, conv2, pool2, conv3, pool3):
        features = stage.get_output(input=features)
    flattened = tf.reshape(features, [-1, flat_width])
    logits = dense2.get_output(input=dense1.get_output(input=flattened))

    # Objective: batch-summed cross entropy, added to the 'losses'
    # collection; avg_loss is the sum of everything in that collection.
    self.objective = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.labels))
    tf.add_to_collection('losses', self.objective)
    self.avg_loss = tf.add_n(tf.get_collection('losses'))

    # Optimizer: Adam with a learning rate of 0.01 / 0.001 / 0.0001 selected
    # by the global step (thresholds 50000 and 100000).
    def rate_after_first_threshold():
        return tf.cond(tf.less(self.global_step, 100000),
                       lambda: tf.constant(0.001),
                       lambda: tf.constant(0.0001))

    lr = tf.cond(tf.less(self.global_step, 50000),
                 lambda: tf.constant(0.01),
                 rate_after_first_threshold)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        self.avg_loss, global_step=self.global_step)

    # Observed value: mean of (label == arg-max of the logits).
    is_correct = tf.equal(self.labels, tf.argmax(logits, 1))
    self.accuracy = tf.reduce_mean(tf.cast(is_correct, 'float'))
def __init__(self, n_channel=3, n_classes=10, image_size=24):
    """Build a network with one shared conv/pool stem and three heads.

    The stem output is passed to ``net_1``, ``net_2`` and ``net_3``. The
    three summed cross entropies are added into one objective, and a
    separate accuracy tensor is created per head.

    Args:
        n_channel: Channel count of the input images.
        n_classes: Class count forwarded to the three heads.
        image_size: Height and width of the (square) input images.
    """
    # Input placeholders and the step counter.
    self.images = tf.placeholder(
        dtype=tf.float32,
        shape=[None, image_size, image_size, n_channel],
        name='images')
    self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    # Shared stem: one conv layer followed by one pool layer.
    stem_conv = ConvLayer(
        input_shape=(None, image_size, image_size, n_channel), n_size=3,
        n_filter=64, stride=1, activation='relu', batch_normal=True,
        weight_decay=1e-4, name='conv1')
    stem_pool = PoolLayer(n_size=2, stride=2, mode='max', resp_normal=True,
                          name='pool1')
    stem_features = stem_pool.get_output(
        input=stem_conv.get_output(input=self.images))

    # Three heads fed with the same stem features.
    head_args = (stem_features, self.keep_prob, image_size, n_classes)
    self.logits_1 = net_1(*head_args)
    self.logits_2 = net_2(*head_args)
    self.logits_3 = net_3(*head_args)

    def summed_cross_entropy(head_logits):
        return tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=head_logits, labels=self.labels))

    # Objective: sum of the per-head losses, registered in 'losses';
    # avg_loss adds up everything found in that collection.
    self.objective_1 = summed_cross_entropy(self.logits_1)
    self.objective_2 = summed_cross_entropy(self.logits_2)
    self.objective_3 = summed_cross_entropy(self.logits_3)
    self.objective = self.objective_1 + self.objective_2 + self.objective_3
    tf.add_to_collection('losses', self.objective)
    self.avg_loss = tf.add_n(tf.get_collection('losses'))

    # Optimizer: Adam with a learning rate of 0.01 / 0.001 / 0.0001 selected
    # by the global step (thresholds 50000 and 100000).
    def rate_after_first_threshold():
        return tf.cond(tf.less(self.global_step, 100000),
                       lambda: tf.constant(0.001),
                       lambda: tf.constant(0.0001))

    lr = tf.cond(tf.less(self.global_step, 50000),
                 lambda: tf.constant(0.01),
                 rate_after_first_threshold)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        self.avg_loss, global_step=self.global_step)

    def head_accuracy(head_logits):
        matches = tf.equal(self.labels, tf.argmax(head_logits, 1))
        return tf.reduce_mean(tf.cast(matches, 'float'))

    # Observed values: one accuracy per head.
    self.accuracy_1 = head_accuracy(self.logits_1)
    self.accuracy_2 = head_accuracy(self.logits_2)
    self.accuracy_3 = head_accuracy(self.logits_3)
def inference(self, images):
    """Build the convolutional network on ``images`` and return its output.

    Six conv/pool stages (16 to 512 filters) are followed by two
    1024-filter convolutions and a final 1x1 convolution with
    ``self.n_boxes * 5`` filters and no activation. A table header is
    printed before the layers are connected and a blank line afterwards.

    Args:
        images: Input tensor handed to the first conv layer.

    Returns:
        The output tensor of the last conv layer ('conv9').
    """
    batch = self.batch_size
    full_side = self.image_size

    def side(divisor):
        # The first stage works on the full image size; later stages use
        # the truncated quotient of the size by the accumulated pool stride.
        return full_side if divisor == 1 else int(full_side / divisor)

    def conv(index, divisor, in_depth, out_depth, kernel=3,
             activation='leaky_relu', batch_normal=True):
        return ConvLayer(
            input_shape=(batch, side(divisor), side(divisor), in_depth),
            n_size=kernel, n_filter=out_depth, stride=1,
            activation=activation, batch_normal=batch_normal,
            weight_decay=5e-4, name='conv%d' % index)

    def pool(index, divisor, depth):
        return PoolLayer(
            input_shape=(batch, side(divisor), side(divisor), depth),
            n_size=2, stride=2, mode='max', resp_normal=False,
            name='pool%d' % index)

    # Network structure: layers are created in the order they are applied.
    pipeline = []
    in_depth = self.n_channel
    for stage, out_depth in enumerate((16, 32, 64, 128, 256, 512), start=1):
        divisor = 2 ** (stage - 1)
        pipeline.append(conv(stage, divisor, in_depth, out_depth))
        pipeline.append(pool(stage, divisor, out_depth))
        in_depth = out_depth
    pipeline.append(conv(7, 64, 512, 1024))
    pipeline.append(conv(8, 64, 1024, 1024))
    pipeline.append(conv(9, 64, 1024, self.n_boxes * 5, kernel=1,
                         activation='none', batch_normal=False))

    # Data flow.
    print('\n%-10s\t%-20s\t%-20s\t%s' % ('Name', 'Filter', 'Input', 'Output'))
    tensor = images
    for layer in pipeline:
        tensor = layer.get_output(input=tensor)
    logits = tensor
    print()
    sys.stdout.flush()

    # Network output.
    return logits
def __init__(self, n_channel, n_classes, image_x_size, image_y_size,
             max_objects, cell_x_size, cell_y_size, conv_x_size, conv_y_size,
             pool_mode, box_per_cell, batch_size, object_scale,
             noobject_scale, coord_scale, class_scale, noobject_thresh=0.6,
             recall_thresh=0.5, pred_thresh=0.5, nms_thresh=0.4,
             is_weight_decay=False, weight_decay_scale=0.0):
    """Store the settings, build the grid/prior constants and the layers.

    Args:
        n_channel: Channel count of the input images.
        n_classes: Class count; ``self.n_classes`` stores ``n_classes + 1``.
        image_x_size: Image width used for the first layer's input shape.
        image_y_size: Image height used for the first layer's input shape.
        max_objects: Stored as ``self.max_objects``.
        cell_x_size: Number of grid cells along x.
        cell_y_size: Number of grid cells along y.
        conv_x_size: Kernel width of the non-1x1 convolutions.
        conv_y_size: Kernel height of the non-1x1 convolutions.
        pool_mode: Mode passed to every pool layer.
        box_per_cell: Boxes per cell (``self.n_boxes``); must equal the
            number of built-in prior boxes (5).
        batch_size: Batch size used to tile the grid and prior constants.
        object_scale: Stored as float in ``self.object_scale``.
        noobject_scale: Stored as float in ``self.noobject_scale``.
        coord_scale: Stored as float in ``self.coord_scale``.
        class_scale: Stored as float in ``self.class_scale``.
        noobject_thresh: Stored as ``self.noobject_thresh``.
        recall_thresh: Stored as ``self.recall_thresh``.
        pred_thresh: Stored as ``self.pred_thresh``.
        nms_thresh: Stored as ``self.nms_thresh``.
        is_weight_decay: Stored as ``self.is_weight_decay``.
        weight_decay_scale: Weight decay passed to every conv layer.

    Raises:
        ValueError: If ``box_per_cell`` does not match the number of
            built-in prior boxes.
    """
    # Settings.
    self.n_channel = n_channel
    # One slot more than requested - presumably a background class; confirm.
    self.n_classes = n_classes + 1
    self.image_x_size = image_x_size
    self.image_y_size = image_y_size
    self.max_objects = max_objects
    self.cell_x_size = cell_x_size
    self.cell_y_size = cell_y_size
    self.conv_x_size = conv_x_size
    self.conv_y_size = conv_y_size
    self.pool_mode = pool_mode
    self.n_boxes = box_per_cell
    self.batch_size = batch_size
    self.object_scale = float(object_scale)
    self.noobject_scale = float(noobject_scale)
    self.coord_scale = float(coord_scale)
    self.class_scale = float(class_scale)
    self.noobject_thresh = noobject_thresh
    self.recall_thresh = recall_thresh
    self.pred_thresh = pred_thresh
    self.nms_thresh = nms_thresh
    self.is_weight_decay = is_weight_decay
    self.weight_decay_scale = float(weight_decay_scale)
    self.n_coord = 4

    # Built-in prior box sizes. They are reshaped per box below, so the
    # number of boxes per cell has to match; fail early with a clear message
    # instead of an opaque reshape error.
    prior_widths = numpy.array([1.0, 0.8, 0.6, 0.4, 0.2], dtype='float32')
    prior_heights = numpy.array([0.2, 0.4, 0.6, 0.8, 1.0], dtype='float32')
    if self.n_boxes != prior_widths.size:
        raise ValueError(
            'box_per_cell must be %d to match the built-in prior boxes, '
            'got %r' % (prior_widths.size, self.n_boxes))

    def tiled_constant(values, shape, repeats):
        # Reshape to a broadcast-style shape, tile to
        # (batch, cell_y, cell_x, n_boxes, 1) and freeze as a constant.
        tiled = numpy.tile(numpy.reshape(values, newshape=shape), repeats)
        return tf.constant(tiled, dtype=tf.float32)

    # Global constants: x / y index of every cell and the prior box sizes.
    self.grid_x = tiled_constant(
        numpy.arange(self.cell_x_size, dtype='float32'),
        (1, 1, self.cell_x_size, 1, 1),
        (self.batch_size, self.cell_y_size, 1, self.n_boxes, 1))
    self.grid_y = tiled_constant(
        numpy.arange(self.cell_y_size, dtype='float32'),
        (1, self.cell_y_size, 1, 1, 1),
        (self.batch_size, 1, self.cell_x_size, self.n_boxes, 1))
    per_box_shape = (1, 1, 1, self.n_boxes, 1)
    per_cell_repeats = (self.batch_size, self.cell_y_size, self.cell_x_size,
                        1, 1)
    self.prior_w = tiled_constant(prior_widths, per_box_shape,
                                  per_cell_repeats)
    self.prior_h = tiled_constant(prior_heights, per_box_shape,
                                  per_cell_repeats)

    # Kernel sizes as (x, y). The original passed conv_x_size as the kernel
    # height of conv2 and conv4; every non-1x1 layer now uses conv_y_size.
    full_kernel = (self.conv_x_size, self.conv_y_size)
    unit_kernel = (1, 1)

    def conv(name, n_filter, kernel, activation='leaky_relu',
             batch_normal=True, **link):
        # `link` is either input_shape=... (first layer) or prev_layer=...
        return ConvLayer(
            x_size=kernel[0], y_size=kernel[1], x_stride=1, y_stride=1,
            n_filter=n_filter, activation=activation,
            batch_normal=batch_normal,
            weight_decay=self.weight_decay_scale, name=name, **link)

    def pool(name, prev_layer):
        return PoolLayer(
            x_size=2, y_size=2, x_stride=2, y_stride=2, mode=self.pool_mode,
            resp_normal=False, name=name, prev_layer=prev_layer)

    # Network structure.
    print('\n%-10s\t%-25s\t%-20s\t%-20s\t%s' %
          ('Name', 'Filter', 'Input', 'Output', 'Field'))
    self.conv_layer1 = conv(
        'conv1', 8, full_kernel,
        input_shape=(self.image_y_size, self.image_x_size, self.n_channel))
    self.pool_layer1 = pool('pool1', self.conv_layer1)

    self.conv_layer2 = conv('conv2', 16, full_kernel,
                            prev_layer=self.pool_layer1)
    self.conv_layer3 = conv('conv3', 8, unit_kernel,
                            prev_layer=self.conv_layer2)
    self.conv_layer4 = conv('conv4', 16, full_kernel,
                            prev_layer=self.conv_layer3)
    self.pool_layer2 = pool('pool2', self.conv_layer4)

    self.conv_layer5 = conv('conv5', 32, full_kernel,
                            prev_layer=self.pool_layer2)
    self.conv_layer6 = conv('conv6', 16, unit_kernel,
                            prev_layer=self.conv_layer5)
    self.conv_layer7 = conv('conv7', 32, full_kernel,
                            prev_layer=self.conv_layer6)
    self.pool_layer3 = pool('pool3', self.conv_layer7)

    self.conv_layer8 = conv('conv8', 64, full_kernel,
                            prev_layer=self.pool_layer3)
    self.conv_layer9 = conv('conv9', 32, unit_kernel,
                            prev_layer=self.conv_layer8)
    self.conv_layer10 = conv('conv10', 64, full_kernel,
                             prev_layer=self.conv_layer9)
    self.pool_layer4 = pool('pool4', self.conv_layer10)

    self.conv_layer11 = conv('conv11', 128, full_kernel,
                             prev_layer=self.pool_layer4)
    self.conv_layer12 = conv('conv12', 64, unit_kernel,
                             prev_layer=self.conv_layer11)
    self.conv_layer13 = conv('conv13', 128, full_kernel,
                             prev_layer=self.conv_layer12)
    self.pool_layer5 = pool('pool5', self.conv_layer13)

    self.conv_layer14 = conv('conv14', 256, full_kernel,
                             prev_layer=self.pool_layer5)
    self.conv_layer15 = conv('conv15', 128, unit_kernel,
                             prev_layer=self.conv_layer14)
    self.conv_layer16 = conv('conv16', 256, full_kernel,
                             prev_layer=self.conv_layer15)
    self.pool_layer6 = pool('pool6', self.conv_layer16)

    self.conv_layer17 = conv('conv17', 512, full_kernel,
                             prev_layer=self.pool_layer6)
    self.conv_layer18 = conv('conv18', 512, full_kernel,
                             prev_layer=self.conv_layer17)
    # Output layer: per box one extra value, n_coord coordinates and
    # n_classes class scores; no activation and no batch normalisation.
    self.conv_layer19 = conv(
        'conv19', self.n_boxes * (1 + self.n_coord + self.n_classes),
        unit_kernel, activation='none', batch_normal=False,
        prev_layer=self.conv_layer18)

    self.layers = [
        self.conv_layer1, self.pool_layer1,
        self.conv_layer2, self.conv_layer3, self.conv_layer4,
        self.pool_layer2,
        self.conv_layer5, self.conv_layer6, self.conv_layer7,
        self.pool_layer3,
        self.conv_layer8, self.conv_layer9, self.conv_layer10,
        self.pool_layer4,
        self.conv_layer11, self.conv_layer12, self.conv_layer13,
        self.pool_layer5,
        self.conv_layer14, self.conv_layer15, self.conv_layer16,
        self.pool_layer6,
        self.conv_layer17, self.conv_layer18, self.conv_layer19,
    ]

    # Total of the per-layer `calculation` values, reported in units of
    # 1024 * 1024.
    self.calculation = sum(layer.calculation for layer in self.layers)
    print('calculation: %.2fM\n' % (self.calculation / 1024.0 / 1024.0))
def __init__(self, n_channel, n_classes, image_height, image_width,
             sentence_length, vocab_size, embedding_dim, LSTM_hidden_size):
    """Build a two-image classifier that shares one CNN between both inputs.

    Each image is passed through ``self.get_output`` using the same layer
    objects. The two 512-d outputs are multiplied element-wise and
    classified by two dense layers.

    Args:
        n_channel: Channel count of the input images.
        n_classes: Number of units of the last dense layer.
        image_height: Height of the input images.
        image_width: Width of the input images.
        sentence_length: Unused; only referenced by the disabled
            instruction branch.
        vocab_size: Unused; see ``sentence_length``.
        embedding_dim: Unused; see ``sentence_length``.
        LSTM_hidden_size: Unused; see ``sentence_length``.
    """
    # Input placeholders.
    # NOTE(review): both image placeholders are created with name='images';
    # the names are left untouched in case tensors are looked up by name.
    self.images1 = tf.placeholder(
        dtype=tf.float32,
        shape=[None, image_height, image_width, n_channel],
        name='images')
    self.images2 = tf.placeholder(
        dtype=tf.float32,
        shape=[None, image_height, image_width, n_channel],
        name='images')
    self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
    # The 'instructions' placeholder and its LSTM encoder are disabled.
    self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
    self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')

    # CNN structure shared by both images (used by self.get_output).
    conv_common = dict(n_size=3, stride=1, activation='relu',
                       batch_normal=False, weight_decay=1e-4)
    pool_common = dict(n_size=2, stride=2, mode='max', resp_normal=False)
    self.conv_layer1 = ConvLayer(
        input_shape=(None, image_height, image_width, n_channel),
        n_filter=64, name='conv1', **conv_common)
    self.pool_layer1 = PoolLayer(name='pool1', **pool_common)
    self.conv_layer2 = ConvLayer(
        input_shape=(None, int(image_height / 2), int(image_width / 2), 64),
        n_filter=128, name='conv2', **conv_common)
    self.pool_layer2 = PoolLayer(name='pool2', **pool_common)
    self.conv_layer3 = ConvLayer(
        input_shape=(None, int(image_height / 4), int(image_width / 4), 128),
        n_filter=256, name='conv3', **conv_common)
    self.pool_layer3 = PoolLayer(name='pool3', **pool_common)
    self.dense_layer1 = DenseLayer(
        input_shape=(None,
                     int(image_height / 8) * int(image_width / 8) * 256),
        hidden_dim=1024, activation='relu', dropout=True,
        keep_prob=self.keep_prob, batch_normal=False, weight_decay=1e-4,
        name='dense1')
    self.dense_layer2 = DenseLayer(
        input_shape=(None, 1024), hidden_dim=512, activation='none',
        dropout=False, keep_prob=None, batch_normal=False,
        weight_decay=1e-4, name='dense2')

    # CNN data flow: one pass per image through the same layers.
    cnn_output1 = self.get_output(self.images1, image_height, image_width)
    cnn_output2 = self.get_output(self.images2, image_height, image_width)

    # Prediction head.
    fully_connected1 = DenseLayer(
        input_shape=(None, 512), hidden_dim=256, activation='relu',
        dropout=False, keep_prob=self.keep_prob, batch_normal=False,
        weight_decay=1e-4, name='fc1')
    # The last layer yields the logits for the softmax cross entropy and the
    # arg-max below, so it must stay linear. The previous 'relu' clipped all
    # negative logits to zero.
    fully_connected2 = DenseLayer(
        input_shape=(None, 256), hidden_dim=n_classes, activation='none',
        dropout=False, keep_prob=self.keep_prob, batch_normal=False,
        weight_decay=1e-4, name='fc2')
    multi_output = tf.multiply(cnn_output1, cnn_output2,
                               name='element_wise_multiplication')
    output_fc1 = fully_connected1.get_output(input=multi_output)
    logits = fully_connected2.get_output(input=output_fc1)
    predicted = tf.argmax(logits, 1, name='predicted_class')

    # Objective: batch-summed cross entropy, added to the 'losses'
    # collection; avg_loss is the sum of everything in that collection.
    self.objective = tf.reduce_sum(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.labels))
    tf.add_to_collection('losses', self.objective)
    self.avg_loss = tf.add_n(tf.get_collection('losses'))

    # Optimizer: Adam with a learning rate of 0.01 / 0.001 / 0.0001 selected
    # by the global step (thresholds 5000 and 10000).
    def rate_after_first_threshold():
        return tf.cond(tf.less(self.global_step, 10000),
                       lambda: tf.constant(0.001),
                       lambda: tf.constant(0.0001))

    lr = tf.cond(tf.less(self.global_step, 5000),
                 lambda: tf.constant(0.01),
                 rate_after_first_threshold)
    self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
        self.avg_loss, global_step=self.global_step)

    # Observed value: mean of (label == predicted class).
    correct_prediction = tf.equal(self.labels, predicted)
    self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
class ConvNet():
    """Two-image classifier built from one shared CNN and a dense head."""

    def __init__(self, n_channel, n_classes, image_height, image_width,
                 sentence_length, vocab_size, embedding_dim, LSTM_hidden_size):
        """Create placeholders, layers, loss, training op and accuracy.

        sentence_length, vocab_size, embedding_dim and LSTM_hidden_size are
        only referenced inside the string-quoted (disabled) code below.
        """
        # Input variables.
        # NOTE(review): both image placeholders use name='images'.
        self.images1 = tf.placeholder(
            dtype=tf.float32,
            shape=[None, image_height, image_width, n_channel],
            name='images')
        self.images2 = tf.placeholder(
            dtype=tf.float32,
            shape=[None, image_height, image_width, n_channel],
            name='images')
        self.labels = tf.placeholder(dtype=tf.int64, shape=[None], name='labels')
        # Disabled code kept as a bare string statement.
        ''' self.instructions = tf.placeholder( dtype=tf.int64, shape=[None, sentence_length], name='instructions') '''
        self.keep_prob = tf.placeholder(dtype=tf.float32, name='keep_prob')
        self.global_step = tf.Variable(0, dtype=tf.int32, name='global_step')
        # CNN structure (the layer objects are reused by get_output).
        self.conv_layer1 = ConvLayer(input_shape=(None, image_height, image_width, n_channel), n_size=3, n_filter=64, stride=1, activation='relu', batch_normal=False, weight_decay=1e-4, name='conv1')
        self.pool_layer1 = PoolLayer(n_size=2, stride=2, mode='max', resp_normal=False, name='pool1')
        self.conv_layer2 = ConvLayer(input_shape=(None, int(image_height / 2), int(image_width / 2), 64), n_size=3, n_filter=128, stride=1, activation='relu', batch_normal=False, weight_decay=1e-4, name='conv2')
        self.pool_layer2 = PoolLayer(n_size=2, stride=2, mode='max', resp_normal=False, name='pool2')
        self.conv_layer3 = ConvLayer(input_shape=(None, int(image_height / 4), int(image_width / 4), 128), n_size=3, n_filter=256, stride=1, activation='relu', batch_normal=False, weight_decay=1e-4, name='conv3')
        self.pool_layer3 = PoolLayer(n_size=2, stride=2, mode='max', resp_normal=False, name='pool3')
        self.dense_layer1 = DenseLayer(
            input_shape=(None, int(image_height / 8) * int(image_width / 8) * 256), hidden_dim=1024, activation='relu', dropout=True, keep_prob=self.keep_prob, batch_normal=False, weight_decay=1e-4, name='dense1')
        self.dense_layer2 = DenseLayer(input_shape=(None, 1024), hidden_dim=512, activation='none', dropout=False, keep_prob=None, batch_normal=False, weight_decay=1e-4, name='dense2')
        # CNN data flow: both images go through the same layer objects.
        cnn_output1 = self.get_output(self.images1, image_height, image_width)
        cnn_output2 = self.get_output(self.images2, image_height, image_width)
        # Disabled LSTM branch kept as a bare string statement.
        ''' #LSTM embedding = tf.get_variable("embedding", [vocab_size, embedding_dim]) # [19, 128] input_embeddings = tf.nn.embedding_lookup(embedding, self.instructions) # [batch_size, 5, 128] instruc_cell = tf.nn.rnn_cell.BasicLSTMCell(LSTM_hidden_size) # LSTM_hidden_size = 128 lstm_outputs, lstm_state = tf.nn.dynamic_rnn(instruc_cell, input_embeddings, dtype=tf.float32) #[batch_size, 5, 128] lstm_output = lstm_outputs[:, -1, :] #[batch_size, 128] '''
        # Prediction head.
        fully_connected1 = DenseLayer(input_shape=(None, 512), hidden_dim=256, activation='relu', dropout=False, keep_prob=self.keep_prob, batch_normal=False, weight_decay=1e-4, name='fc1')
        # NOTE(review): fc2 produces the logits but is built with
        # activation='relu' - confirm whether 'none' was intended.
        fully_connected2 = DenseLayer(input_shape=(None, 256), hidden_dim=n_classes, activation='relu', dropout=False, keep_prob=self.keep_prob, batch_normal=False, weight_decay=1e-4, name='fc2')
        multi_output = tf.multiply(cnn_output1, cnn_output2, name='element_wise_multiplication')
        output_fc1 = fully_connected1.get_output(input=multi_output)
        logits = fully_connected2.get_output(input=output_fc1)
        logit = tf.argmax(logits, 1, name='predicted_class')
        # Objective: cross entropy summed over the batch, added to the
        # 'losses' collection; avg_loss sums everything in that collection.
        self.objective = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.labels))
        tf.add_to_collection('losses', self.objective)
        self.avg_loss = tf.add_n(tf.get_collection('losses'))
        # Optimizer: learning rate 0.01 below step 5000, 0.001 below step
        # 10000, 0.0001 afterwards.
        lr = tf.cond(
            tf.less(self.global_step, 5000), lambda: tf.constant(0.01), lambda: tf.cond(tf.less(self.global_step, 10000), lambda: tf.
            constant(0.001), lambda: tf.constant(0.0001)))
        self.optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(
            self.avg_loss, global_step=self.global_step)
        # Observed value: mean of (label == predicted class).
        correct_prediction = tf.equal(self.labels, logit)
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

    def get_output(self, input, image_height, image_width):
        """Run `input` through the shared CNN and return dense_layer2's output."""
        hidden_conv1 = self.conv_layer1.get_output(input=input)
        hidden_pool1 = self.pool_layer1.get_output(input=hidden_conv1)
        hidden_conv2 = self.conv_layer2.get_output(input=hidden_pool1)
        hidden_pool2 = self.pool_layer2.get_output(input=hidden_conv2)
        hidden_conv3 = self.conv_layer3.get_output(input=hidden_pool2)
        hidden_pool3 = self.pool_layer3.get_output(input=hidden_conv3)
        # Flatten to the width that dense_layer1 was declared with.
        input_dense1 = tf.reshape(
            hidden_pool3, [-1, int(image_height / 8) * int(image_width / 8) * 256])
        output_dense1 = self.dense_layer1.get_output(input=input_dense1)
        cnn_output = self.dense_layer2.get_output(input=output_dense1)
        #logit = tf.argmax(logits, 1, name='predicted_class')
        return cnn_output

    def train(self, dataloader, backup_path, n_epoch=5, batch_size=128):
        """Train on dataloader's train split, validate and checkpoint.

        Reads train_images1/2, train_labels, valid_images1/2, valid_labels,
        n_train and n_valid from `dataloader`; checkpoints are written to
        `backup_path` as 'model_<epoch>.ckpt'.
        """
        # Build the session.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.45)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        # Model saver.
        self.saver = tf.train.Saver(var_list=tf.global_variables(), write_version=tf.train.SaverDef.V2, max_to_keep=10)
        # Model initialisation.
        self.sess.run(tf.global_variables_initializer())
        # Model training.
        # NOTE(review): range(0, n_epoch + 1) runs n_epoch + 1 epochs.
        for epoch in range(0, n_epoch + 1):
            train_images1 = dataloader.train_images1
            train_images2 = dataloader.train_images2
            #train_instructions = dataloader.train_instructions
            train_labels = dataloader.train_labels
            valid_images1 = dataloader.valid_images1
            valid_images2 = dataloader.valid_images2
            #valid_instructions = dataloader.valid_instructions
            valid_labels = dataloader.valid_labels
            # Run this epoch's training and accumulate the objective value.
            train_accuracy, train_loss = 0.0, 0.0
            for i in range(0, dataloader.n_train, batch_size):
                batch_images1 = train_images1[i:i + batch_size]
                batch_images2 = train_images2[i:i + batch_size]
                #batch_instructions = train_instructions[i: i+batch_size]
                batch_labels = train_labels[i:i + batch_size]
                [_, avg_accuracy, avg_loss, iteration] = self.sess.run(
                    fetches=[
                        self.optimizer, self.accuracy, self.avg_loss, self.global_step
                    ], feed_dict={
                        self.images1: batch_images1, self.images2: batch_images2,
                        #self.instructions: batch_instructions,
                        self.labels: batch_labels, self.keep_prob: 0.5
                    })
                # Weight by the number of samples in the batch.
                # NOTE(review): the objective is already a batch sum
                # (tf.reduce_sum), so the reported loss is scaled by the
                # batch size once more - confirm this is intended.
                train_accuracy += avg_accuracy * batch_images1.shape[0]
                train_loss += avg_loss * batch_images1.shape[0]
            train_accuracy = 1.0 * train_accuracy / dataloader.n_train
            train_loss = 1.0 * train_loss / dataloader.n_train
            # After training, compute this epoch's validation loss and accuracy.
            valid_accuracy, valid_loss = 0.0, 0.0
            for i in range(0, dataloader.n_valid, batch_size):
                batch_images1 = valid_images1[i:i + batch_size]
                batch_images2 = valid_images2[i:i + batch_size]
                #batch_instructions = valid_instructions[i: i+batch_size]
                batch_labels = valid_labels[i:i + batch_size]
                [avg_accuracy, avg_loss] = self.sess.run(
                    fetches=[self.accuracy, self.avg_loss], feed_dict={
                        self.images1: batch_images1, self.images2: batch_images2,
                        #self.instructions: batch_instructions,
                        self.labels: batch_labels, self.keep_prob: 1.0
                    })
                valid_accuracy += avg_accuracy * batch_images1.shape[0]
                valid_loss += avg_loss * batch_images1.shape[0]
            valid_accuracy = 1.0 * valid_accuracy / dataloader.n_valid
            valid_loss = 1.0 * valid_loss / dataloader.n_valid
            # `iteration` is the global step fetched with the last training
            # batch; it is unbound if the training loop ran zero times.
            print(
                'epoch{%d}, iter[%d], train precision: %.6f, train loss: %.6f, '
                'valid precision: %.6f, valid loss: %.6f' % (epoch, iteration, train_accuracy, train_loss, valid_accuracy, valid_loss))
            sys.stdout.flush()
            # Save the model.
            if not os.path.exists(backup_path):
                os.makedirs(backup_path)
            saver_path = self.saver.save(
                self.sess, os.path.join(backup_path, 'model_%d.ckpt' % (epoch)))
        self.sess.close()

    def test(self, dataloader, backup_path, epoch, batch_size=128):
        """Restore 'model_<epoch>.ckpt' and print the mean test accuracy.

        NOTE(review): feeds self.images, which the visible __init__ never
        assigns (it defines images1 / images2) - confirm before use.
        """
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.25)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        # Load the model.
        self.saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
        model_path = os.path.join(backup_path, 'model_%d.ckpt' % (epoch))
        assert (os.path.exists(model_path + '.index'))
        self.saver.restore(self.sess, model_path)
        print('read model from %s' % (model_path))
        # Compute the accuracy on the test set.
        accuracy_list = []
        test_images = dataloader.data_augmentation(dataloader.test_images, flip=False, crop=True, crop_shape=(24, 24, 3), whiten=True, noise=False)
        test_labels = dataloader.test_labels
        for i in range(0, dataloader.n_test, batch_size):
            batch_images = test_images[i:i + batch_size]
            batch_labels = test_labels[i:i + batch_size]
            [avg_accuracy] = self.sess.run(fetches=[self.accuracy], feed_dict={
                self.images: batch_images, self.labels: batch_labels, self.keep_prob: 1.0
            })
            accuracy_list.append(avg_accuracy)
        # Unweighted mean over the batches.
        print('test precision: %.4f' % (numpy.mean(accuracy_list)))
        self.sess.close()

    def debug(self):
        """Evaluate self.observe on random inputs and print the result.

        NOTE(review): self.observe and self.images are not assigned in the
        visible __init__ - confirm where they are expected to come from.
        """
        sess = tf.Session()
        sess.run(tf.global_variables_initializer())
        [temp] = sess.run(fetches=[self.observe], feed_dict={
            self.images: numpy.random.random(size=[128, 24, 24, 3]), self.labels: numpy.random.randint(low=0, high=9, size=[
                128, ]), self.keep_prob: 1.0
        })
        print(temp)

    def observe_salience(self, batch_size=128, image_h=32, image_w=32, n_channel=3, num_test=10, epoch=1):
        """Restore a checkpoint and evaluate probabilities and gradients.

        NOTE(review): relies on cifar10, self.images, self.labels_prob,
        self.labels_max_prob, self.labels_pred and self.gradient, none of
        which is defined in the visible part of this class. The method also
        appears to continue beyond the visible source.
        """
        if not os.path.exists('results/epoch%d/' % (epoch)):
            os.makedirs('results/epoch%d/' % (epoch))
        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
        sess = tf.Session()
        # Load the model.
        model_path = 'backup/cifar10/model_%d.ckpt' % (epoch)
        assert (os.path.exists(model_path + '.index'))
        saver.restore(sess, model_path)
        print('read model from %s' % (model_path))
        # Fetch images and compute the gradients.
        for batch in range(num_test):
            batch_image, batch_label = cifar10.test.next_batch(batch_size)
            # NOTE(review): the reshape to (image_h, image_w, n_channel) can
            # only succeed for a single image - confirm the batch_size used.
            image = numpy.array(
                batch_image.reshape([image_h, image_w, n_channel]) * 255, dtype='uint8')
            result = sess.run(
                [
                    self.labels_prob, self.labels_max_prob, self.labels_pred, self.gradient
                ], feed_dict={
                    self.images: batch_image, self.labels: batch_label, self.keep_prob: 0.5
                })
print(result[0:3], result[3][0].shape) gradient = sess.run(self.gradient, feed_dict={ self.images: batch_image, self.keep_prob: 0.5 }) gradient = gradient[0].reshape([image_h, image_w, n_channel]) gradient = numpy.max(gradient, axis=2) gradient = numpy.array((gradient - gradient.min()) * 255 / (gradient.max() - gradient.min()), dtype='uint8') print(gradient.shape) # 使用pyplot画图 plt.subplot(121) plt.imshow(image) plt.subplot(122) plt.imshow(gradient, cmap=plt.cm.gray) plt.savefig('results/epoch%d/result_%d.png' % (epoch, batch)) def observe_hidden_distribution(self, batch_size=128, image_h=32, image_w=32, n_channel=3, num_test=10, epoch=1): if not os.path.exists('results/epoch%d/' % (epoch)): os.makedirs('results/epoch%d/' % (epoch)) saver = tf.train.Saver(write_version=tf.train.SaverDef.V2) sess = tf.Session() # 读取模型 model_path = 'backup/cifar10/model_%d.ckpt' % (epoch) if os.path.exists(model_path + '.index'): saver.restore(sess, model_path) print('read model from %s' % (model_path)) else: sess.run(tf.global_variables_initializer()) # 获取图像并计算梯度 for batch in range(num_test): batch_image, batch_label = cifar10.test.next_batch(batch_size) result = sess.run( [ self.nobn_conv1, self.bn_conv1, self.nobn_conv2, self.bn_conv2, self.nobn_conv3, self.bn_conv3, self.nobn_fc1, self.nobn_fc1, self.nobn_softmax, self.bn_softmax ], feed_dict={ self.images: batch_image, self.labels: batch_label, self.keep_prob: 0.5 }) distribution1 = result[0][:, 0].flatten() distribution2 = result[1][:, 0].flatten() distribution3 = result[2][:, 0].flatten() distribution4 = result[3][:, 0].flatten() distribution5 = result[4][:, 0].flatten() distribution6 = result[5][:, 0].flatten() distribution7 = result[6][:, 0].flatten() distribution8 = result[7][:, 0].flatten() plt.subplot(241) plt.hist(distribution1, bins=50, color='#1E90FF') plt.title('convolutional layer 1') plt.subplot(242) plt.hist(distribution3, bins=50, color='#1C86EE') plt.title('convolutional layer 2') plt.subplot(243) 
plt.hist(distribution5, bins=50, color='#1874CD') plt.title('convolutional layer 3') plt.subplot(244) plt.hist(distribution7, bins=50, color='#5CACEE') plt.title('full connection layer') plt.subplot(245) plt.hist(distribution2, bins=50, color='#00CED1') plt.title('batch normalized') plt.subplot(246) plt.hist(distribution4, bins=50, color='#48D1CC') plt.title('batch normalized') plt.subplot(247) plt.hist(distribution6, bins=50, color='#40E0D0') plt.title('batch normalized') plt.subplot(248) plt.hist(distribution8, bins=50, color='#00FFFF') plt.title('batch normalized') plt.show()