def network_fn(inputs):
    """Fine-grained classification with multiplex spatial transformation
    channels utilizing Inception nets."""
    end_points = {}
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=FLAGS.weight_decay)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                transformer_theta = localization_net_alpha(inputs, NUM_TRANSFORMER,
                                                           NUM_THETA_PARAMS)
                transformer_theta_split = tf.split(transformer_theta, NUM_TRANSFORMER, axis=1)
                end_points['stn/localization/transformer_theta'] = transformer_theta
            transformer_outputs = []
            for theta in transformer_theta_split:
                transformer_outputs.append(
                    transformer(inputs, theta, transformer_output_size,
                                sampling_kernel='bilinear'))
            inception_outputs = []
            transformer_outputs_shape = [FLAGS.batch_size,
                                         transformer_output_size[0],
                                         transformer_output_size[1], 3]
            with tf.variable_scope('classification'):
                for path_idx, inception_inputs in enumerate(transformer_outputs):
                    with tf.variable_scope('path_{}'.format(path_idx)):
                        inception_inputs.set_shape(transformer_outputs_shape)
                        net, _ = inception_v2.inception_v2_base(inception_inputs)
                        inception_outputs.append(net)
                # Concatenate the endpoints: num_batch*7*7*(num_transformer*1024).
                multipath_outputs = tf.concat(inception_outputs, axis=-1)
                # Final FC layer producing the classification logits.
                classification_logits = _inception_logits(multipath_outputs, NUM_CLASSES,
                                                           dropout_keep_prob)
                end_points['stn/classification/logits'] = classification_logits
    return classification_logits, end_points
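# A minimal sketch of how network_fn might be wired into a training graph.
# The 224x224 input resolution, optimizer choice, and learning rate below are
# assumptions for illustration, not values taken from the original project.
images = tf.placeholder(tf.float32, [FLAGS.batch_size, 224, 224, 3])
labels = tf.placeholder(tf.int64, [FLAGS.batch_size])
logits, _ = network_fn(images)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
total_loss = tf.losses.get_total_loss()                   # includes weight decay
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)   # batch-norm statistics updates
with tf.control_dependencies(update_ops):
    train_op = tf.train.AdamOptimizer(1e-4).minimize(total_loss)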
def transformer_inference(image):
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=0.0)
    with slim.arg_scope(arg_scope):
        # Run batch norm and dropout in inference mode.
        with slim.arg_scope([layers_lib.batch_norm, layers_lib.dropout],
                            is_training=False):
            with tf.variable_scope('stn'):
                with tf.variable_scope('localization'):
                    transformer_theta = localization_net_alpha(image, num_transformer,
                                                               NUM_THETA_PARAMS)
                    transformer_theta_split = tf.split(transformer_theta,
                                                       num_transformer, axis=1)
                transformer_outputs = []
                transformer_output_size = [transformed_height, transformed_width]
                for theta in transformer_theta_split:
                    transformer_outputs.append(
                        transformer(image, theta, transformer_output_size,
                                    sampling_kernel='bilinear'))
    return transformer_outputs
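# A hedged inference sketch for transformer_inference: feed an image batch and
# fetch the transformed crops. The checkpoint path and the 224x224 input size
# are assumptions; the globals it relies on (num_transformer, transformed_height,
# transformed_width) are expected to be defined elsewhere in the original code.
import numpy as np

image = tf.placeholder(tf.float32, [1, 224, 224, 3])
crops = transformer_inference(image)
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, '/path/to/stn_model.ckpt')  # hypothetical checkpoint path
    crop_values = sess.run(crops,
                           feed_dict={image: np.zeros((1, 224, 224, 3), np.float32)})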
def testModelHasExpectedNumberOfParameters(self):
    batch_size = 5
    height, width = 224, 224
    inputs = tf.random_uniform((batch_size, height, width, 3))
    with slim.arg_scope(inception.inception_v2_arg_scope()):
        inception.inception_v2_base(inputs)
    total_params, _ = slim.model_analyzer.analyze_vars(
        slim.get_model_variables())
    self.assertAlmostEqual(10173112, total_params)
def __call__(self, imgs, seqVec, seqNums, batchSize):
    # Extract image features with a frozen InceptionV2 backbone.
    arg_scope = inception_v2_arg_scope()
    with slim.arg_scope(arg_scope):
        _, feat = inception_v2(preprocess(imgs), num_classes=1001,
                               is_training=False, reuse=tf.AUTO_REUSE)
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        # Project the image features into the word-embedding space.
        img_inputs = MLP("img_embedding", feat, 1024, self.embedSize)
        img_inputs = tf.expand_dims(img_inputs, axis=1)

        if self.rnnType == "lstm":
            cell = tf.nn.rnn_cell.BasicLSTMCell(self.hiddenSize)
        if self.rnnType == "rnn":
            cell = tf.nn.rnn_cell.BasicRNNCell(self.hiddenSize)
        if self.rnnType == "gru":
            cell = tf.nn.rnn_cell.GRUCell(self.hiddenSize)
        cells = tf.nn.rnn_cell.MultiRNNCell([cell] * self.layerSize)
        if batchSize > 1:
            cells = tf.nn.rnn_cell.DropoutWrapper(cells, input_keep_prob=0.7,
                                                  output_keep_prob=0.7,
                                                  state_keep_prob=0.7)

        # Feed the image embedding first to obtain the decoder's initial state.
        init_state = cells.zero_state(batchSize, tf.float32)
        _, img_states = tf.nn.dynamic_rnn(cells, img_inputs, initial_state=init_state)

        embeddingMat = tf.get_variable(
            "embeddingMat", [self.targetVocSize, self.embedSize],
            initializer=tf.truncated_normal_initializer(stddev=0.08))
        seqVec = tf.nn.embedding_lookup(embeddingMat, seqVec)

        if batchSize == 1:  # Test phase
            outputs, states = tf.nn.dynamic_rnn(cells, seqVec, initial_state=init_state)
            outputs = tf.reshape(outputs, [-1, self.hiddenSize])
            logits = MLP("logits", outputs, self.hiddenSize, self.targetVocSize)
            probs = tf.nn.softmax(logits)
            wordVal = tf.argmax(probs, axis=1)
            return probs, wordVal, states, img_states, init_state
        else:  # Training phase
            outputs, _ = tf.nn.dynamic_rnn(cells, seqVec,
                                           sequence_length=seqNums,
                                           initial_state=img_states)
            outputs = tf.reshape(outputs, [-1, self.hiddenSize])
            logits = MLP("logits", outputs, self.hiddenSize, self.targetVocSize)
            probs = tf.nn.softmax(logits)
            return probs
def stn_cnn_with_image_output(inputs, transformer_output_size, num_classes):
    """Fine-grained classification with multiplex spatial transformation
    channels utilizing Inception nets."""
    arg_scope = inception_v2.inception_v2_arg_scope(weight_decay=weight_decay)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('stn'):
            with tf.variable_scope('localization'):
                transformer_theta = localization_net_beta(inputs, NUM_TRANSFORMER,
                                                          NUM_THETA_PARAMS)
                transformer_theta_split = tf.split(transformer_theta, NUM_TRANSFORMER, axis=1)
            transformer_outputs = []
            for theta in transformer_theta_split:
                transformer_outputs.append(
                    transformer(inputs, theta, transformer_output_size,
                                sampling_kernel='bilinear'))
    return transformer_outputs
# (This snippet begins mid-way through the features dict of a tf.parse_single_example
# call inside a TFRecord decoding function; its opening lines are not in the source.)
        tf.FixedLenFeature([], tf.int64),
        "image": tf.FixedLenFeature([], tf.string)
    })
    img = tf.decode_raw(features["image"], tf.uint8)
    img = tf.reshape(img, [image_pixels, image_pixels, 3])
    img = tf.cast(img, tf.float32)
    label = tf.cast(features["label"], tf.int32)
    return img, label


images = tf.placeholder(tf.float32, [None, image_pixels, image_pixels, 3],
                        name="input/x_input")
labels = tf.placeholder(tf.int64, [None], name="input/y_input")

with slim.arg_scope(inception_v2_arg_scope()):
    logits, end_points = inception_v2(images, num_classes=classes, is_training=True)

# Restore all pretrained variables except the final logits layer,
# which is re-trained for the new number of classes.
exclude = ['InceptionV2/Logits']
variables_to_restore = slim.get_variables_to_restore(exclude=exclude)

one_hot_labels = slim.one_hot_encoding(labels, classes)
loss = tf.losses.softmax_cross_entropy(onehot_labels=one_hot_labels, logits=logits)
total_loss = tf.losses.get_total_loss()

# Batch-norm statistics must be updated before each training step.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_step = tf.train.AdamOptimizer(0.00002).minimize(loss=total_loss)

correct_prediction = tf.equal(labels,  # the snippet is truncated here in the source
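# A hedged sketch of how variables_to_restore is typically consumed when
# fine-tuning: load the pretrained InceptionV2 weights (the checkpoint path is
# an assumption) while the excluded logits layer keeps its fresh initialization.
restorer = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    restorer.restore(sess, 'inception_v2.ckpt')  # hypothetical pretrained checkpoint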
def train(train_record_file,
          train_log_step,
          train_param,
          val_record_file,
          val_log_step,
          labels_nums,
          data_shape,
          snapshot,
          snapshot_prefix):
    '''
    :param train_record_file: tfrecord file for training
    :param train_log_step: interval (in steps) for logging training progress
    :param train_param: training parameters [base_lr, max_steps]
    :param val_record_file: tfrecord file for validation
    :param val_log_step: interval (in steps) for logging validation progress
    :param labels_nums: number of labels (classes)
    :param data_shape: input data shape [batch_size, height, width, depth]
    :param snapshot: interval (in steps) for saving the model
    :param snapshot_prefix: filename prefix for saved model checkpoints
    :return:
    '''
    [base_lr, max_steps] = train_param
    [batch_size, resize_height, resize_width, depths] = data_shape

    # Number of training and validation examples.
    train_nums = get_example_nums(train_record_file)
    val_nums = get_example_nums(val_record_file)
    print('train nums:%d,val nums:%d' % (train_nums, val_nums))

    # Read images and labels from the tfrecord files.
    # Training data should generally be shuffled (shuffle=True).
    train_images, train_labels = read_records(train_record_file, resize_height,
                                              resize_width, type='normalization')
    train_images_batch, train_labels_batch = get_batch_images(train_images, train_labels,
                                                              batch_size=batch_size,
                                                              labels_nums=labels_nums,
                                                              one_hot=True, shuffle=True)
    # Validation data does not need to be shuffled.
    val_images, val_labels = read_records(val_record_file, resize_height,
                                          resize_width, type='normalization')
    val_images_batch, val_labels_batch = get_batch_images(val_images, val_labels,
                                                          batch_size=batch_size,
                                                          labels_nums=labels_nums,
                                                          one_hot=True, shuffle=False)

    # Define the model (input_images, input_labels, keep_prob, and is_training
    # are presumably placeholders defined elsewhere in the original script):
    with slim.arg_scope(inception_v2.inception_v2_arg_scope()):
        out, end_points = inception_v2.inception_v2(inputs=input_images,
                                                    num_classes=labels_nums,
                                                    dropout_keep_prob=keep_prob,
                                                    is_training=is_training)

    # Specify the loss function: losses defined via tf.losses are added to the
    # loss collection automatically, so no explicit add_loss() is needed.
    tf.losses.softmax_cross_entropy(onehot_labels=input_labels, logits=out)  # cross-entropy loss (about 1.6)
    # slim.losses.add_loss(my_loss)
    loss = tf.losses.get_total_loss(add_regularization_losses=True)  # adds regularization losses (about 2.2)

    # Specify the optimization scheme:
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=base_lr)

    # global_step = tf.Variable(0, trainable=False)
    # learning_rate = tf.train.exponential_decay(0.05, global_step, 150, 0.9)
    # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
    # train_tensor = optimizer.minimize(loss, global_step)
    # train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)

    # When `batch_norm` layers are used, each layer's moving `average` and
    # `variance` must be updated. These updates are not part of the normal
    # training step, so the update ops are collected via `tf.get_collection`
    # and run manually before the train op.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # create_train_op ensures that when the loss is evaluated,
        # the update_ops are run and the gradient updates are computed.
        train_op = slim.learning.create_train_op(total_loss=loss, optimizer=optimizer)

    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1),
                                               tf.argmax(input_labels, 1)), tf.float32))

    # Training loop.
    step_train(train_op, loss, accuracy,
               train_images_batch, train_labels_batch, train_nums, train_log_step,
               val_images_batch, val_labels_batch, val_nums, val_log_step,
               snapshot_prefix, snapshot)
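# Hedged example invocation of train(); the file paths and hyper-parameters
# below are placeholders, not values from the original project.
if __name__ == '__main__':
    train(train_record_file='data/train.tfrecords',
          train_log_step=100,
          train_param=[0.01, 10000],            # [base_lr, max_steps]
          val_record_file='data/val.tfrecords',
          val_log_step=200,
          labels_nums=5,
          data_shape=[32, 224, 224, 3],         # [batch_size, height, width, depth]
          snapshot=2000,
          snapshot_prefix='models/model.ckpt')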