def resnet50(x, nums, is_training=True, reuse=False):
    """Build a ResNet v2-50 trunk followed by a linear `fc` projection.

    Assumes input is [batch, height_in, width_in, channels].

    Input:
    - x: N x H x W x 3 image batch
    - nums: number of outputs of the final fully connected layer
    - is_training: forwarded to the ResNet; also used as the `trainable`
      flag of the `fc` layer
    - reuse: bool->True if test

    Outputs:
    - net: output of the `fc` layer (no activation), N x nums
    - variables: tf variables collected under the 'resnet_v2_50' scope
      (the `fc` layer is created under its own scope name)
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    with tf.name_scope("Resnet", [x]), \
            slim.arg_scope(resnet_v2.resnet_arg_scope()):
        # Pooled trunk output, annotated upstream as shape=(N, 1, 1, 2048).
        pooled, _ = resnet_v2.resnet_v2_50(
            x,
            num_classes=None,
            is_training=is_training,
            reuse=reuse,
            scope='resnet_v2_50')
        features = tf.squeeze(pooled, axis=[1, 2])  # shape=(N, 2048)
        projected = slim.fully_connected(
            features,
            num_outputs=nums,
            activation_fn=None,
            trainable=is_training,
            reuse=reuse,
            scope='fc')

    trunk_variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return projected, trunk_variables
def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """Encode an image batch with a ResNet v2-50 trunk.

    Assumes input is [batch, height_in, width_in, channels].

    Input:
    - x: N x H x W x 3
    - is_training: forwarded to the network
    - weight_decay: float, forwarded to the ResNet arg scope
    - reuse: bool->True if test

    Outputs:
    - net: network output with axes 1 and 2 squeezed away
    - variables: tf variables collected under the 'resnet_v2_50' scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    with tf.name_scope("Encoder_resnet", [x]), slim.arg_scope(
            resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
        pooled, _ = resnet_v2.resnet_v2_50(
            x,
            num_classes=None,
            is_training=is_training,
            reuse=reuse,
            scope='resnet_v2_50')
        features = tf.squeeze(pooled, axis=[1, 2])

    return features, tf.contrib.framework.get_variables('resnet_v2_50')
def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """Run ResNet v2-50 (without a classification layer) over `x`.

    Assumes input is [batch, height_in, width_in, channels].

    Input:
    - x: N x H x W x 3
    - is_training: forwarded to the network
    - weight_decay: float, forwarded to the ResNet arg scope
    - reuse: bool->True if test

    Outputs:
    - net: network output after squeezing axes 1 and 2
    - variables: tf variables collected under the 'resnet_v2_50' scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    trunk_scope = 'resnet_v2_50'
    with tf.name_scope("Encoder_resnet", [x]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            trunk_out, _ = resnet_v2.resnet_v2_50(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope=trunk_scope)
            encoded = tf.squeeze(trunk_out, axis=[1, 2])

    trunk_variables = tf.contrib.framework.get_variables(trunk_scope)
    return encoded, trunk_variables
def build_pretrained_graph(self, images, resnet_layer, checkpoint,
                           is_training, reuse=False):
    """See baseclass.

    Builds ResNet v2-50 over `images` and returns the activations of
    block number `resnet_layer`, the list of restorable variables (anything
    whose name contains 'global_step' is dropped), and an init function that
    restores those variables from `checkpoint`. The init function is `None`
    unless `is_training` is true and `reuse` is false.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, endpoints = resnet_v2.resnet_v2_50(
            images, is_training=is_training, reuse=reuse)

    block_key = 'resnet_v2_50/block%d' % resnet_layer
    resnet_output = endpoints[block_key]

    resnet_variables = [
        var for var in slim.get_variables_to_restore()
        if 'global_step' not in var.name
    ]

    # Only the first training-time build restores the pretrained weights.
    if not is_training or reuse:
        return resnet_output, resnet_variables, None

    init_saver = tf.train.Saver(resnet_variables)

    def init_fn(scaffold, sess):
        del scaffold
        init_saver.restore(sess, checkpoint)

    return resnet_output, resnet_variables, init_fn
def __init__(self, x, num_classes=1001, is_training=False):
    """Populate the tensorflow graph with a ResNet50-v2 model.

    Args:
        x (tf.Variable): The variable in the tensorflow graph that feeds
            into the model nodes.
        num_classes (int): Number of predicted classes for classification
            tasks. If 0 or None, the features before the logit layer are
            returned.
        is_training (bool): Whether batch_norm layers are in training mode.
    """
    super(ResNet50v2, self).__init__()
    self.x = x
    self.num_classes = num_classes

    # Build the network once; only its endpoint dictionary is kept.
    with slim.arg_scope(resnet_arg_scope()):
        _, raw_end_points = resnet_v2_50(
            x,
            num_classes=num_classes,
            is_training=is_training,
            reuse=None)

    self.end_points = _get_updated_endpoints(raw_end_points)
    self.variables_to_restore = slim.get_variables_to_restore(exclude=[])
def test_network(img_path):
    """Restore "train.ckpt" and print the 2-class predictions for one image.

    The image at `img_path` is read, resized to 224 x 224 x 3, wrapped into a
    single-element float32 batch and fed through ResNet v2-50 in inference
    mode.
    """
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            x, num_classes=2, is_training=False)
    predictions = end_points["predictions"]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "train.ckpt")

        target_shape = (224, 224, 3)
        image = transform.resize(io.imread(img_path), target_shape)
        batch = np.asarray([image], np.float32)

        predictions_val = predictions.eval(feed_dict={x: batch})
        print(predictions_val)
def tower_loss(scope):
    """Build the network selected by the global `net` and return its loss.

    Reads one batch via `read_and_decode()`, builds the matching VGG or
    ResNet model, registers the loss through `cal_loss`, sums every entry of
    the 'losses' collection under `scope`, and emits a scalar summary for
    each individual loss and for the total.

    Raises:
        Exception: if `net` names no supported network.
    """
    images, labels = read_and_decode()

    if net in ('vgg_16', 'vgg_19'):
        with slim.arg_scope(vgg.vgg_arg_scope()):
            build = vgg.vgg_16 if net == 'vgg_16' else vgg.vgg_19
            logits, end_points = build(images,
                                       num_classes=FLAGS.num_classes)
    elif net in ('resnet_v1_101', 'resnet_v1_50', 'resnet_v2_50'):
        resnet_module = resnet_v2 if net == 'resnet_v2_50' else resnet_v1
        with slim.arg_scope(resnet_module.resnet_arg_scope()):
            # The builder function carries the same name as the `net` value.
            logits, end_points = getattr(resnet_module, net)(
                images, num_classes=FLAGS.num_classes)
            # The ResNet branches reshape their logits to a 2-D tensor.
            logits = tf.reshape(logits,
                                [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)

    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')

    # Strip the per-tower prefix so that all towers share summary names.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for loss_tensor in losses + [total_loss]:
        loss_name = re.sub(tower_prefix, '', loss_tensor.op.name)
        tf.summary.scalar(loss_name, loss_tensor)
    return total_loss
def fprop(self, x):
    """Build the 1001-class ResNet v2-50 under `self._var_scope`.

    The input is passed through `self._preprocessing_fn` first. The first
    time a given variable scope is seen, its variables are restored from the
    latest checkpoint, with the scope prefix and the ':0' suffix stripped
    from the variable names to obtain checkpoint names.

    Returns the endpoint dictionary produced by `_get_updated_endpoints`.
    """
    num_original_classes = 1001

    def var_to_ckpt_name(var):
        return var.name.replace(self._var_scope + '/', '').replace(':0', '')

    with slim.arg_scope(resnet_arg_scope()), \
            tf.variable_scope(self._var_scope):
        x = self._preprocessing_fn(x)
        net, end_points = resnet_v2_50(
            x,
            num_classes=num_original_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)
    end_points = self._get_updated_endpoints(end_points)

    # Load weights for a particular scope only once
    if self._var_scope in self._scopes_loaded:
        return end_points

    variables_to_restore = [
        var for var in slim.get_variables_to_restore(exclude=[])
        if var.name.split('/')[0] == self._var_scope
    ]
    variable_name_map = {
        var_to_ckpt_name(var): var for var in variables_to_restore
    }
    saver = tf.train.Saver(var_list=variable_name_map)
    saver.restore(self._sess, self._get_latest_checkpoint_path())
    self._scopes_loaded.add(self._var_scope)
    return end_points
def __call__(self, image_batch):
    """Run the configured backbone over `image_batch` and return features.

    `self.model` is compared against `vgg16` and `resnet101`; the matching
    arg scope is entered and the model is called (the ResNet with
    `num_classes=None`).

    Raises:
        ValueError: if `self.model` is neither of the supported backbones.
            Previously this case reached `return features` with `features`
            unbound and failed with an opaque UnboundLocalError.
    """
    if self.model == vgg16:
        with slim.arg_scope(vgg.vgg_arg_scope()):
            features, _ = self.model(inputs=image_batch)
    elif self.model == resnet101:
        with slim.arg_scope(resnet.resnet_arg_scope()):
            features, _ = self.model(inputs=image_batch, num_classes=None)
    else:
        raise ValueError(
            'Unsupported feature extractor model: %r' % (self.model,))
    return features
def resnet_fm(input_ph):
    """Return ResNet v2-50 features for `input_ph` with axes 1 and 2 squeezed.

    The network is built in inference mode, without a classification layer,
    and with variable reuse set to `tf.AUTO_REUSE`.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        pooled, _ = resnet_v2.resnet_v2_50(
            input_ph,
            num_classes=None,
            is_training=False,
            reuse=tf.AUTO_REUSE)
    return tf.squeeze(pooled, axis=[1, 2])
def get_class_resnet(inputs, num_classes, is_training=False):
    """Build a ResNet v2-50 classifier under the "classifier" variable scope.

    Variables are created through `float32_variable_storage_getter` and are
    reused automatically on repeated calls.

    Returns:
        (logits, end_points) exactly as produced by `resnet_v2.resnet_v2_50`.
    """
    with tf.variable_scope(
            "classifier",
            custom_getter=float32_variable_storage_getter), \
            slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            inputs,
            num_classes,
            reuse=tf.AUTO_REUSE,
            is_training=is_training)
    return logits, end_points
def __init__(self,
             tensor,
             keep_prob=1.0,
             num_classes=1001,
             retrain_layer=None,
             weights_path='./weights/resnet_v2_101.ckpt'):
    """Create the ResNet v2-101 graph on top of `tensor`.

    Args:
        tensor: input tensor, forwarded to `Model.__init__`.
        keep_prob: forwarded to `Model.__init__`.
        num_classes: number of output classes of the network.
        retrain_layer: layers to retrain; omitting it (or passing `None`)
            is equivalent to an empty list. The network is built in
            training mode only when this is non-empty.
        weights_path: checkpoint path, forwarded to `Model.__init__`.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between every instance constructed without `retrain_layer`.
    if retrain_layer is None:
        retrain_layer = []

    # Call the parent class
    Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                   weights_path)

    # Create the Graph
    is_training = bool(retrain_layer)
    with slim.arg_scope(resnet_arg_scope()):
        self.final, self.endpoints = resnet_v2_101(
            self.tensor,
            num_classes=num_classes,
            is_training=is_training,
            global_pool=True  # True: both height_out and width_out equal one
        )
def get_logits_prob(self, batch_input):
    """Build a 1001-class ResNet v2-50 and return its logits and probabilities.

    :param batch_input: the input batch. Must be from size [?, 224, 224, 3]
    :return: the logits, and the squeezed "predictions" endpoint with its
        first entry along the leading axis removed
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            batch_input, num_classes=1001, is_training=False)

    squeezed = tf.squeeze(end_points["predictions"])
    # NOTE(review): `[1:]` slices the first axis left after the squeeze; with
    # more than one image in the batch that is presumably the batch axis
    # rather than the class axis - confirm against callers.
    return logits, squeezed[1:]
def resnet_v2_50(inputs, is_training=True):
    """Hand-assembled ResNet v2-50 returning the globally averaged features.

    Builds the 7x7 root convolution, a 3x3 max pool, four bottleneck stages,
    the final 'postnorm' batch norm + ReLU, and averages over axes 1 and 2
    (dimensions kept).
    """
    # (name, base_depth, num_units, stride) for each residual stage.
    stage_specs = [
        ('block1', 64, 3, 2),
        ('block2', 128, 4, 2),
        ('block3', 256, 6, 2),
        ('block4', 512, 3, 1),
    ]
    blocks = [
        resnet_v2.resnet_v2_block(
            name, base_depth=depth, num_units=units, stride=stride)
        for name, depth, units, stride in stage_specs
    ]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()), \
            tf.variable_scope('resnet_v2_50', 'resnet_v2', [inputs]), \
            slim.arg_scope([
                slim.conv2d, resnet_v2.bottleneck,
                resnet_utils.stack_blocks_dense
            ]), \
            slim.arg_scope([slim.batch_norm], is_training=is_training):
        # The root convolution has neither activation nor normalization.
        with slim.arg_scope([slim.conv2d],
                            activation_fn=None,
                            normalizer_fn=None):
            features = resnet_utils.conv2d_same(
                inputs, 64, 7, stride=2, scope='conv1')
        features = slim.max_pool2d(features, [3, 3], stride=2, scope='pool1')
        features = resnet_utils.stack_blocks_dense(features, blocks)
        # This is needed because the pre-activation variant does not have
        # batch normalization or activation functions in the residual unit
        # output. See Appendix of [2].
        features = slim.batch_norm(
            features, activation_fn=nn_ops.relu, scope='postnorm')
        features = tf.reduce_mean(
            features, [1, 2], name='pool5', keepdims=True)
    return features
def extract_features_resnet50(self, im, scope_name, reuse=False):
    """Return flattened 512-output ResNet v2-50 features for `im`.

    The network is built under the name scope `scope_name` with global
    pooling enabled; `self.is_training` selects the training mode. The static
    output shape is printed before flattening.
    """
    use_global_pool = True
    num_classes = 512

    with tf.name_scope(scope_name), \
            slim.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_out, _ = resnet_v2.resnet_v2_50(
            inputs=im,
            num_classes=num_classes,
            global_pool=use_global_pool,
            is_training=self.is_training,
            scope='resnet_v2_50',
            reuse=reuse)
        print('\nShape after Resnet_50\n')
        print(resnet_out.get_shape())
        flattened = layers.flatten(resnet_out)
    return flattened
def __init__(self,
             tensor,
             keep_prob=1.0,
             num_classes=1001,
             retrain_layer=None,
             weights_path='./weights/resnet_v2_101.ckpt'):
    """Create the ResNet v2-101 graph on top of `tensor`.

    Args:
        tensor: input tensor, forwarded to `Model.__init__`.
        keep_prob: forwarded to `Model.__init__`.
        num_classes: number of output classes of the network.
        retrain_layer: layers to retrain; omitting it (or passing `None`)
            is equivalent to an empty list. The network is built in
            training mode only when this is non-empty.
        weights_path: checkpoint path, forwarded to `Model.__init__`.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between every instance constructed without `retrain_layer`.
    if retrain_layer is None:
        retrain_layer = []

    # Call the parent class
    Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                   weights_path)

    # TODO This implementation has a problem while validation (is still set
    # to training)
    is_training = bool(retrain_layer)
    with slim.arg_scope(resnet_arg_scope()):
        self.final, self.endpoints = resnet_v2_101(
            self.tensor, num_classes=num_classes, is_training=is_training)
def build_resnet50_v2(img_input,
                      l2_weight_decay=0.01,
                      is_training=True,
                      prefix=''):
    """Build the slim ResNet50-v2 model without global pooling.

    Returns `(conv1, block1, block2, block3, block4)`: the network output
    (`block4`) plus four intermediate endpoints looked up under
    `{prefix}resnet_v2_50/...`, intended as inputs for transposed
    convolution layers.
    """
    with slim.arg_scope(
            resnet_v2.resnet_arg_scope(weight_decay=l2_weight_decay)):
        block4, endpoints = resnet_v2_50(
            img_input, is_training=is_training, global_pool=False)

    # Deepest first, the same lookup order as before.
    block3, block2, block1, conv1 = (
        endpoints[f'{prefix}resnet_v2_50/{layer}']
        for layer in ('block3', 'block2', 'block1', 'conv1'))
    return conv1, block1, block2, block3, block4
def __call__(self, inputs):
    """Rescale `inputs` and run them through ResNet v2-101.

    Inputs are divided by 255, shifted by -0.5 and doubled before entering
    the network. After the first call `self.reuse` is set to True so later
    calls share the variables.
    """
    rescaled = ((inputs / 255.0) - 0.5) * 2.0
    with tf.contrib.slim.arg_scope(resnet_arg_scope()):
        image_features, _ = resnet_v2_101(
            rescaled,
            num_classes=self.num_classes,
            is_training=self.is_training,
            global_pool=self.global_pool,
            output_stride=self.output_stride,
            reuse=self.reuse,
            scope=self.scope)
    self.reuse = True
    return image_features
def test_network(img_path, label_path):
    """Classify one JPEG with the pretrained 1001-class ResNet v2-50.

    Restores "resnet_v2_50.ckpt", decodes the image at `img_path`, resizes it
    to 224 x 224, scales the pixels to [-1, 1], and prints the arg-max class
    index together with the matching line of `label_path`.

    Args:
        img_path: path of the JPEG image to classify.
        label_path: UTF-8 text file whose lines are indexed by class id.
    """
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            x, num_classes=1001, is_training=False)
    predictions = end_points["predictions"]
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "resnet_v2_50.ckpt")

        imgfloat = tf.cast(
            tf.image.decode_jpeg(tf.read_file(img_path), channels=3),
            dtype=tf.float32)
        # (pixel / 255) * 2 - 1
        img = tf.subtract(
            tf.multiply(
                tf.div(
                    tf.image.resize_images(
                        tf.expand_dims(imgfloat, 0), (224, 224), method=0),
                    255.0), 2), 1.0)

        predictions_val = predictions.eval(feed_dict={x: img.eval()})
        predicted_classes = np.argmax(predictions_val, axis=3)

        # Close the label file deterministically instead of leaking the
        # handle.
        with open(label_path, encoding="utf-8") as label_file:
            labels = label_file.readlines()
        print(predicted_classes, labels[predicted_classes[0][0][0]])
def build_pretrained_graph(
        self, images, resnet_layer, checkpoint, is_training, reuse=False):
    """See baseclass.

    Returns a triple: the 'resnet_v2_50/block<resnet_layer>' endpoint, the
    variables to restore (those without 'global_step' in their name), and a
    scaffold init function restoring them from `checkpoint`. The init
    function is `None` unless `is_training` is true and `reuse` is false.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, endpoints = resnet_v2.resnet_v2_50(
            images, is_training=is_training, reuse=reuse)

    resnet_output = endpoints['resnet_v2_50/block%d' % resnet_layer]

    restorable = slim.get_variables_to_restore()
    resnet_variables = [
        variable for variable in restorable
        if 'global_step' not in variable.name
    ]

    init_fn = None
    if is_training and not reuse:
        init_saver = tf.train.Saver(resnet_variables)

        def init_fn(scaffold, sess):
            del scaffold
            init_saver.restore(sess, checkpoint)

    return resnet_output, resnet_variables, init_fn
def fprop(self, x):
    """Exposes all the layers of the model.

    Args:
        x (tf.Variable): Tensor which is input to the model.

    Returns:
        dict: A dictionary mapping layer names to the corresponding node in
            the tensorflow graph. When `x` is the very tensor the model was
            constructed with, the stored endpoints are returned; otherwise
            the network is built again on `x` with `tf.AUTO_REUSE`.
    """
    if x is self.x:
        return self.end_points

    with slim.arg_scope(resnet_arg_scope()):
        _, fresh_end_points = resnet_v2_50(
            x,
            num_classes=self.num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)
    return _get_updated_endpoints(fresh_end_points)
def get_box_resnet(inputs, is_training=False):
    """Build the "box_net" regressor: ResNet v2-50 + attention + box head.

    The un-pooled ResNet feature map is weighted by a single-channel
    attention map (a 1x1 convolution with 2048 outputs, averaged over axis
    3), passed through a 3x3 convolution with 512 outputs, flattened, and
    projected to 4 values.

    Returns:
        (box_out, attn): the 4-value dense output and the attention map.
    """
    with tf.variable_scope("box_net",
                           custom_getter=float32_variable_storage_getter):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            feature_map, _ = resnet_v2.resnet_v2_50(
                inputs,
                num_classes=None,
                global_pool=False,
                reuse=tf.AUTO_REUSE,
                is_training=is_training)

        # Attention: per-location 1x1 conv, then mean over the channel axis.
        attn_regularizer = tf.contrib.layers.l2_regularizer(scale=0.1)
        attn_features = tf.layers.conv2d(
            feature_map,
            2048, [1, 1],
            activation=None,
            name='attn',
            kernel_regularizer=attn_regularizer,
            reuse=tf.AUTO_REUSE)
        attn = tf.reduce_mean(
            attn_features, [3], name='attn_pool', keepdims=True)

        attended = tf.multiply(attn, feature_map)
        box_features = tf.layers.conv2d(
            attended,
            512, [3, 3],
            padding='same',
            activation=None,
            name='box',
            reuse=tf.AUTO_REUSE)
        flat = tf.layers.flatten(box_features, name='box_flatten')
        box_out = tf.layers.dense(
            flat, 4, activation=None, name='box_out', reuse=tf.AUTO_REUSE)
    return box_out, attn
def build_model(self):
    """Build placeholders, the configured classifier, loss and train op.

    `self.config.model` selects the network: "resnet" (ResNet v2-50),
    "resnet_101" (ResNet v2-101), "resnet_v1_50" (ResNet v1-50); any other
    value falls back to a small convolutional encoder followed by a dense
    layer.

    Sets `self.x`, `self.y`, `self.is_training`, `self.pred`, `self.loss`,
    `self.train_op` and `self.acc`.
    """
    # Default hyper-parameters
    FILTER_SIZE = (5, 5)
    Z_DIM = 2048
    STRIDE = (2, 2)
    DEPTHS = [64, 128, 256, 256, 128, 32]
    N_CLASS = self.config.nclass

    def encoder(inputs, z_dim=Z_DIM, is_training=False):
        """Three strided conv+BN layers, then a dense + BN + ReLU embedding."""
        net = inputs
        for index in range(3):
            net = conv2d_BN(
                net,
                DEPTHS[index],
                FILTER_SIZE,
                is_training,
                stride=STRIDE,
                name='conv_%d' % (index + 1),
                kernel_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
        z = tf.layers.dense(
            tf.layers.flatten(net),
            z_dim,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        z = tf.nn.relu(
            tf.layers.batch_normalization(z, training=is_training),
            name='enc')
        return z

    self.x = tf.placeholder(
        tf.float32, shape=[None] + self.config.input_shape, name="input")
    self.y = tf.placeholder(tf.int32, shape=[None], name="label")
    self.is_training = tf.placeholder(tf.bool, name="is_training")

    # network architecture
    batch_norm_decay = (0.997 if self.config.get('bn_decay') is None else
                        self.config.bn_decay)
    output_stride = self.config.get('output_stride')

    if self.config.model == "resnet":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(
                    batch_norm_decay=batch_norm_decay)):
            _, end_points = resnet_v2.resnet_v2_50(
                self.x,
                N_CLASS,
                is_training=self.is_training,
                output_stride=output_stride)
        logits = tf.squeeze(end_points["resnet_v2_50/logits"], axis=[1, 2])
        # "pred" is the op name; it has to be passed by keyword because the
        # second positional parameter of tf.nn.softmax is the axis.
        pred = tf.nn.softmax(logits, name="pred")
    elif self.config.model == "resnet_101":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(
                    batch_norm_decay=batch_norm_decay)):
            _, end_points = resnet_v2.resnet_v2_101(
                self.x,
                N_CLASS,
                is_training=self.is_training,
                output_stride=output_stride)
        logits = tf.squeeze(end_points["resnet_v2_101/logits"], axis=[1, 2])
        pred = tf.nn.softmax(logits)
    elif self.config.model == "resnet_v1_50":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(
                    batch_norm_decay=batch_norm_decay)):
            _, end_points = resnet_v1.resnet_v1_50(
                self.x,
                N_CLASS,
                is_training=self.is_training,
                output_stride=output_stride)
        logits = tf.squeeze(end_points["resnet_v1_50/logits"], axis=[1, 2])
        pred = tf.nn.softmax(logits)
    else:
        z = encoder(self.x, z_dim=Z_DIM, is_training=self.is_training)
        logits = tf.layers.dense(
            z,
            N_CLASS,
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
        pred = tf.nn.softmax(logits)
    self.pred = pred

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(
            tf.losses.sparse_softmax_cross_entropy(
                labels=self.y, logits=logits),
            name='cross_entropy')
        # Ops in UPDATE_OPS must run together with every training step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print(update_ops)
        with tf.control_dependencies(update_ops):
            self.train_op = tf.train.AdamOptimizer(
                self.config.learning_rate).minimize(
                    self.loss, global_step=self.global_step_tensor)
        correct_prediction = tf.equal(
            tf.argmax(pred, 1, output_type=tf.int32), self.y)
        self.acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
def main(argv=None):
    """Train ResNet v2-50 on the preprocessed dataset and report accuracy.

    Loads six arrays from `INPUT_DATA` (train/validation/test images and
    labels), builds the classifier, resumes from the newest checkpoint in
    `SAVE_PATH` when one exists (otherwise loads the tuned variables from
    `TRAINED_CKPT_FILE`), trains for `STEPS` steps with a checkpoint and
    validation pass every 100 steps, and finally prints the test accuracy.
    """
    # Load the preprocessed data.
    processed_data = np.load(INPUT_DATA)
    (training_images, training_labels, validation_images, validation_labels,
     testing_images, testing_labels) = (processed_data[i] for i in range(6))
    num_train = len(training_images)
    print('%d training, %d validation, %d testing' %
          (num_train, len(validation_labels), len(testing_labels)))

    # Inputs of ResNet v2-50 (224 x 224 images).
    images = tf.placeholder(
        tf.float32, [None, 224, 224, 3], name='input_image')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Build the ResNet v2-50 model.
    with slim.arg_scope(resnet.resnet_arg_scope()):
        logits, _ = resnet.resnet_v2_50(images, num_classes=N_CLASSES)
    with tf.variable_scope('squeeze_logits'):
        logits = tf.squeeze(logits, axis=[1, 2])

    trainable_var = get_trainable_variables()

    # Loss; it is registered in the losses collection.
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES), logits)

    # Training op over the total loss.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())

    # Accuracy.
    with tf.variable_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    ckpt_state = tf.train.get_checkpoint_state(SAVE_PATH)
    if ckpt_state and ckpt_state.model_checkpoint_path:
        # Resume from the parameters of a previous run.
        variables_to_restore = slim.get_model_variables()
        print('continue training from %s' % ckpt_state)
        checkpoint_file = ckpt_state.model_checkpoint_path
        # The step number is the suffix after the last '-' of the file name.
        step = int(checkpoint_file.split('/')[-1].split('-')[-1])
        restore_path = checkpoint_file
    else:
        # No previous run: start from part of the pretrained weights.
        restore_path = TRAINED_CKPT_FILE
        variables_to_restore = get_tuned_variable()
        print('loading tuned variables from %s' % TRAINED_CKPT_FILE)
        step = 0

    load_fn = slim.assign_from_checkpoint_fn(
        restore_path, variables_to_restore, ignore_missing_vars=True)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise everything, then overwrite with the checkpoint values.
        sess.run(tf.global_variables_initializer())
        load_fn(sess)

        start, end = 0, BATCH
        for i in range(step + 1, step + 1 + STEPS):
            start_time = time.time()
            sess.run(
                train_step,
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })
            duration = time.time() - start_time
            print('current train step duration %.3f' % duration)

            # Periodic checkpoint and validation log.
            if i % 100 == 0:
                saver.save(sess, TRAIN_FILE, global_step=i)
                validation_accuracy = sess.run(
                    evaluation_step,
                    feed_dict={
                        images: validation_images,
                        labels: validation_labels
                    })
                print('Step %d Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            # Advance the batch window, wrapping at the end of the data.
            start = 0 if end == num_train else end
            end = min(start + BATCH, num_train)

        # Accuracy on the test set.
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict={
                images: testing_images,
                labels: testing_labels
            })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100.0))
def main():
    """Fine-tune ResNet v2-50 with a new dropout + fully connected head.

    Loads six arrays from `INPUT_DATA` (train/validation/test images and
    labels), builds ResNet v2-50 without its classification layer, adds a
    dropout and an `N_CLASSES`-way fully connected layer, loads the tuned
    variables from `CKPT_FILE`, trains for `STEPS` steps (validating every 5
    steps and on the last step) and prints the final test accuracy.
    """
    # Load the preprocessed data.
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))

    # Network inputs: `images` are the input pictures, `labels` the class
    # index of each picture.
    images = tf.placeholder(
        tf.float32, [None, 299, 299, 3], name='Input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # ResNet v2-50 trunk; with num_classes=None no logits layer is added, so
    # the first returned value is a feature tensor.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(images, num_classes=None)

    with tf.variable_scope("Logits"):
        # Drop axes 1 and 2 of the trunk output.
        net = tf.squeeze(logits, axis=[1, 2])
        # NOTE(review): slim.dropout is called without `is_training`, so the
        # same dropout op is part of the evaluation runs below - confirm
        # this is intended.
        net = slim.dropout(net, keep_prob=0.5, scope='dropout_scope')
        # New classification layer with the requested number of outputs.
        logits = slim.fully_connected(net, num_outputs=N_CLASSES, scope='fc')

    # Variables that should be trained.
    trainable_variables = get_trainable_variables()

    # Cross-entropy loss; it is registered in the losses collection.
    tf.losses.softmax_cross_entropy(
        tf.one_hot(labels, N_CLASSES), logits, weights=1.0)

    # Training op over the total loss.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())

    # Accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    # Function that loads the pretrained weights.
    load_fn = slim.assign_from_checkpoint_fn(
        CKPT_FILE, get_tuned_variables(), ignore_missing_vars=True)

    # Saver for the newly trained model.
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise the variables that are not loaded from the checkpoint.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the pretrained model.
        print('Loading tuned variables from%s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(STEPS):
            sess.run(
                train_step,
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })

            # Log the validation accuracy.
            if i % 5 == 0 or i + 1 == STEPS:
                # saver.save(sess, TRAIN_FILE, global_step = i)
                validation_accuracy = sess.run(
                    evaluation_step,
                    feed_dict={
                        images: validation_images,
                        labels: validation_labels
                    })
                # '%.1f' (digit one): one decimal place. The previous
                # '%.lf' (letter l) rounded the value to zero decimals.
                print('Step %d: Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            # The data was shuffled during preprocessing, so the training
            # examples are simply consumed in order.
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example

        # Accuracy on the held-out test data.
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict={
                images: testing_images,
                labels: testing_labels
            })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
def model(inputs):
    """Build the ResNet v2-152 backbone and its five 1x1-convolution heads.

    Each head (keypoint maps, short/mid/long offsets, segmentation mask) is
    a 1x1 convolution on the backbone output, bilinearly resized to the
    configured image size. Mid and long offsets are then refined with the
    short offsets.

    Returns:
        [kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask]
    """
    batch_size = config.BATCH_SIZE  # only used by the disabled deconv path
    height = config.IMAGE_SHAPE[0]
    width = config.IMAGE_SHAPE[1]

    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, end_points = resnet_v2.resnet_v2_152(
            inputs,
            2048,
            is_training=True,
            global_pool=False,
            reuse=tf.AUTO_REUSE,
            output_stride=config.OUTPUT_STRIDE)

    # (scope name, output channels, activation) for every head, in the
    # order the ops are created.
    head_specs = [
        ('kp_maps', config.NUM_KP, tf.nn.sigmoid),
        ('short_offsets', 2 * config.NUM_KP, None),
        ('mid_offsets', 4 * config.NUM_EDGES, None),
        ('long_offsets', 2 * config.NUM_KP, None),
        ('seg_mask', 1, tf.nn.sigmoid),
    ]
    low_res_heads = [
        tf.contrib.layers.conv2d(
            net,
            num_outputs=channels,
            kernel_size=(1, 1),
            activation_fn=activation,
            stride=1,
            scope=head_name,
            reuse=tf.AUTO_REUSE)
        for head_name, channels, activation in head_specs
    ]

    # Upsample every head to the input resolution. A transposed-convolution
    # alternative to this resize existed only as disabled code.
    kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask = [
        tf.image.resize_bilinear(head, (height, width), align_corners=True)
        for head in low_res_heads
    ]

    mid_offsets = split_and_refine_mid_offsets(mid_offsets, short_offsets)
    long_offsets = split_and_refine_long_offsets(long_offsets, short_offsets)

    return [kp_maps, short_offsets, mid_offsets, long_offsets, seg_mask]
def feature_extractor_resnet(images,
                             dim=256,
                             weight_decay=0.0001,
                             batch_norm_decay=0.999,
                             batch_renorm_decay=0.99,
                             batch_renorm_rmax=3.,
                             batch_renorm_dmax=5.,
                             is_training=True,
                             use_conv3d=True):
    """Extract a `dim`-channel feature map with a slim ResNet v2 trunk.

    Args:
        images: input tensor; with `use_conv3d` the two leading axes are
            merged before the 2-D trunk and restored afterwards
            ([N,T,H,W,C] per the inline comments).
        dim: number of channels of the returned feature map.
        weight_decay: forwarded to the ResNet arg scope.
        batch_norm_decay: forwarded to the ResNet arg scope.
        batch_renorm_decay: `renorm_decay` of the batch renormalization.
        batch_renorm_rmax: upper clip `rmax`; `rmin` is its reciprocal.
        batch_renorm_dmax: clip value `dmax`.
        is_training: training mode of the network and the batch norm layers.
        use_conv3d: if true, finish with a stack of 3-D convolutions,
            otherwise with a single 1x1 2-D convolution.

    Returns:
        The feature map produced by the last (activation-free) convolution.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    if use_conv3d:
        orig_shape = tf.shape(images)
        # [N,T,H,W,C] -> [N*T,H,W,C]
        images = tf.reshape(images, tf.concat([[-1], orig_shape[2:]], 0))

    resnet_arg_scope = resnet_v2.resnet_arg_scope(
        weight_decay=weight_decay, batch_norm_decay=batch_norm_decay)

    # batch size is small so we use batch renormalization
    # A list comprehension keeps `[0]` valid on Python 3, where `filter`
    # returns a non-subscriptable iterator.
    batch_norm_key = [
        key for key in resnet_arg_scope.keys() if 'batch_norm' in key
    ][0]
    resnet_arg_scope[batch_norm_key].update({
        'renorm': True,
        'renorm_decay': batch_renorm_decay,
        'renorm_clipping': {
            'rmin': 1. / batch_renorm_rmax,
            'rmax': batch_renorm_rmax,
            'dmax': batch_renorm_dmax
        }
    })

    with slim.arg_scope(resnet_arg_scope):
        blocks = [
            resnet_v2.resnet_v2_block(
                'block1', base_depth=16, num_units=3, stride=2),
            resnet_v2.resnet_v2_block(
                'block2', base_depth=32, num_units=4, stride=2),
            resnet_v2.resnet_v2_block(
                'block3', base_depth=64, num_units=6, stride=2),  # 256
            resnet_v2.resnet_v2_block(
                'block4', base_depth=128, num_units=3, stride=1)  # 512
        ]
        _, end_points = resnet_v2.resnet_v2(
            images,
            blocks,
            is_training=is_training,
            include_root_block=False)
    net = end_points['resnet_v2/block4']

    if use_conv3d:
        # [N*T,H',W',C'] -> [N,T,H',W',C']
        net = tf.reshape(
            net, tf.concat([orig_shape[:2], tf.shape(net)[1:]], 0))

    arg_scope = convert_resnet_arg_scope_to_slim(resnet_arg_scope)
    arg_scope[slim.conv2d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.conv3d].update({'stride': 1, 'padding': 'SAME'})
    arg_scope[slim.batch_norm]['is_training'] = is_training

    with slim.arg_scope(arg_scope):
        if use_conv3d:
            net = slim.conv3d(net, 512, [3, 3, 3])
            net = slim.conv3d(net, 256, [1, 1, 1])
            net = slim.conv3d(net, 512, [3, 3, 3])
            # the last layer without activation function
            feature_map = slim.conv3d(
                net, dim, [1, 1, 1], activation_fn=None, normalizer_fn=None)
        else:
            # the last layer without activation function
            feature_map = slim.conv2d(
                net, dim, [1, 1], activation_fn=None, normalizer_fn=None)

    return feature_map
def build_model(self):
    """Build a two-branch ensemble classifier, its loss and its train op.

    Branch 1 is a 2-class ResNet v2-50; branch 2 ("ensemble_2") is a small
    CNN with two conv/pool stages and two dense layers. The two branch
    outputs are averaged and passed through a softmax.

    Sets `self.img`, `self.label`, `self.prediction_1`, `self.dense_input`,
    `self.prediction_2`, `self.sum_prediction`, `self.prediction`,
    `self.accuracy`, `self.cross_entropy` and `self.train_step`. Reads
    `self.batch_size`, `self.dropout_prob` and `self.lr`.
    """
    # `self.img` is the placeholder already divided by 255, not the raw
    # placeholder itself.
    self.img = tf.placeholder(tf.float32, [None, 150, 100, 3]) / 255
    self.label = tf.placeholder(tf.float32, [None, 2])

    # Branch 1: ResNet v2-50, always built with is_training=True.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        net, _ = resnet_v2.resnet_v2_50(self.img,
                                        num_classes=2,
                                        is_training=True,
                                        global_pool=True)
    # The reshape fixes the batch dimension to `self.batch_size`.
    net = tf.reshape(net, [self.batch_size, 2])
    # dense_1 = tf.layers.dense(net, units=5, activation=tf.nn.relu)
    self.prediction_1 = net
    # self.prediction = tf.nn.softmax(tf.layers.dense(dense_1, units=2))

    # Branch 2: small CNN operating on the same scaled image.
    with tf.variable_scope("ensemble_2"):
        with tf.variable_scope('convolution_1') as scope:
            W_conv1 = tf.Variable(tf.truncated_normal(shape=[5, 5, 3, 32],
                                                      mean=0,
                                                      stddev=0.01),
                                  name='conv1')
            b_conv1 = tf.Variable(tf.truncated_normal(shape=[32],
                                                      mean=0,
                                                      stddev=0.1),
                                  name='bias1')
            h_conv1 = tf.nn.dropout(
                tf.nn.relu(
                    tf.nn.bias_add(
                        tf.nn.conv2d(self.img,
                                     W_conv1,
                                     strides=[1, 1, 1, 1],
                                     padding='VALID'), b_conv1)),
                keep_prob=self.dropout_prob)  # output size 146x96x32
            h_pool1 = tf.nn.max_pool(
                h_conv1,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding='VALID')  # output size 73x48x32
        with tf.variable_scope('convolution_2') as scope:
            W_conv2 = tf.Variable(tf.truncated_normal(shape=[5, 5, 32, 10],
                                                      mean=0,
                                                      stddev=0.01),
                                  name='conv2')
            b_conv2 = tf.Variable(tf.truncated_normal(shape=[10],
                                                      mean=0,
                                                      stddev=0.05),
                                  name='bias2')
            h_conv2 = tf.nn.dropout(tf.nn.relu(
                tf.nn.bias_add(
                    tf.nn.conv2d(h_pool1,
                                 W_conv2,
                                 strides=[1, 1, 1, 1],
                                 padding='VALID'), b_conv2)),
                                    keep_prob=self.dropout_prob)  # 69x44x10
            h_pool2 = tf.nn.max_pool(h_conv2,
                                     ksize=[1, 2, 2, 1],
                                     strides=[1, 2, 2, 1],
                                     padding='VALID')  # 34x22x10
            self.dense_input = tf.reshape(h_pool2, (-1, 34 * 22 * 10))
        with tf.variable_scope('dense') as scope:
            h_dense1 = tf.nn.dropout(tf.layers.dense(
                self.dense_input,
                units=34 * 22 * 10,
                activation=tf.nn.relu),
                                     keep_prob=self.dropout_prob)
            h_dense2 = tf.layers.dense(h_dense1, units=2, activation=None)
            self.prediction_2 = h_dense2

    # Ensemble: plain average of the two branch outputs.
    # self.sum_prediction = tf.concat([self.prediction_1, self.prediction_2], axis=1)
    self.sum_prediction = (self.prediction_1 + self.prediction_2) / 2
    print('sum_prediction:', self.sum_prediction)
    with tf.variable_scope('result') as scope:
        # weight = tf.reshape(tf.nn.softmax(tf.layers.dense(self.dense_input, units=2)), (self.batch_size, 1, 2))
        # self.pin = weight
        # self.prediction = tf.nn.softmax(tf.reshape(tf.matmul(weight, self.sum_prediction), (self.batch_size, 2)))
        self.prediction = tf.nn.softmax(self.sum_prediction)
        print("prediction:", self.prediction)

    # `self.label` is compared through argmax, i.e. treated as a score or
    # one-hot vector per example.
    correct_prediction = tf.equal(tf.argmax(self.prediction, 1),
                                  tf.argmax(self.label, 1))
    self.accuracy = tf.reduce_mean(
        tf.cast(correct_prediction, tf.float32))

    # NOTE(review): `reg` is computed but never added to the loss that is
    # minimized below - confirm whether the L2 penalty was meant to be used.
    reg = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(1e-4), tf.trainable_variables())
    # Manual cross-entropy on the softmax output; probabilities are clipped
    # to [1e-10, 0.999999] before the log.
    self.cross_entropy = tf.reduce_mean(-tf.reduce_sum(
        self.label *
        tf.log(tf.clip_by_value(self.prediction, 1e-10, 0.999999)),
        reduction_indices=[1]))
    global_step = tf.Variable(0, trainable=False)
    # self.learning = tf.train.exponential_decay(self.lr, global_step, 70, 0.8, staircase=True)
    self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
        self.cross_entropy, global_step=global_step)
def resnet_v2_spkid(self, inputs, spk_labels, blocks, num_classes,
                    is_training, global_pool, output_stride, reuse, scope):
    """Build a ResNet-v2 style speaker-ID network and its training loss.

    Pipeline: 13-tap 'conv1' -> ``self.stack_blocks_dense`` -> batch-norm/ReLU
    -> 'res_fc' ([1, 5] VALID conv, 512 channels) -> optional attention-weighted
    statistics pooling -> 'fc1' (1000) -> 'fc2' (512) -> spatial squeeze ->
    ``self.AM_logits_compute`` -> sparse softmax cross-entropy.

    Args:
        inputs: network input tensor (layout is whatever
            ``resnet_utils.conv2d_same`` accepts; not checked here).
        spk_labels: labels given both to ``self.AM_logits_compute`` and to
            ``tf.nn.sparse_softmax_cross_entropy_with_logits``.
        blocks: block descriptions forwarded to ``self.stack_blocks_dense``.
        num_classes: number of classes, forwarded to ``AM_logits_compute``.
        is_training: forwarded to batch-norm and ``AM_logits_compute``.
        global_pool: if true, pool over axis 1 with attention-weighted
            mean/variance and concatenate both along the channel axis.
        output_stride: forwarded to ``self.stack_blocks_dense``.
        reuse: variable-scope reuse flag.
        scope: variable-scope name (default name 'resnet_v2').

    Returns:
        ``(loss, end_points)``: the scalar mean cross-entropy and a dict of
        intermediate tensors.
    """
    # Batch-norm parameter initializers shared by every batch_norm layer below.
    bn_param_init = {
        "beta": tf.constant_initializer(value=0),
        "gamma": tf.random_normal_initializer(mean=1, stddev=0.045),
        "moving_mean": tf.constant_initializer(value=0),
        "moving_variance": tf.constant_initializer(value=1)
    }

    with arg_scope(resnet_v2.resnet_arg_scope()), \
            tf.variable_scope(scope, 'resnet_v2', [inputs],
                              reuse=reuse) as sc:
        collection_key = sc.original_name_scope + '_end_points'
        prefix = sc.name

        with arg_scope([layers_lib.conv2d, resnet_v2.bottleneck,
                        slim.conv2d, self.stack_blocks_dense],
                       outputs_collections=collection_key), \
                arg_scope([layers_lib.conv2d],
                          weights_regularizer=None,
                          weights_initializer=tf.contrib.layers.
                          xavier_initializer(),
                          biases_initializer=tf.constant_initializer(0.001)), \
                arg_scope([layers.batch_norm],
                          is_training=is_training,
                          decay=0.9,
                          epsilon=1e-3,
                          scale=True,
                          param_initializers=bn_param_init):
            # Stem convolution: linear, no normalizer (pre-activation design).
            with arg_scope([layers_lib.conv2d],
                           activation_fn=None,
                           normalizer_fn=None,
                           weights_regularizer=None):
                net = resnet_utils.conv2d_same(inputs, 64, 13, 1,
                                               scope='conv1')

            # Residual stages, then the closing batch-norm + ReLU.
            net = self.stack_blocks_dense(net, blocks, output_stride)
            net = layers.batch_norm(net,
                                    activation_fn=tf.nn.relu,
                                    scope='postnorm')
            end_points = utils.convert_collection_to_dict(collection_key)

            net = layers_lib.conv2d(net,
                                    512, [1, 5],
                                    stride=1,
                                    activation_fn=None,
                                    normalizer_fn=None,
                                    scope='res_fc',
                                    padding='VALID')
            end_points[prefix + '/res_fc'] = net
            net = layers.batch_norm(net,
                                    activation_fn=tf.nn.relu,
                                    scope='res_fc_bn')

            if global_pool:
                # Original note: net is batchsize x W(frame_length) x 1 x Dim.
                # Attention-weighted statistics pooling over axis 1;
                # tf.nn.weighted_moments yields (mean, variance).
                attention_weights = self.attention_layer(net)
                end_points['attention'] = attention_weights
                pooled_mean, pooled_variance = tf.nn.weighted_moments(
                    net, 1, attention_weights, keep_dims=True)
                net = tf.concat([pooled_mean, pooled_variance], 3)
                end_points['global_pool'] = net

            # Fully connected layers, implemented as 1x1 convolutions.
            net = layers_lib.conv2d(net,
                                    1000, [1, 1],
                                    stride=1,
                                    activation_fn=None,
                                    normalizer_fn=None,
                                    scope='fc1')
            end_points[prefix + '/fc1'] = net
            net = layers.batch_norm(net,
                                    activation_fn=tf.nn.relu,
                                    scope='fc1_bn')

            net = layers_lib.conv2d(net,
                                    512, [1, 1],
                                    stride=1,
                                    activation_fn=None,
                                    normalizer_fn=None,
                                    scope='fc2')
            end_points[prefix + '/fc2'] = net

            # Output layer (AM-softmax): drop the two unit spatial axes first.
            net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
            end_points[prefix + '/spatial_squeeze'] = net
            net, embedding = self.AM_logits_compute(net, spk_labels,
                                                    num_classes, is_training)
            end_points[prefix + '/logits'] = net
            end_points[prefix + '/fc3'] = embedding

            # Loss: mean sparse softmax cross-entropy over the batch.
            end_points['predictions'] = layers.softmax(net,
                                                       scope='predictions')
            per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=spk_labels, logits=net)
            loss = tf.reduce_mean(per_example_loss)
            end_points[prefix + '/loss'] = loss
            end_points[prefix + '/spk_labels'] = spk_labels

    return loss, end_points
def Encoder_resnet_v2(x,
                      depth=None,
                      is_training=True,
                      weight_decay=0.001,
                      reuse=False,
                      name='Encoder_resnet_v2'):
    """Encode an image (and optionally a depth map) with two ResNet-v2-50s.

    The image and the depth map each go through their own ResNet-v2-50
    (globally pooled, no classification head). Each 2048-d feature is passed
    through a fully connected layer, the two results are concatenated, and a
    final fully connected layer maps them back to 2048 dimensions.

    Assumes input is [batch, height_in, width_in, channels]!!

    Input:
    - x: N x H x W x 3
    - depth: optional depth map; it is tiled to 3 channels before encoding.
      When None, an all-zero feature of the image feature's shape is used.
    - is_training: forwarded to both ResNets
    - weight_decay: float, forwarded to ``resnet_arg_scope``
    - reuse: bool->True if test; applied to the outer variable scope
    - name: name of the outer variable scope

    Outputs:
    - net: N x 2048 fused feature
    - variables: tf variables found under the outer variable scope
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    with tf.variable_scope(name, reuse=reuse) as outer_scope:
        # Default ResNet arg scope (weight decay, batch-norm settings, ...).
        #
        # Notes on resnet_v2_50 (from the slim source):
        #  - 'net' is rank-4, [batch, height_out, width_out, channels_out];
        #    with global_pool=True (the default) height_out == width_out == 1.
        #  - With num_classes=None, 'net' is the output of the last ResNet
        #    block, after global average pooling.
        #  - 'end_points' maps network components to their activations.
        #  - 'reuse' there requires 'scope' to be given; here reuse is handled
        #    by the enclosing variable scopes instead.
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):

            # Image branch.
            with tf.variable_scope("resnet_v2_50_img"):
                net_img, _ = resnet_v2.resnet_v2_50(
                    inputs=x,
                    num_classes=None,
                    is_training=is_training,
                )
                net_img = tf.squeeze(net_img, axis=[1, 2])  # N x 2048

            # Depth branch (zeros when no depth map is supplied).
            with tf.variable_scope("resnet_v2_50_dep"):
                net_depth = tf.zeros(shape=tf.shape(net_img),
                                     dtype=tf.float32)
                if depth is not None:
                    # The pre-trained resnet_v2_50 expects 3 input channels,
                    # so the depth map is replicated along the channel axis.
                    depth_rgb = tf.concat([depth, depth, depth], axis=-1)
                    net_depth, _ = resnet_v2.resnet_v2_50(
                        inputs=depth_rgb,
                        num_classes=None,
                        is_training=is_training,
                    )
                    net_depth = tf.squeeze(net_depth, axis=[1, 2])  # N x 2048

            # Fusion: per-branch FC, concatenate, then a joint FC.
            with tf.variable_scope("encoder_fc"):
                img_feature = slim.fully_connected(net_img, 2048)
                depth_feature = slim.fully_connected(net_depth, 2048)
                fused = tf.concat([img_feature, depth_feature], 1)
                net = slim.fully_connected(fused, 2048)

    variables = tf.contrib.framework.get_variables(outer_scope)
    return net, variables
def Encoder_resnet(x,
                   depth=None,
                   is_training=True,
                   weight_decay=0.001,
                   reuse=False):
    """Encode an image, and optionally a depth map, with ResNet-v2-50.

    The image is encoded by a ResNet-v2-50 under scope 'resnet_v2_50'. When
    ``depth`` is given it is encoded by a second, independent ResNet-v2-50
    under scope 'resnet_v2_50_dep'. Both use global pooling and no
    classification head, so each output is squeezed to N x C.

    Assumes input is [batch, height_in, width_in, channels]!!

    Input:
    - x: N x H x W x 3
    - depth : N x H x W x 1 (optional)
    - is_training: forwarded to both ResNets
    - weight_decay: float, forwarded to ``resnet_arg_scope``
    - reuse: bool->True if test; applied to both ResNets

    Outputs:
    - net: image feature, N x C
    - net_depth: depth feature, N x C, or None when ``depth`` is None
    - variables: tf variables of the image encoder, followed by those of the
      depth encoder when ``depth`` is given
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    image_scope = 'resnet_v2_50'
    depth_scope = 'resnet_v2_50_dep'

    with tf.name_scope("Encoder_resnet", [x, depth]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # With num_classes=None and the default global_pool=True the
            # network returns the pooled last-block output, N x 1 x 1 x C.
            net, _ = resnet_v2.resnet_v2_50(inputs=x,
                                            num_classes=None,
                                            is_training=is_training,
                                            reuse=reuse,
                                            scope=image_scope)
            net = tf.squeeze(net, axis=[1, 2])

            net_depth = None
            if depth is not None:
                # `reuse` must be honoured here as well, otherwise a second
                # call (e.g. the test graph with reuse=True) tries to create
                # the depth encoder's variables again and fails.
                net_depth, _ = resnet_v2.resnet_v2_50(inputs=depth,
                                                      num_classes=None,
                                                      is_training=is_training,
                                                      reuse=reuse,
                                                      scope=depth_scope)
                net_depth = tf.squeeze(net_depth, axis=[1, 2])

    # Collect variables from the scopes that were actually used above. The
    # scope argument is matched as a name prefix, so the trailing '/' stops
    # 'resnet_v2_50' from also matching 'resnet_v2_50_dep/...'.
    variables = tf.contrib.framework.get_variables(image_scope + '/')
    if depth is not None:
        variables = variables + tf.contrib.framework.get_variables(
            depth_scope + '/')
    return net, net_depth, variables
# -*- coding: utf-8 -*-
# @ File ResNetDemo.py
# @ Description : Build a slim ResNet-v2-50 on random images and list its
#                 model variables.
# @ Author alexchung
# @ Time 21/1/2019 09:52

import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2

# Random stand-in batch: 5 images of 224 x 224 x 3 with values in [0, 3).
images = tf.Variable(initial_value=tf.random_uniform(shape=(5, 224, 224, 3),
                                                     minval=0,
                                                     maxval=3),
                     dtype=tf.float32)
num_classes = tf.constant(value=5, dtype=tf.int32)
# is_training = True

if __name__ == "__main__":
    with tf.Session() as sess:
        # The class count must be a plain Python value at graph-build time.
        class_count = int(sess.run(num_classes))

        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                images, num_classes=class_count, is_training=True)

        # Create the initializer only after the network exists; an init op
        # built earlier would cover `images` but none of the ResNet variables.
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        for var in tf.model_variables():
            print(var.name, var.shape)