def Encoder_resnet(x, is_training=True, weight_decay=0.001, reuse=False):
    """Encode an image batch with a ResNet-v2-50 backbone.

    Assumes input is [batch, height_in, width_in, channels]!!

    Args:
        x: image tensor, N x H x W x 3.
        is_training: forwarded to ``resnet_v2_50``.
        weight_decay: float, forwarded to ``resnet_arg_scope``.
        reuse: forwarded to ``resnet_v2_50`` (original note: True if test).

    Returns:
        net: the ``resnet_v2_50`` output built with ``num_classes=None``,
            with axes 1 and 2 squeezed away (presumably N x 2048 for the
            stock ResNet-50 -- confirm).
        variables: the tf variables found under the 'resnet_v2_50' scope.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, not the list of input tensors.
    with tf.name_scope("Encoder_resnet", values=[x]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            net, _ = resnet_v2.resnet_v2_50(
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
def __init__(self, x, num_classes=1001, is_training=False):
    """Build the ResNet50-v2 graph on top of ``x``.

    Args:
        x (tf.Variable): Graph node that feeds the model.
        num_classes (int): Number of predicted classes. If 0 or None, the
            features before the logit layer are returned.
        is_training (bool): Whether batch_norm layers are in training mode.
    """
    super(ResNet50v2, self).__init__()
    self.x = x
    self.num_classes = num_classes

    # Populate the tensorflow graph; only the end-points dict is kept.
    scope_args = resnet_arg_scope()
    with slim.arg_scope(scope_args):
        _, raw_end_points = resnet_v2_50(
            x,
            num_classes=num_classes,
            is_training=is_training,
            reuse=None)
        self.end_points = _get_updated_endpoints(raw_end_points)

    self.variables_to_restore = slim.get_variables_to_restore(exclude=[])
def resnet50(x, nums, is_training=True, reuse=False):
    """ResNet-v2-50 backbone followed by a linear fully connected head.

    Assumes input is [batch, height_in, width_in, channels]!!

    Args:
        x: image tensor, N x H x W x 3.
        nums: number of outputs of the final 'fc' layer.
        is_training: forwarded to ``resnet_v2_50``; also used as the
            ``trainable`` flag of the 'fc' layer.
        reuse: forwarded to ``resnet_v2_50`` and to the 'fc' layer
            (original note: True if test).

    Returns:
        net: output of the 'fc' layer (no activation), N x nums.
        variables: the tf variables found under the 'resnet_v2_50' scope.
            The 'fc' layer is created with scope 'fc', so its variables are
            presumably not part of this list -- confirm if callers need them.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    # `values` must be passed by keyword: the second positional parameter of
    # tf.name_scope is `default_name`, not the list of input tensors.
    with tf.name_scope("Resnet", values=[x]):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            net, _ = resnet_v2.resnet_v2_50(  # shape=(N, 1, 1, 2048)
                x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])  # shape=(N, 2048)
            net = slim.fully_connected(
                net,
                num_outputs=nums,
                activation_fn=None,
                trainable=is_training,
                reuse=reuse,
                scope='fc')
    variables = tf.contrib.framework.get_variables('resnet_v2_50')
    return net, variables
def test_network(img_path):
    """Restore "train.ckpt" and print the 2-class predictions for one image.

    The image at ``img_path`` is read, resized to 224 x 224 x 3 and fed as a
    single-element float32 batch.
    """
    width, height, channels = 224, 224, 3

    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_50(
            x, num_classes=2, is_training=False)
    predictions = end_points["predictions"]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, "train.ckpt")

        resized = transform.resize(io.imread(img_path),
                                   (width, height, channels))
        data = np.asarray([resized], np.float32)

        predictions_val = predictions.eval(feed_dict={x: data})
        print(predictions_val)
def fprop(self, x):
    """Build the network on ``x`` under ``self._var_scope`` and return its end points.

    The first time a given variable scope is seen, the variables whose name
    starts with that scope are restored from the latest checkpoint; the
    scope is then recorded in ``self._scopes_loaded`` so the restore is not
    repeated.
    """
    num_original_classes = 1001

    def var_to_ckpt_name(var):
        # Checkpoint names carry neither the scope prefix nor the ':0' suffix.
        without_scope = var.name.replace(self._var_scope + '/', '')
        return without_scope.replace(':0', '')

    with slim.arg_scope(resnet_arg_scope()), \
            tf.variable_scope(self._var_scope):
        x = self._preprocessing_fn(x)
        _, end_points = resnet_v2_50(
            x,
            num_classes=num_original_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)
        end_points = self._get_updated_endpoints(end_points)

    # Load weights for a particular scope only once.
    if self._var_scope in self._scopes_loaded:
        return end_points

    variable_name_map = {
        var_to_ckpt_name(var): var
        for var in slim.get_variables_to_restore(exclude=[])
        if var.name.split('/')[0] == self._var_scope
    }
    saver = tf.train.Saver(var_list=variable_name_map)
    saver.restore(self._sess, self._get_latest_checkpoint_path())
    self._scopes_loaded.add(self._var_scope)
    return end_points
def network_resnet_v2_50():
    """Build an inference-mode ResNet-v2-50 on a fixed [1, 224, 224, 3] input.

    Returns:
        The first value returned by ``resnet_v2_50`` built with
        ``num_classes=1000``; the end-points dict is discarded.
    """
    input_ = tf.placeholder(
        shape=[1, 224, 224, 3], name='input', dtype=tf.float32)
    outputs = resnet_v2_50(input_, num_classes=1000, is_training=False)
    return outputs[0]
def tower_loss(scope):
    """Build the network selected by the module-level ``net`` and return its total loss.

    Args:
        scope: name passed to ``tf.get_collection('losses', scope)`` to pick
            the losses to sum.

    Returns:
        The sum (``tf.add_n``) of the collected losses. A scalar summary is
        also emitted for every individual loss and for the total.

    Raises:
        Exception: if ``net`` names no supported architecture.
    """
    images, labels = read_and_decode()
    num_classes = FLAGS.num_classes

    if net in ('vgg_16', 'vgg_19'):
        build = vgg.vgg_16 if net == 'vgg_16' else vgg.vgg_19
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = build(images, num_classes=num_classes)
    elif net in ('resnet_v1_101', 'resnet_v1_50'):
        if net == 'resnet_v1_101':
            build = resnet_v1.resnet_v1_101
        else:
            build = resnet_v1.resnet_v1_50
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = build(images, num_classes=num_classes)
            logits = tf.reshape(logits, [FLAGS.batch_size, num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                images, num_classes=num_classes)
            logits = tf.reshape(logits, [FLAGS.batch_size, num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)

    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)

    # cal_loss is called for its side effect; the losses are read back from
    # the 'losses' collection below.
    cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')

    # Strip the per-tower prefix so summaries share one name across towers.
    tower_prefix = '%s_[0-9]*/' % TOWER_NAME
    for loss in losses + [total_loss]:
        tf.summary.scalar(re.sub(tower_prefix, '', loss.op.name), loss)
    return total_loss
def _encoder(self, input_images, scope_name="encoder", trainable=True,
             scope_reuse=False):
    """Encode images with ResNet-v2-50 and project them to the LSTM channel count.

    Args:
        input_images: image batch fed to ``resnet_v2_50``.
        scope_name: accepted for interface compatibility; not used here.
        trainable: forwarded to ``decoder_layer``.
        scope_reuse: forwarded as ``reuse`` to ``resnet_v2_50``.

    Returns:
        The output of ``decoder_layer`` applied to the ResNet feature map.
    """
    with arg_scope(resnet_utils.resnet_arg_scope()):
        # Original note: (256, 256, 2048) ==> (32, 32, 2048)
        output, _ = resnet_v2.resnet_v2_50(
            input_images,
            output_stride=8,
            global_pool=False,
            reuse=scope_reuse)

    # NOTE(review): the flattened source does not show whether decoder_layer
    # sat inside the arg_scope above; it is kept outside here -- confirm.
    # Original note: (32, 32, 2048) ==> (32, 32, 512)
    hidden_state = decoder_layer(
        output,
        out_channels=self.lstm_channel,
        stride=1,
        scope_name='encoder_layer1',
        trainable=trainable)

    # Call form of print: valid on Python 3, same output on Python 2.
    print(hidden_state.get_shape())
    tf.summary.histogram(hidden_state.op.name + "/activation", hidden_state)
    return hidden_state
def build_pretrained_graph(self, images, resnet_layer, checkpoint,
                           is_training, reuse=False):
    """See baseclass."""
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        _, endpoints = resnet_v2.resnet_v2_50(
            images, is_training=is_training, reuse=reuse)
        endpoint_name = 'resnet_v2_50/block%d' % resnet_layer
        resnet_output = endpoints[endpoint_name]

        # Everything restorable except the global step.
        resnet_variables = [
            var for var in slim.get_variables_to_restore()
            if 'global_step' not in var.name
        ]

    # An init function is only provided for the first training-mode build.
    init_fn = None
    if is_training and not reuse:
        init_saver = tf.train.Saver(resnet_variables)

        def init_fn(scaffold, sess):
            del scaffold
            init_saver.restore(sess, checkpoint)

    return resnet_output, resnet_variables, init_fn
def resnet_fm(input_ph):
    """Return the ResNet-v2-50 output for ``input_ph`` with axes 1 and 2 squeezed.

    The network is built in inference mode with ``num_classes=None`` and
    ``reuse=tf.AUTO_REUSE``.
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        pooled, _ = resnet_v2.resnet_v2_50(
            input_ph,
            num_classes=None,
            is_training=False,
            reuse=tf.AUTO_REUSE)
        return tf.squeeze(pooled, axis=[1, 2])
def get_class_resnet(inputs, num_classes, is_training=False):
    """Build the ResNet-v2-50 classifier under the "classifier" variable scope.

    Variables are created through ``float32_variable_storage_getter`` and are
    shared across calls (``reuse=tf.AUTO_REUSE``).

    Returns:
        The ``(logits, end_points)`` pair returned by ``resnet_v2_50``.
    """
    classifier_scope = tf.variable_scope(
        "classifier", custom_getter=float32_variable_storage_getter)
    with classifier_scope:
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            outputs = resnet_v2.resnet_v2_50(
                inputs,
                num_classes,
                reuse=tf.AUTO_REUSE,
                is_training=is_training)
    logits, end_points = outputs
    return logits, end_points
def get_logits_prob(self, batch_input):
    """Prediction from the model on a single batch.

    :param batch_input: the input batch. Must be from size [?, 224, 224, 3]
    :return: the logits and probabilities for the batch. The probabilities
        are the squeezed "predictions" end point with class index 0 removed
        (presumably the background class of the 1001-way model -- confirm).
    """
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            batch_input, num_classes=1001, is_training=False)
    probs = tf.squeeze(end_points["predictions"])
    # Slice along the last (class) axis. A plain `probs[1:]` only removes
    # class 0 when the batch holds a single image; for larger batches the
    # squeezed tensor still has a leading batch axis and `[1:]` would drop
    # the first example instead.
    probs = probs[..., 1:]
    return logits, probs
def extract_features_resnet50(self, im, scope_name, reuse=False):
    """Run ``im`` through ResNet-v2-50 (512 outputs, global pooling) and flatten.

    Args:
        im: input image batch.
        scope_name: name scope the ops are created under.
        reuse: forwarded to ``resnet_v2_50``.

    Returns:
        The flattened network output. The static output shape is printed as
        a side effect.
    """
    with tf.name_scope(scope_name):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            features, _ = resnet_v2.resnet_v2_50(
                inputs=im,
                num_classes=512,
                global_pool=True,
                is_training=self.is_training,
                scope='resnet_v2_50',
                reuse=reuse)
            print('\nShape after Resnet_50\n')
            print(features.get_shape())
            features = layers.flatten(features)
    return features
def res50_encode(inputs, trainable=False, is_training=False,
                 add_summaries=True):
    """Encode ``inputs`` with ResNet-v2-50 and squeeze axes 1 and 2.

    The network runs in training mode only when both ``is_training`` and
    ``trainable`` are set.

    Returns:
        (net, end_points): the squeezed network output and the end-points
        dict. When ``add_summaries`` is true an activation summary is added
        for every end point.
    """
    fine_tune = is_training & trainable
    features, end_points = resnet_v2_50(
        inputs, is_training=fine_tune, scope="resnet_v2_50")
    features = tf.squeeze(features, [1, 2], name='resnet_v2_50/squeezed')

    if not add_summaries:
        return features, end_points

    for activation in end_points.values():
        tf.contrib.layers.summaries.summarize_activation(activation)
    return features, end_points
def trainmodel(train_batch, train_label_batch, val_label_batch, num_epochs):
    """Train a 2-class ResNet-v2-50 for ``num_epochs`` steps and save './train.ckpt'.

    Each step runs the Adam train op once and prints the loss, the step
    duration and an accuracy value.

    NOTE(review): the accuracy compares predictions made on ``train_batch``
    with ``val_label_batch`` -- confirm this is intended.
    """
    with slim.arg_scope(resnet_arg_scope()):
        train_logits, end_points = resnet_v2.resnet_v2_50(
            train_batch, num_classes=2, is_training=True)

    tf.losses.sparse_softmax_cross_entropy(
        labels=train_label_batch, logits=train_logits)
    total_loss = tf.losses.get_total_loss()

    global_step = tf.Variable(0, name='global_step', trainable=False)

    # The train op depends on the ops collected under UPDATE_OPS.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(total_loss, global_step=global_step)

    prediction_labels = tf.argmax(end_points['predictions'], 3)
    is_correct = tf.equal(prediction_labels, val_label_batch)
    train_accuracy_batch = tf.reduce_mean(tf.cast(is_correct, "float"))

    saved_vars = (tf.trainable_variables()
                  + tf.get_collection_ref("moving_vars"))
    saver = tf.train.Saver(saved_vars)

    fetches = [train_op, total_loss, train_accuracy_batch]
    with tf.Session() as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        print("Initialized!")

        start_time = time.time()
        for step in range(num_epochs):
            _, loss_out, train_acc_out = sess.run(fetches)
            duration = time.time() - start_time
            start_time = time.time()
            print("Minibatch loss at step %d: %.6f (%.3f sec)" %
                  (step, loss_out, duration))
            print("Minibatch accuracy: %.6f" % train_acc_out)

        # NOTE(review): the flattened source does not show whether the
        # checkpoint was written every step or once after the loop; it is
        # written once here -- confirm.
        print("Saving checkpoint...")
        saver.save(sess, './train.ckpt')
        print("Checkpoint saved!")
def test_network(img_path, label_path):
    """Classify one JPEG with the "resnet_v2_50.ckpt" checkpoint and print its label.

    Args:
        img_path: path of the JPEG image; it is decoded, resized to
            224 x 224 and rescaled with ``x / 255 * 2 - 1``.
        label_path: UTF-8 text file with one label per line; the line at
            the predicted class index is printed.
    """
    x = tf.placeholder("float", shape=[None, 224, 224, 3], name='input')
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, end_points = resnet_v2.resnet_v2_50(
            x, num_classes=1001, is_training=False)
    predictions = end_points["predictions"]

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, "resnet_v2_50.ckpt")

        decoded = tf.image.decode_jpeg(tf.read_file(img_path), channels=3)
        imgfloat = tf.cast(decoded, dtype=tf.float32)
        resized = tf.image.resize_images(
            tf.expand_dims(imgfloat, 0), (224, 224), method=0)
        img = tf.subtract(tf.multiply(tf.div(resized, 255.0), 2), 1.0)

        predictions_val = predictions.eval(feed_dict={x: img.eval()})

    predicted_classes = np.argmax(predictions_val, axis=3)

    # Context manager so the label file is closed even if indexing fails.
    with open(label_path, encoding="utf-8") as label_file:
        labels = label_file.readlines()
    print(predicted_classes, labels[predicted_classes[0][0][0]])
def build_graph():
    """Build the training graph for 64 x 64 single-channel images.

    Returns:
        dict with the placeholders ('images', 'labels'), the mean
        cross-entropy ('loss'), the accuracy tensor ('accuracy'), the Adam
        train op ('train_step'), the step variable ('global_step') and the
        merged summary op ('merged_summary_op').
    """
    images = tf.placeholder(
        dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(
        dtype=tf.int64, shape=[None, CLASS_NUM], name='label_batch')
    global_step = tf.get_variable(
        "step", [], initializer=tf.constant_initializer(0.0),
        trainable=False)

    logits, end_points = res.resnet_v2_50(
        inputs=images, num_classes=CLASS_NUM, is_training=True)

    per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=labels)
    loss = tf.reduce_mean(per_example_loss)
    train_step = tf.train.AdamOptimizer(lr).minimize(loss, global_step)

    # Flatten the predictions to [-1, CLASS_NUM] before taking the argmax.
    flat_predictions = tf.reshape(
        end_points['predictions'], [-1, CLASS_NUM])
    with tf.name_scope('evaluation'):
        hits = tf.equal(tf.argmax(flat_predictions, 1), tf.argmax(labels, 1))
        accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    print(accuracy.get_shape())

    graph_nodes = {}
    graph_nodes['images'] = images
    graph_nodes['labels'] = labels
    graph_nodes['loss'] = loss
    graph_nodes['accuracy'] = accuracy
    graph_nodes['train_step'] = train_step
    graph_nodes['global_step'] = global_step
    graph_nodes['merged_summary_op'] = merged_summary_op
    return graph_nodes
def build_pretrained_graph(
        self, images, resnet_layer, checkpoint, is_training, reuse=False):
    """See baseclass."""
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        outputs = resnet_v2.resnet_v2_50(
            images, is_training=is_training, reuse=reuse)
        endpoints = outputs[1]
        resnet_output = endpoints['resnet_v2_50/block%d' % resnet_layer]

        # Keep every restorable variable except the global step.
        resnet_variables = []
        for candidate in slim.get_variables_to_restore():
            if 'global_step' not in candidate.name:
                resnet_variables.append(candidate)

    if not is_training or reuse:
        # No initialisation callback outside the first training-mode build.
        return resnet_output, resnet_variables, None

    init_saver = tf.train.Saver(resnet_variables)

    def init_fn(scaffold, sess):
        del scaffold
        init_saver.restore(sess, checkpoint)

    return resnet_output, resnet_variables, init_fn
def fprop(self, x):
    """Exposes all the layers of the model.

    Args:
        x (tf.Variable): Tensor which is input to the model.

    Returns:
        dict: A dictionary mapping layer names to the corresponding node in
        the tensorflow graph. When ``x`` is the tensor the model was built
        on, the end points stored on the instance are returned; otherwise
        the network is built again on ``x`` with ``reuse=tf.AUTO_REUSE``.
    """
    if x is self.x:
        return self.end_points

    with slim.arg_scope(resnet_arg_scope()):
        _, fresh_end_points = resnet_v2_50(
            x,
            num_classes=self.num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)
        return _get_updated_endpoints(fresh_end_points)
def get_box_resnet(inputs, is_training=False):
    """Predict 4 box values per input with a ResNet-v2-50 and an attention map.

    The un-pooled ResNet feature map is re-weighted by a single-channel
    attention map (a 1x1 convolution averaged over its channels), passed
    through a 3x3 convolution, flattened and projected to 4 outputs.

    Returns:
        (box_out, attn): the 4-unit dense output and the attention map.
    """
    # NOTE(review): the flattened source does not show which statements sat
    # inside the "box_net" variable scope; all layers are created inside it
    # here -- confirm against existing checkpoints.
    with tf.variable_scope("box_net",
                           custom_getter=float32_variable_storage_getter):
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            feature_map, _ = resnet_v2.resnet_v2_50(
                inputs,
                num_classes=None,
                global_pool=False,
                reuse=tf.AUTO_REUSE,
                is_training=is_training)

        # Attention: 1x1 conv with L2 regularisation, then channel mean.
        attn_regularizer = tf.contrib.layers.l2_regularizer(scale=0.1)
        attn = tf.layers.conv2d(
            feature_map, 2048, [1, 1],
            activation=None,
            name='attn',
            kernel_regularizer=attn_regularizer,
            reuse=tf.AUTO_REUSE)
        attn = tf.reduce_mean(attn, [3], name='attn_pool', keepdims=True)

        weighted = tf.multiply(attn, feature_map)

        # Box head.
        box_features = tf.layers.conv2d(
            weighted, 512, [3, 3],
            padding='same',
            activation=None,
            name='box',
            reuse=tf.AUTO_REUSE)
        box_features = tf.layers.flatten(box_features, name='box_flatten')
        box_out = tf.layers.dense(
            box_features, 4,
            activation=None,
            name='box_out',
            reuse=tf.AUTO_REUSE)
    return box_out, attn
def build_model(self):
    """Build a two-branch ensemble classifier on 150 x 100 x 3 images.

    Branch 1 is a ResNet-v2-50 with 2 outputs; branch 2 is a small
    conv/pool/dense network. Their outputs are averaged, soft-maxed and
    trained with a clipped cross-entropy using Adam.

    Sets ``img``, ``label``, ``prediction_1``, ``dense_input``,
    ``prediction_2``, ``sum_prediction``, ``prediction``, ``accuracy``,
    ``cross_entropy`` and ``train_step`` on the instance.

    NOTE(review): the flattened source does not show how the variable scopes
    were nested; the nesting below is a best guess and affects variable/op
    names only -- confirm against existing checkpoints.
    """
    self.img = tf.placeholder(tf.float32, [None, 150, 100, 3]) / 255
    self.label = tf.placeholder(tf.float32, [None, 2])

    # ---- Branch 1: ResNet-v2-50 -------------------------------------
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        resnet_out, _ = resnet_v2.resnet_v2_50(
            self.img, num_classes=2, is_training=True, global_pool=True)
    self.prediction_1 = tf.reshape(resnet_out, [self.batch_size, 2])

    # ---- Branch 2: small convolutional network ----------------------
    unit_strides = [1, 1, 1, 1]
    pool_window = [1, 2, 2, 1]
    with tf.variable_scope("ensemble_2"):
        with tf.variable_scope('convolution_1'):
            W_conv1 = tf.Variable(
                tf.truncated_normal(
                    shape=[5, 5, 3, 32], mean=0, stddev=0.01),
                name='conv1')
            b_conv1 = tf.Variable(
                tf.truncated_normal(shape=[32], mean=0, stddev=0.1),
                name='bias1')
            # 5x5 VALID conv: 150x100x3 -> 146x96x32
            conv1 = tf.nn.conv2d(
                self.img, W_conv1, strides=unit_strides, padding='VALID')
            relu1 = tf.nn.relu(tf.nn.bias_add(conv1, b_conv1))
            h_conv1 = tf.nn.dropout(relu1, keep_prob=self.dropout_prob)
            # 2x2 max pool, stride 2: 146x96x32 -> 73x48x32
            h_pool1 = tf.nn.max_pool(
                h_conv1, ksize=pool_window, strides=pool_window,
                padding='VALID')

        with tf.variable_scope('convolution_2'):
            W_conv2 = tf.Variable(
                tf.truncated_normal(
                    shape=[5, 5, 32, 10], mean=0, stddev=0.01),
                name='conv2')
            b_conv2 = tf.Variable(
                tf.truncated_normal(shape=[10], mean=0, stddev=0.05),
                name='bias2')
            # 5x5 VALID conv: 73x48x32 -> 69x44x10
            conv2 = tf.nn.conv2d(
                h_pool1, W_conv2, strides=unit_strides, padding='VALID')
            relu2 = tf.nn.relu(tf.nn.bias_add(conv2, b_conv2))
            h_conv2 = tf.nn.dropout(relu2, keep_prob=self.dropout_prob)
            # 2x2 max pool, stride 2: 69x44x10 -> 34x22x10
            h_pool2 = tf.nn.max_pool(
                h_conv2, ksize=pool_window, strides=pool_window,
                padding='VALID')
            self.dense_input = tf.reshape(h_pool2, (-1, 34 * 22 * 10))

        with tf.variable_scope('dense'):
            hidden = tf.layers.dense(
                self.dense_input, units=34 * 22 * 10,
                activation=tf.nn.relu)
            h_dense1 = tf.nn.dropout(hidden, keep_prob=self.dropout_prob)
            h_dense2 = tf.layers.dense(h_dense1, units=2, activation=None)
            self.prediction_2 = h_dense2

        # Average the two branches.
        self.sum_prediction = (self.prediction_1 + self.prediction_2) / 2
        print('sum_prediction:', self.sum_prediction)

    # ---- Ensemble output and metrics --------------------------------
    with tf.variable_scope('result'):
        self.prediction = tf.nn.softmax(self.sum_prediction)
        print("prediction:", self.prediction)
        hits = tf.equal(tf.argmax(self.prediction, 1),
                        tf.argmax(self.label, 1))
        self.accuracy = tf.reduce_mean(tf.cast(hits, tf.float32))

    # NOTE(review): `reg` is computed but never added to the loss below.
    reg = tf.contrib.layers.apply_regularization(
        tf.contrib.layers.l2_regularizer(1e-4), tf.trainable_variables())

    # Cross-entropy on probabilities clipped to [1e-10, 0.999999].
    clipped = tf.clip_by_value(self.prediction, 1e-10, 0.999999)
    per_example = -tf.reduce_sum(
        self.label * tf.log(clipped), reduction_indices=[1])
    self.cross_entropy = tf.reduce_mean(per_example)

    global_step = tf.Variable(0, trainable=False)
    self.train_step = tf.train.AdamOptimizer(self.lr).minimize(
        self.cross_entropy, global_step=global_step)
def main():
    """Fine-tune a ResNet-v2-50 classifier on pre-processed data.

    Loads the arrays stored in ``INPUT_DATA`` (train / validation / test
    images and labels), builds ResNet-v2-50 followed by a dropout and a
    fully connected layer with ``N_CLASSES`` outputs, restores the tuned
    variables from ``CKPT_FILE``, trains for ``STEPS`` mini-batches and
    prints the validation accuracy every 5 steps and the final test
    accuracy.
    """
    # Load the pre-processed data.
    processed_data = np.load(INPUT_DATA, allow_pickle=True)
    training_images = processed_data[0]
    n_training_example = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print(
        "%d training examples, %d validation examples and %d testing examples."
        % (n_training_example, len(validation_labels), len(testing_labels)))

    # Network inputs: `images` is the image batch, `labels` holds the class
    # index of every image.
    images = tf.placeholder(tf.float32, [None, 299, 299, 3],
                            name='Input_images')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Backbone: ResNet-v2-50 without its classification layer.
    # NOTE(review): is_training is left at the library default and no
    # UPDATE_OPS dependency is attached to the train op -- confirm whether
    # batch-norm statistics are meant to be updated.
    with slim.arg_scope(resnet_v2.resnet_arg_scope()):
        logits, _ = resnet_v2.resnet_v2_50(images, num_classes=None)

    with tf.variable_scope("Logits"):
        # Drop the two singleton spatial axes left by global pooling.
        net = tf.squeeze(logits, axis=[1, 2])
        # NOTE(review): this dropout has no is_training switch, so it is
        # presumably also active while accuracy is evaluated -- confirm.
        net = slim.dropout(net, keep_prob=0.5, scope='dropout_scope')
        # Final classification layer. activation_fn=None: the default ReLU
        # of slim.fully_connected would clamp the logits to be non-negative
        # before the softmax cross-entropy.
        logits = slim.fully_connected(net,
                                      num_outputs=N_CLASSES,
                                      activation_fn=None,
                                      scope='fc')

    # Variables that should be trained (result is not used further here).
    trainable_variables = get_trainable_variables()

    # Cross-entropy loss.
    tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                    logits,
                                    weights=1.0)

    # Training op.
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss())

    # Accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    # Function that loads the pre-trained weights.
    load_fn = slim.assign_from_checkpoint_fn(CKPT_FILE,
                                             get_tuned_variables(),
                                             ignore_missing_vars=True)

    # Saver for the newly trained model.
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Initialise every variable; the ones found in the checkpoint are
        # overwritten by load_fn right after.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Load the pre-trained model.
        print('Loading tuned variables from%s' % CKPT_FILE)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(STEPS):
            # One training step on the current mini-batch.
            sess.run(train_step,
                     feed_dict={
                         images: training_images[start:end],
                         labels: training_labels[start:end]
                     })

            # Log the validation accuracy.
            if i % 5 == 0 or i + 1 == STEPS:
                # saver.save(sess, TRAIN_FILE, global_step = i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                # '%.1f' (digit one): the previous '%.lf' (letter l) parsed
                # as precision 0 and printed no decimals.
                print('Step %d: Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            # The data was shuffled during pre-processing, so the training
            # set is simply walked in order and wrapped around at the end.
            start = end
            if start == n_training_example:
                start = 0
            end = start + BATCH
            if end > n_training_example:
                end = n_training_example

        # Accuracy on the held-out test data.
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
''' batch_size = FLAGS.batch_size video_size = FLAGS.num_frames total_size = batch_size * video_size video_data = tf.placeholder(tf.float32, [batch_size, video_size, 224, 224, 3]) batch_video_data = tf.reshape(video_data, [total_size, 224, 224, 3]) # for i in range(2): pre_logit, epoints = resnet_v2.resnet_v2_50( inputs=batch_video_data, num_classes=None, # reuse = True, scope='resnet_v2_50') orig_vars = slim.get_variables_to_restore() with tf.variable_scope('post_conv'): # pre_logit = tf.reshape(pre_logit, [total_size, 2048]) embeddings = layers.fully_connected(pre_logit, 1024 if FLAGS.big_embeddings else 10, activation_fn=None) activations = tf.nn.relu(embeddings) scores = layers.fully_connected(activations, 1, activation_fn=None) scores = tf.reshape(scores, [batch_size, video_size, 1]) post_conv_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
def forward(self, inputs, reuse=None, is_training=None):
    """Run ``inputs`` through ResNet-v2-50.

    Returns:
        The ``(net, end_points)`` pair produced by ``resnet_v2_50``; both
        ``reuse`` and ``is_training`` are forwarded unchanged.
    """
    outputs = resnet_v2.resnet_v2_50(
        inputs=inputs, is_training=is_training, reuse=reuse)
    net, endpts = outputs
    return net, endpts
def Encoder_resnet_v2(x,
                      depth=None,
                      is_training=True,
                      weight_decay=0.001,
                      reuse=False,
                      name='Encoder_resnet_v2'):
    """Encode an image (and optionally a depth map) with two ResNet-v2-50 towers.

    Assumes input is [batch, height_in, width_in, channels]!!

    Args:
        x: image tensor, N x H x W x 3.
        depth: optional depth tensor. When given it is tiled to 3 channels
            and encoded by a second ResNet-v2-50; when None the depth
            encoding is a zero tensor shaped like the image encoding.
        is_training: forwarded to both ``resnet_v2_50`` calls.
        weight_decay: float, forwarded to ``resnet_arg_scope``.
        reuse: ``reuse`` flag of the enclosing variable scope
            (original note: True if test).
        name: name of the enclosing variable scope.

    Returns:
        net: output of the final 2048-unit fully connected layer applied to
            the concatenation of the two 2048-unit projections.
        variables: the tf variables found under the enclosing scope.
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2

    with tf.variable_scope(name, reuse=reuse) as scope:
        # Default ResNet arg scope. With num_classes=None and the default
        # global pooling, resnet_v2_50 returns the pooled output of the last
        # block as a rank-4 tensor whose height and width are both one.
        resnet_scope = resnet_v2.resnet_arg_scope(weight_decay=weight_decay)
        with slim.arg_scope(resnet_scope):
            # Image tower.
            with tf.variable_scope("resnet_v2_50_img"):
                net_img, _ = resnet_v2.resnet_v2_50(
                    inputs=x,
                    num_classes=None,
                    is_training=is_training)
                net_img = tf.squeeze(net_img, axis=[1, 2])  # N x 2048

            # Depth tower.
            with tf.variable_scope("resnet_v2_50_dep"):
                net_depth = tf.zeros(shape=tf.shape(net_img),
                                     dtype=tf.float32)
                if depth is not None:
                    # NOTE: the pre-trained resnet_v2_50 expects 3 input
                    # channels, so the depth map is copied to 3 channels.
                    depth_rgb = tf.concat([depth, depth, depth], axis=-1)
                    net_depth, _ = resnet_v2.resnet_v2_50(
                        inputs=depth_rgb,
                        num_classes=None,
                        is_training=is_training)
                    net_depth = tf.squeeze(net_depth,
                                           axis=[1, 2])  # N x 2048

            # Fuse both encodings. NOTE (from the original): to be updated.
            with tf.variable_scope("encoder_fc"):
                img_code = slim.fully_connected(net_img, 2048)
                depth_code = slim.fully_connected(net_depth, 2048)
                fused = tf.concat([img_code, depth_code], 1)
                net = slim.fully_connected(fused, 2048)

    variables = tf.contrib.framework.get_variables(scope)
    return net, variables
def _extract_la(sess, final_logits, image_placeholder, is_training,
                image_names, is_train):
    """Run the network over ``image_names`` in batches; map each name to its logits row."""
    la_dict = {}
    total = len(image_names)
    batch_size = FLAGS.batch_size_test
    for start in range(0, total, batch_size):
        batch_names = image_names[start:start + batch_size]
        print('IMAGE_NAME', batch_names)
        step = start + batch_size
        image_num = len(batch_names)
        print('image num', image_num)

        image_data = np.zeros((image_num, FLAGS.img_height, FLAGS.img_width,
                               FLAGS.img_depth),
                              dtype=np.float32)
        for i, image_name in enumerate(batch_names):
            img = open_img(is_train=is_train,
                           name=image_name,
                           size=FLAGS.img_size,
                           color=FLAGS.img_type)
            pixels = img.astype(np.float32)
            image_data[i, :, :, :] = (pixels / 255.0
                                      if FLAGS.normalize else pixels)

        batch_start_time = time.time()
        # Fetch the tensor directly so the result keeps its leading batch
        # axis. Wrapping it in a list and calling squeeze() collapsed a
        # batch of one image, and pred_logits[i] then yielded a scalar.
        pred_logits = sess.run(final_logits,
                               feed_dict={
                                   image_placeholder: image_data,
                                   is_training: False
                               })
        for image_name, logits_row in zip(batch_names, pred_logits):
            la_dict[image_name] = logits_row
        print('[%s][testing %d][step %d / %d exec %.2f seconds]' %
              (time.strftime("%Y-%m-%d %H:%M:%S"), image_num, step, total,
               (time.time() - batch_start_time)))
    return la_dict


def _save_and_reload_la(save_dir, file_name, la_dict):
    """Save ``la_dict`` as an .npz archive and return the dict read back from it."""
    archive_path = os.path.join(save_dir, file_name)
    np.savez(archive_path, dict=la_dict)
    # The dict is stored as a pickled object array, which numpy refuses to
    # load unless allow_pickle is set.
    with np.load(archive_path, allow_pickle=True) as archive:
        return archive['dict'][()]


def extract_pred_latent_attr():
    """Extract the predicted latent attributes of the train and test sets.

    Restores the latest checkpoint of the configured model, runs every image
    listed in ``FLAGS.train_file`` and ``FLAGS.test_file`` through the
    network and stores a ``{image name: logits}`` dict in 'train_la.npz' and
    'test_la.npz' under the result directories.

    Raises:
        RuntimeError: if no checkpoint is found in the model directory.
    """
    # Step 1: Create dirs for saving models and logs
    print('Start extract predicted latent attr')
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id
    model_path_suffix = (FLAGS.network_def + '_' + FLAGS.version + '_' +
                         'train_multi' + '_imagesize_' + str(FLAGS.img_size) +
                         '_batchsize_' + str(FLAGS.batch_size) +
                         '_experiment_' + FLAGS.experiment_id)
    model_save_dir = os.path.join('../../data/results_multi/model_weights',
                                  model_path_suffix)
    print('Extract pred attr of train set: ' + model_path_suffix + ' ...')
    la_save_dir_train = os.path.join(
        '../../data/results_extract_la' + '/train', model_path_suffix)
    la_save_dir_test = os.path.join('../../data/results_extract_la' + '/test',
                                    model_path_suffix)
    # os.makedirs instead of shelling out to `mkdir -p`: no shell parsing of
    # paths built from flags, and no dependency on a POSIX shell.
    os.makedirs(la_save_dir_train, exist_ok=True)
    os.makedirs(la_save_dir_test, exist_ok=True)

    # Step 2: Create dataset and data generator
    with open(FLAGS.train_file, 'r') as f:
        test_set_train = [line.split(' ')[0] for line in f]
    print('READING LABELS OF TRAIN DATA')
    print('Train total num:', len(test_set_train))
    test_set_test = parse_test_image_list(FLAGS.test_file)
    print('Test total num:', len(test_set_test))

    # Step 3: Build network graph
    _, whole_attr_np, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)
    with tf.Graph().as_default() as g3:
        image_placeholder = tf.placeholder(
            dtype=tf.float32,
            shape=[None, FLAGS.img_height, FLAGS.img_width,
                   FLAGS.img_depth])  # [batch, 224, 224, 3]
        is_training = tf.placeholder(dtype=tf.bool)
        feature, endpoints = resnet_v2.resnet_v2_50(image_placeholder,
                                                    num_classes=None,
                                                    reuse=False,
                                                    is_training=is_training)
        feature = tf.squeeze(feature, axis=[1, 2])
        print('feature shape:', feature)
        feature = slim.dropout(feature, keep_prob=1)
        final_logits = slim.fully_connected(
            feature,
            num_outputs=2 * FLAGS.attribute_label_cnt,
            activation_fn=None)
        print('logits shape', final_logits)

        # Step 4: Testing
        total_start_time = time.time()
        device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
        session_config = tf.ConfigProto(device_count=device_count,
                                        allow_soft_placement=True)
        with tf.Session(config=session_config, graph=g3) as sess:
            # Create model saver
            saver = tf.train.Saver()

            # Init all vars
            init_op = tf.group(tf.global_variables_initializer(),
                               tf.local_variables_initializer())
            sess.run(init_op)

            # Restore pretrained weights from the latest checkpoint.
            pretrained_model = model_save_dir
            print('load checkpoint of ', pretrained_model)
            checkpoint = tf.train.get_checkpoint_state(pretrained_model)
            if checkpoint is None:
                raise RuntimeError(
                    'No checkpoint found in %s' % pretrained_model)
            saver.restore(sess, checkpoint.model_checkpoint_path)

            # Print the mean of every trainable variable as a sanity check
            # that the weights were restored.
            for variable in tf.trainable_variables():
                print(variable.name, np.mean(sess.run(variable)))

            # Extract train la start
            train_la_dict = _extract_la(sess, final_logits,
                                        image_placeholder, is_training,
                                        test_set_train, True)
            print('train_la_dict: ', len(train_la_dict))
            train_la_dict_2 = _save_and_reload_la(la_save_dir_train,
                                                  'train_la.npz',
                                                  train_la_dict)
            # Spot check on one known image; prints None when that image is
            # not part of the data set instead of aborting the run.
            print(len(train_la_dict_2),
                  train_la_dict_2.get(
                      '7c382f330bd76982761f1a9191e9db0e.jpeg'))
            print('Extract train set done.')
            print("[%s][total exec %s seconds" %
                  (time.strftime("%Y-%m-%d %H:%M:%S"),
                   (time.time() - total_start_time)))

            # Extract test la start
            test_la_dict = _extract_la(sess, final_logits, image_placeholder,
                                       is_training, test_set_test, False)
            print('test_la_dict: ', len(test_la_dict))
            test_la_dict_2 = _save_and_reload_la(la_save_dir_test,
                                                 'test_la.npz', test_la_dict)
            print(len(test_la_dict_2),
                  test_la_dict_2.get('0003ae092034aa69da9782b2a3b4a15a.jpg'))
            print('Extract test set done.')
            print("[%s][total exec %s seconds" %
                  (time.strftime("%Y-%m-%d %H:%M:%S"),
                   (time.time() - total_start_time)))
# -*- coding: utf-8 -*-
# @ File ResNetDemo.py
# @ Description :
# @ Author alexchung
# @ Time 21/1/2019 09:52
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import resnet_v2

# Random image batch (5 images of 224 x 224 x 3) and the number of classes.
images = tf.Variable(initial_value=tf.random_uniform(shape=(5, 224, 224, 3),
                                                     minval=0,
                                                     maxval=3),
                     dtype=tf.float32)
num_classes = tf.constant(value=5, dtype=tf.int32)
# is_training = True

if __name__ == "__main__":
    with tf.Session() as sess:
        # Build the network first: an initializer op only covers the
        # variables that exist when it is created, so creating it before the
        # model would leave every ResNet variable uninitialised.
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(
                images, num_classes=num_classes.eval(), is_training=True)

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())
        sess.run(init)

        # List every model variable with its shape.
        for var in tf.model_variables():
            print(var.name, var.shape)
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):
    """Build the image-conditioned GVF prediction graph.

    The reference image is resized to ``img_size`` if needed and encoded by
    the network selected through ``FLAGS.encoder``. Depending on the flags,
    encoder feature maps are sampled at the image-plane projections of the
    query points, a gvfnet decoder produces per-point features, and a head
    turns them into xyz / distance / direction predictions.

    Args:
        input_pls: dict of input tensors. The keys 'imgs', 'pnts', 'gvfs',
            'onedge', 'trans_mats' and 'obj_rot_mats' are read.
        is_training: training flag forwarded to the encoder (the VGG encoder
            is always built with ``is_training=False``) and to the decoders.
        bn: forwarded to ``res_sim_encoder`` and the gvfnet decoders.
        bn_decay: forwarded to ``res_sim_encoder`` and the gvfnet decoders.
        img_size: side length the image is resized to before encoding.
        FLAGS: configuration object. Required in practice although it
            defaults to None. Attributes read: act, rot, edgeweight, alpha,
            encoder, wd, num_classes, batch_size, img_feat_onestream, img_h,
            img_w, decoder, multi_view, XYZ.

    Returns:
        dict ``end_points`` with the inputs ('pnts', 'imgs', 'pnts_rot',
        'gt_gvfs_xyz', optionally 'onedge'), 'resized_ref_img',
        'img_embedding', 'sample_img_points', 'point_img_feat' (None when
        ``FLAGS.img_feat_onestream`` is false) and the predictions
        'pred_gvfs_xyz', 'pred_gvfs_dist', 'pred_gvfs_direction'.

    Raises:
        ValueError: if ``FLAGS.act`` or ``FLAGS.encoder`` is not one of the
            supported values (previously this surfaced later as an
            UnboundLocalError).
    """
    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu
    else:
        raise ValueError(
            "Unsupported FLAGS.act %r; expected 'relu' or 'elu'" %
            (FLAGS.act,))

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            # Colour channels are interpolated bilinearly, the 4th channel
            # with nearest neighbour, then both are stitched back together.
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img

    resnet_encoders = ("resnet_v1_50", "resnet_v1_101", "resnet_v2_50",
                       "resnet_v2_101")
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(
                                FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder in resnet_encoders:
        # The four ResNet variants differ only in module, builder name and
        # scope, and the builder name equals the encoder / scope string.
        if FLAGS.encoder.startswith("resnet_v1"):
            resnet_module = resnet_v1
        else:
            resnet_module = resnet_v2
        resnet_module.default_image_size = img_size
        # As before, the v2 networks are also built under resnet_v1's
        # arg scope.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = getattr(
                resnet_module, FLAGS.encoder)(ref_img,
                                              FLAGS.num_classes,
                                              is_training=is_training,
                                              scope=FLAGS.encoder)
        scopelst = [
            "%s/block%d" % (FLAGS.encoder, block_id)
            for block_id in (1, 2, 3, 4)
        ]
    else:
        raise ValueError(
            "Unsupported FLAGS.encoder %r; expected 'vgg_16*', 'sim_res' or "
            "one of %s" % (FLAGS.encoder, ", ".join(resnet_encoders)))
    end_points['img_embedding'] = ref_feats_embedding

    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat"):
            # Pick which encoder feature maps are sampled at the projected
            # points; block4 / conv5 are deliberately not used.
            if FLAGS.encoder[:3] == "vgg":
                feat_keys = [
                    'vgg_16/conv1/conv1_2', 'vgg_16/conv2/conv2_2',
                    'vgg_16/conv3/conv3_3'
                ]
                if FLAGS.encoder[-7:] != "smaller":
                    feat_keys.append('vgg_16/conv4/conv4_3')
                else:
                    print("smaller vgg")
            elif FLAGS.encoder[:3] == "res":
                feat_keys = scopelst[:3]
            else:
                # e.g. "sim_res": the encoder end points are indexed by
                # position here.
                feat_keys = [0, 1, 2]

            # Upsample every selected map to (img_h, img_w), then read it
            # out at the projected point locations, in the original order.
            point_feats = []
            for feat_key in feat_keys:
                resized_feat = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[feat_key], (FLAGS.img_h, FLAGS.img_w))
                point_feats.append(
                    tf.contrib.resampler.resampler(resized_feat,
                                                   sample_img_points))
            point_img_feat = tf.concat(axis=2, values=point_feats)

            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)

            if FLAGS.decoder == "att":
                decoder_fn = gvfnet.get_gvf_att_imgfeat
            elif FLAGS.decoder == "skip":
                decoder_fn = gvfnet.get_gvf_basic_imgfeat_onestream_skip
            else:
                decoder_fn = gvfnet.get_gvf_basic_imgfeat_onestream
            gvfs_feat = decoder_fn(input_pnts_rot,
                                   ref_feats_embedding,
                                   point_img_feat,
                                   is_training,
                                   batch_size,
                                   bn,
                                   bn_decay,
                                   wd=FLAGS.wd,
                                   activation_fn=activation_fn)
    else:
        # NOTE(review): with FLAGS.multi_view set and img_feat_onestream
        # unset, gvfs_feat stays None and is handed to the heads below as-is
        # -- confirm whether that combination is meant to be supported.
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction"):
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)

    end_points['pred_gvfs_xyz'] = None
    end_points['pred_gvfs_dist'] = None
    end_points['pred_gvfs_direction'] = None
    if FLAGS.XYZ:
        # Predict the vector directly; distance is its L2 norm and the
        # direction is the vector normalised with a 1e-6 floor on the norm.
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points['pred_gvfs_direction'] = (
            end_points['pred_gvfs_xyz'] /
            tf.maximum(end_points['pred_gvfs_dist'], 1e-6))
    else:
        # Predict distance and direction separately and recombine them.
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = (end_points['pred_gvfs_direction'] *
                                       end_points['pred_gvfs_dist'])

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
def build_model(self):
    """Build the ResNet v2-50 graph with its heads, loss, accuracy and train op.

    ``self.config.mode`` selects the head(s) placed on the pooled features:

    * ``'si_branch'``    - one fully connected layer (``logits_si``).
    * ``'mi_branch'``    - ``self.mi_pool_layer`` followed by ``logits_mi``.
    * ``'si_mi_branch'`` - both heads; the loss comes from
      ``combined_cost_function`` weighted by ``self.current_beta``.

    Sets ``global_epoch_tensor``, ``global_epoch_inc``, ``x``, ``y``,
    ``y_mi``, ``bi``, ``is_training``, ``logits`` (plus ``logits_si`` in
    ``'si_mi_branch'``), ``out``, ``out_argmax``, ``loss``, ``acc`` and
    ``train_step`` on ``self``, and registers tensors in the 'inputs',
    'out', 'test' and 'train' graph collections.

    :return: None
    :raises ValueError: if ``self.config.mode`` is not one of the three
        modes above (previously this only failed later, when the loss read
        the never-assigned ``self.logits``).
    """
    mode = self.config.mode
    if mode not in ('si_branch', 'mi_branch', 'si_mi_branch'):
        raise ValueError(
            "Unsupported config.mode %r; expected 'si_branch', 'mi_branch' "
            "or 'si_mi_branch'" % (mode,))

    # ---- Helper variables ----
    # NOTE(review): self.global_step_tensor is used by the train step below
    # but is not created here -- presumably provided elsewhere (e.g. a base
    # class); confirm.
    self.global_epoch_tensor = tf.Variable(0,
                                           trainable=False,
                                           name='global_epoch')
    self.global_epoch_inc = self.global_epoch_tensor.assign(
        self.global_epoch_tensor + 1)

    # ---- Inputs to the network ----
    with tf.variable_scope('inputs'):
        self.x, self.y, self.y_mi, self.bi = self.data_loader.get_input()
        self.is_training = tf.placeholder(tf.bool, name='Training_flag')
    for input_tensor in (self.x, self.y, self.y_mi, self.bi,
                         self.is_training):
        tf.add_to_collection('inputs', input_tensor)

    # ---- Network architecture ----
    with tf.variable_scope('network'):
        # NOTE(review): self.is_training is not passed to the backbone, so
        # resnet_v2_50 runs with its own default -- confirm this is intended.
        net, end_points = resnet_v2.resnet_v2_50(inputs=self.x,
                                                 num_classes=None,
                                                 global_pool=True)
        end_points['resnet_v2_50/pool5:0'] = net
        print("Size after pool: ", net.shape)

        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        end_points['resnet_v2_50/spatial_squeeze'] = net
        print("Size after squeeze: ", net.shape)

        if mode == 'si_branch':
            end_points['resnet_v2_50/output_si'] = fully_connected(
                net,
                self.num_classes,
                activation_fn=None,
                normalizer_fn=None,
                scope='logits_si')
            self.logits = end_points['resnet_v2_50/output_si']
            net = end_points['resnet_v2_50/output_si']
        elif mode == 'mi_branch':
            net = self.mi_pool_layer(net,
                                     bag_indices=self.bi,
                                     pooling=self.config.pooling)
            end_points['resnet_v2_50/mi_pool1:0'] = net
            print("Size after MI: ", net.shape)
            end_points['resnet_v2_50/output_mi'] = fully_connected(
                end_points['resnet_v2_50/mi_pool1:0'],
                self.num_classes,
                activation_fn=None,
                normalizer_fn=None,
                scope='logits_mi')
            self.logits = end_points['resnet_v2_50/output_mi']
            net = end_points['resnet_v2_50/output_mi']
        else:  # 'si_mi_branch'
            # The pooled head is created first and the per-instance head
            # second, both reading the same squeezed features; the pooled
            # logits feed the softmax below.
            end_points['resnet_v2_50/mi_pool1:0'] = self.mi_pool_layer(
                net, bag_indices=self.bi, pooling=self.config.pooling)
            end_points['resnet_v2_50/output_mi'] = fully_connected(
                end_points['resnet_v2_50/mi_pool1:0'],
                self.num_classes,
                activation_fn=None,
                normalizer_fn=None,
                scope='logits_mi')
            self.logits = end_points['resnet_v2_50/output_mi']
            end_points['resnet_v2_50/output_si'] = fully_connected(
                net,
                self.num_classes,
                activation_fn=None,
                normalizer_fn=None,
                scope='logits_si')
            self.logits_si = end_points['resnet_v2_50/output_si']
            net = end_points['resnet_v2_50/output_mi']

        end_points['predictions'] = tf.nn.softmax(net)

        with tf.variable_scope('out'):
            self.out = end_points['predictions']
            tf.add_to_collection('out', self.out)
            print("predictions out shape: ", self.out.shape)

        print("network output argmax resnet")
        with tf.variable_scope('out_argmax'):
            self.out_argmax = tf.argmax(self.out,
                                        axis=-1,
                                        output_type=tf.int32,
                                        name='out_argmax')
            print("Arg Max Shape: ", self.out_argmax.shape)

    with tf.variable_scope('loss-acc'):
        if mode == 'si_mi_branch':
            self.update_beta_combined_cost()
            self.loss = combined_cost_function(self.y,
                                               self.logits_si,
                                               self.y_mi,
                                               self.logits,
                                               beta=self.current_beta)
        else:
            # NOTE(review): in 'mi_branch' the loss is taken against self.y
            # while the accuracy below compares against self.y_mi -- confirm
            # the label tensors line up with the pooled logits.
            self.loss = tf.losses.sparse_softmax_cross_entropy(
                labels=self.y, logits=self.logits)

        # Accuracy is measured against y_mi whenever a pooled head produces
        # the predictions, and against y otherwise.
        acc_labels = self.y if mode == 'si_branch' else self.y_mi
        self.acc = tf.reduce_mean(
            tf.cast(tf.equal(acc_labels, self.out_argmax), tf.float32))

    with tf.variable_scope('train_step'):
        # Run the ops collected under UPDATE_OPS together with each
        # optimiser step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = self.optimizer.minimize(
                self.loss, global_step=self.global_step_tensor)

    tf.add_to_collection('test', self.out_argmax)
    tf.add_to_collection('train', self.train_step)
    tf.add_to_collection('train', self.loss)
    tf.add_to_collection('train', self.acc)
def Encoder_resnet(x,
                   depth=None,
                   is_training=True,
                   weight_decay=0.001,
                   reuse=False):
    """
    Resnet v2-50 feature encoder for an image and, optionally, a depth map.
    Assumes input is [batch, height_in, width_in, channels]!!
    Input:
    - x: N x H x W x 3
    - depth : N x H x W x 1, or None to skip the depth encoder
    - is_training: forwarded to both resnet_v2_50 networks
    - weight_decay: float
    - reuse: bool->True if test (only applied to the image encoder)

    Outputs:
    - net: image features after global pooling, with the two spatial
      dimensions squeezed away (N x C)
    - net_depth: the same for ``depth``, or None when ``depth`` is None
    - variables: tf variables under the 'resnet_v2_50' scope, plus those
      under 'resnet_v2_50_dep' when ``depth`` is given
    """
    from tensorflow.contrib.slim.python.slim.nets import resnet_v2
    with tf.name_scope("Encoder_resnet", [x, depth]):
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            # Notes (added by CCJ) from
            # https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/slim/python/slim/nets/resnet_v2.py
            # - reuse: whether or not the network and its variables should be
            #   reused. To be able to reuse, 'scope' must be given.
            # - 'net' is a rank-4 tensor [batch, height_out, width_out,
            #   channels_out]. With global_pool (the default) height_out and
            #   width_out are both one; with num_classes=None it is the output
            #   of the last ResNet block, after global average pooling.
            # - end_points maps network components to their activations.
            net, end_points = resnet_v2.resnet_v2_50(
                inputs=x,
                num_classes=None,
                is_training=is_training,
                reuse=reuse,
                scope='resnet_v2_50')
            net = tf.squeeze(net, axis=[1, 2])

            # added by CCJ: depth is encoded by a second resnet_v2_50.
            # NOTE(review): `reuse` is not forwarded here, so building this
            # encoder a second time in the same graph would presumably fail
            # on already-existing variables -- confirm before relying on it.
            net_depth = None
            if depth is not None:
                net_depth, end_points_depth = resnet_v2.resnet_v2_50(
                    inputs=depth,
                    num_classes=None,
                    is_training=is_training,
                    scope='resnet_v2_50_dep')
                net_depth = tf.squeeze(net_depth, axis=[1, 2])

    # The image encoder lives under 'resnet_v2_50'. The former lookup of
    # 'resnet_v2_50_img' matched nothing, so its variables were never
    # returned. The trailing '/' keeps this prefix from also matching
    # 'resnet_v2_50_dep', which would duplicate the depth variables below.
    variables = tf.contrib.framework.get_variables('resnet_v2_50/')
    if depth is not None:
        variables = variables + tf.contrib.framework.get_variables(
            'resnet_v2_50_dep')
    return net, net_depth, variables
def test_one_with_aug_multi():
    """Predict one label per test image by voting over augmented copies.

    Restores the latest checkpoint of the multi-task ResNet v2-50 model and,
    for each test image, runs ``FLAGS.aug_num`` augmented copies through the
    network. Each copy's logits are scored against every class attribute
    vector, restricted to classes that never occur in ``FLAGS.train_file``,
    and the majority label over the copies is kept. The results are written
    to a time-stamped ``submit_*.txt`` (``image<TAB>label`` per line).

    Raises:
        IOError: if no checkpoint can be found in the model directory.
        OSError: if the output directory cannot be created.
    """
    # ---- Step 1: Create dirs for saving models and logs ----
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_id
    pretrained_model_path_suffix = (FLAGS.network_def + '_' + FLAGS.version +
                                    '_' + 'train_multi' + '_imagesize_' +
                                    str(FLAGS.img_size) + '_batchsize_' +
                                    str(FLAGS.batch_size) + '_experiment_' +
                                    FLAGS.experiment_id)
    pretrained_model_save_dir = os.path.join(
        '../../data/results_multi/model_weights',
        pretrained_model_path_suffix)
    print('Test_one_with_aug_multi: ' + pretrained_model_save_dir + ' ...')
    test_save_dir = os.path.join('../../submit/results_multi/test_B_with_aug',
                                 pretrained_model_path_suffix)
    # Create the output directory without going through a shell, so a
    # failure shows up here rather than after all inference has finished.
    if not os.path.isdir(test_save_dir):
        os.makedirs(test_save_dir)

    # ---- Step 2: Create dataset and data generator ----
    test_set = parse_test_image_list(FLAGS.test_file)
    test_size = len(test_set)
    image_placeholder = tf.placeholder(
        dtype=tf.float32,
        shape=[None, FLAGS.img_height, FLAGS.img_width, FLAGS.img_depth])
    is_training = tf.placeholder(dtype=tf.bool)

    # ---- Step 3: Build network graph ----
    # logits = model.inference(image_placeholder, FLAGS.num_residual_blocks, reuse=False)
    feature, _ = resnet_v2.resnet_v2_50(image_placeholder,
                                        num_classes=None,
                                        reuse=False,
                                        is_training=is_training)
    feature = tf.squeeze(feature, axis=[1, 2])
    print('feature shape:', feature)
    feature = slim.dropout(feature, keep_prob=1)
    final_logits = slim.fully_connected(feature,
                                        num_outputs=2 *
                                        FLAGS.attribute_label_cnt,
                                        activation_fn=None)
    print('logits shape', final_logits)

    # ---- Step 4: Testing ----
    total_start_time = time.time()

    represent_label2attribute_vec_map = parse_attribute_per_class(
        FLAGS.attrs_per_class_dir)
    print('represent_label2attribute_vec_map: ',
          len(represent_label2attribute_vec_map))
    repre_label_list = list(represent_label2attribute_vec_map.keys())
    attr_vec_list = [
        represent_label2attribute_vec_map[repre_label]
        for repre_label in repre_label_list
    ]
    print('attribute_vec2represent_label_map: ', len(repre_label_list),
          len(attr_vec_list))

    whole_class_repre_list, whole_attr_np, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)

    # ####################### use train set to valid
    train_image2represent_label_map = parse_train_image2represent_label_map(
        FLAGS.train_file)
    print('train file', len(train_image2represent_label_map))

    gt_attr_save_dir = os.path.join('../../data/results_gt_attr_with_latent',
                                    pretrained_model_path_suffix)
    # Context manager so the .npz file handle is closed once the array has
    # been read.
    with np.load(os.path.join(gt_attr_save_dir, 'gt_la.npz')) as gt_la_file:
        gt_la_attr = gt_la_file['list']
    print('gt_la_attr:', gt_la_attr, gt_la_attr.shape)

    repre_label2true_label_map = parse_represent_label2true_label_map(
        FLAGS.label_list)
    word_embedding_per_class = parse_word_embedding_per_class(
        FLAGS.class_wordembeddings)
    whole_word_list = [
        word_embedding_per_class[repre_label2true_label_map[repre_label]]
        for repre_label in whole_class_repre_list
    ]
    whole_word_np = np.array(whole_word_list, dtype=np.float32)
    print('whole_word_np', whole_word_np.shape, whole_word_np)

    # Per-class target: the first attribute_label_cnt attribute columns
    # followed by the loaded 'gt_la' columns.
    gt_attr = np.concatenate(
        (whole_attr_np[:, 0:FLAGS.attribute_label_cnt], gt_la_attr), axis=1)
    print('gt_attr', gt_attr, gt_attr.shape)

    total_class_set_list, _, _ = parse_repre_label2one_hot_map(
        FLAGS.attrs_per_class_dir)
    print('Total class set', total_class_set_list, len(total_class_set_list))

    # Collect the image names and the set of classes seen during training
    # (a set keeps the membership tests below O(1)).
    train_table = []
    train_classes = set()
    with open(FLAGS.train_file, 'r') as f:
        for line in f:
            fields = line.split(' ')
            train_table.append(fields[0])
            train_classes.add(fields[1].replace('\n', ''))
    print('READING LABELS OF TRAIN DATA')
    print('Total num:', len(train_table))

    train_class_set_list = [
        item for item in total_class_set_list if item in train_classes
    ]
    print('Train class set', train_class_set_list, len(train_class_set_list))
    useen_class_set_list = [
        item for item in total_class_set_list if item not in train_classes
    ]
    print('useen_class_set_list', useen_class_set_list,
          len(useen_class_set_list))

    train_class_index_list = []
    useen_class_index_list = []
    for class_index, class_repre in enumerate(total_class_set_list):
        if class_repre in train_classes:
            train_class_index_list.append(class_index)
        else:
            useen_class_index_list.append(class_index)
    print('train_class_index_list', train_class_index_list,
          len(train_class_index_list))
    print('useen_class_index_list', useen_class_index_list,
          len(useen_class_index_list))
    unseen_columns = np.asarray(useen_class_index_list, dtype=np.intp)

    device_count = {'GPU': 1} if FLAGS.use_gpu else {'GPU': 0}
    with tf.Session(config=tf.ConfigProto(device_count=device_count,
                                          allow_soft_placement=True)) as sess:
        # Create model saver
        saver = tf.train.Saver()
        # Init all vars
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)

        # Restore pretrained weights
        checkpoint = tf.train.get_checkpoint_state(pretrained_model_save_dir)
        if checkpoint is None:
            raise IOError('No checkpoint found in ' +
                          pretrained_model_save_dir)
        # Path of the most recently saved model checkpoint file.
        ckpt = checkpoint.model_checkpoint_path
        saver.restore(sess, ckpt)
        for variable in tf.trainable_variables():  # check weights
            with tf.variable_scope('', reuse=True):
                var = tf.get_variable(variable.name.split(':0')[0])
                print(variable.name, np.mean(sess.run(var)))

        # Test start
        pred_labels_total = []
        for step, image_name in enumerate(test_set, start=1):
            image_data = aug_test_image(is_train=False,
                                        name=image_name,
                                        aug_num=FLAGS.aug_num)
            batch_start_time = time.time()
            # Fetch the tensor itself (not a one-element list followed by
            # squeeze) so the batch axis survives when only one augmented
            # copy is fed; squeezing used to break the column slicing below.
            pred_logits = np.asarray(
                sess.run(final_logits,
                         feed_dict={
                             image_placeholder: image_data,
                             is_training: False
                         }))
            scores = np.matmul(pred_logits, gt_attr.T)
            print('scores_shape', scores.shape)

            # Only classes absent from the training file are candidates.
            scores_useen = scores[:, unseen_columns].astype(np.float32)
            print('scores_useen', scores_useen)
            max_scroes_indexes = np.argmax(scores_useen, axis=1)
            print('max_scroes_indexes', max_scroes_indexes)

            pred_class_index = [
                useen_class_index_list[best] for best in max_scroes_indexes
            ]
            print('pred_class_index', pred_class_index)
            pred_repre_labels = [
                whole_class_repre_list[class_index]
                for class_index in pred_class_index
            ]

            # Majority vote over the augmented copies. On ties the first
            # label in the set's iteration order wins, as before.
            pred_label_set = list(set(pred_repre_labels))
            print('pred_label_set: ', pred_label_set)
            pred_label_after_vote = max(pred_label_set,
                                        key=pred_repre_labels.count)
            print('pred_label_after_vote', pred_label_after_vote)
            pred_labels_total.append(pred_label_after_vote)

            print('[%s][testing %d][step %d / %d exec %.2f seconds]' %
                  (time.strftime("%Y-%m-%d %H:%M:%S"), 1, step, test_size,
                   (time.time() - batch_start_time)))

    print('Testing done.')
    print("[%s][total exec %s seconds" %
          (time.strftime("%Y-%m-%d %H:%M:%S"),
           (time.time() - total_start_time)))

    # write to submit.txt (the `with` block closes the file)
    submit_path = test_save_dir + '/' + 'submit_{}.txt'.format(
        time.strftime("%Y%m%d_%H%M%S"))
    with open(submit_path, 'w') as f:
        f.writelines(image_name + '\t' + pred_label + '\n'
                     for image_name, pred_label in zip(
                         test_set, pred_labels_total))