def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) ''' feature_map_layout = { 'from_layer': ['Conv2d_11_pointwise', 'Conv2d_13_pointwise', '', '', '', ''], 'layer_depth': [-1, -1, 512, 256, 256, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } ''' feature_map_layout = { 'from_layer': [ 'FeatureExtractor/vgg_16/conv4/conv4_3', 'FeatureExtractor/vgg_16/fc7', '', '', '', '' ], 'layer_depth': [-1, -1, 256, 128, 128, 128], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } with tf.variable_scope('vgg_16', reuse=self._reuse_weights) as scope: with slim.arg_scope(vgg.vgg_arg_scope()): with (slim.arg_scope(self._conv_hyperparams_fn()) if self._override_base_feature_extractor_hyperparams else context_manager.IdentityContextManager()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): _, image_features = vgg.vgg_16( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), num_classes=None, is_training=self._is_training, scope=scope) with slim.arg_scope(self._conv_hyperparams_fn()): # TODO(skligys): Enable fused batch norm once quantization supports it. with slim.arg_scope([slim.batch_norm], fused=False): feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def _extract_box_classifier_features(self, proposal_feature_maps, scope): """Extracts second stage box classifier features. Args: proposal_feature_maps: A 4-D float tensor with shape [batch_size * self.max_num_proposals, crop_height, crop_width, depth] representing the feature map cropped to each proposal. scope: A scope name (unused). Returns: proposal_classifier_features: A 4-D float tensor with shape [batch_size * self.max_num_proposals, height, width, depth] representing box classifier features for each proposal. """ with tf.variable_scope(self._architecture, reuse=self._reuse_weights): with slim.arg_scope( vgg.vgg_arg_scope(weight_decay=self._weight_decay)): with slim.arg_scope([slim.batch_norm], is_training=self._train_batch_norm): blocks = [ resnet_utils.Block('block4', resnet_v1.bottleneck, [{ 'depth': 2048, 'depth_bottleneck': 512, 'stride': 1 }] * 3) ] proposal_classifier_features = resnet_utils.stack_blocks_dense( proposal_feature_maps, blocks) return proposal_classifier_features
def __init__(self): from nets import vgg self.image_size = 224 self.num_classes = 1000 self.predictions_is_correct = False self.use_larger_step_size = False self.use_smoothed_grad = False # For dataprior attacks. gamma = A^2 * D / d in the paper self.gamma = 4.5 batch_shape = [None, self.image_size, self.image_size, 3] self.x_input = tf.placeholder(tf.float32, shape=batch_shape) self.target_label = tf.placeholder(tf.int32, shape=[None]) target_onehot = tf.one_hot(self.target_label, self.num_classes) with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_16(self.x_input, num_classes=self.num_classes, is_training=False) self.predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1) #logits -= tf.reduce_min(logits) #real = tf.reduce_max(logits * target_onehot, 1) #other = tf.reduce_max(logits * (1 - target_onehot), 1) #self.loss = other - real self.loss = tf.nn.softmax_cross_entropy_with_logits( labels=target_onehot, logits=logits) self.grad = 255.0 * tf.gradients(self.loss, self.x_input)[0] saver = tf.train.Saver(slim.get_model_variables(scope='vgg_16')) self.sess = tf.get_default_session() saver.restore(self.sess, 'vgg_16.ckpt')
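# Hedged usage sketch (not part of the original source): the class that owns the
# __init__ above is not shown, so `VGG16Model` is a placeholder name, and
# 'vgg_16.ckpt' must be present in the working directory as the constructor expects.
import numpy as np
import tensorflow as tf

with tf.Session() as sess:  # becomes the default session that __init__ picks up
    model = VGG16Model()  # hypothetical name for the class defining __init__ above
    images = np.zeros((1, model.image_size, model.image_size, 3), dtype=np.float32)
    targets = np.array([0], dtype=np.int32)
    grad = sess.run(model.grad,
                    feed_dict={model.x_input: images, model.target_label: targets})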
def specific_object_recognition(image_size, num_classes_s): # Define placeholders with tf.name_scope('input'): with tf.name_scope('cropped_images'): cropped_images_placeholder = tf.placeholder(dtype="float32", shape=(None, image_size, image_size, 3)) with tf.name_scope('labels'): labels_placeholder = tf.placeholder(dtype="float32", shape=(None, num_classes_s)) keep_prob = tf.placeholder(dtype="float32") is_training = tf.placeholder(dtype="bool") # train flag # Build the graph with slim.arg_scope(vgg_arg_scope()): logits, _ = vgg_16(cropped_images_placeholder, num_classes=num_classes_s, is_training=True, reuse=None) predictions = tf.nn.softmax(logits, name='Predictions') predict_labels = tf.argmax(predictions, 1) return predict_labels, [cropped_images_placeholder, keep_prob, is_training]
def test_vgg_19(img_dir): """ Test VGG-19 with a single image. :param img_dir: Path of the image to be classified :return: classification result and probability of a single image """ img = cv2.imread(img_dir) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = cv2.resize(img, (224, 224)) img = img.reshape((1, 224, 224, 3)) tf.reset_default_graph() inputs = tf.placeholder(name='input_images', shape=[None, 224, 224, 3], dtype=tf.float32) with slim.arg_scope(vgg_arg_scope()): _, _ = vgg_19(inputs, is_training=False) with tf.Session() as sess: tf.train.Saver().restore(sess, './models/vgg_19.ckpt') inputs = sess.graph.get_tensor_by_name('input_images:0') outputs = sess.graph.get_tensor_by_name('vgg_19/fc8/squeezed:0') pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0] prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0] pred, prob = sess.run([pred, prob], feed_dict={inputs: img}) name = label_dict[pred + 1] print('Result of VGG-19:', name, prob) return name, prob
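# Hedged usage sketch for test_vgg_19: assumes ./models/vgg_19.ckpt and the
# global `label_dict` that the function above already relies on; the image path
# below is a placeholder.
if __name__ == '__main__':
    name, prob = test_vgg_19('./test_images/example.jpg')
    print('Top-1 prediction: %s (probability %.4f)' % (name, prob))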
def model(image): # subtract the per-channel image mean image = mean_image_subtraction(image) with slim.arg_scope(vgg.vgg_arg_scope()): conv5_3 = vgg.vgg_16(image) # convolution over the VGG conv5_3 feature map rpn_conv = slim.conv2d(conv5_3, 512, 3) lstm_output = Bilstm(rpn_conv, 512, 128, 512, scope_name='BiLSTM') bbox_pred = lstm_fc(lstm_output, 512, 10 * 4, scope_name="bbox_pred") cls_pred = lstm_fc(lstm_output, 512, 10 * 2, scope_name="cls_pred") # transpose: (1, H, W, A x d) -> (1, H, WxA, d) cls_pred_shape = tf.shape(cls_pred) cls_pred_reshape = tf.reshape( cls_pred, [cls_pred_shape[0], cls_pred_shape[1], -1, 2]) cls_pred_reshape_shape = tf.shape(cls_pred_reshape) cls_prob = tf.reshape(tf.nn.softmax( tf.reshape(cls_pred_reshape, [-1, cls_pred_reshape_shape[3]])), [ -1, cls_pred_reshape_shape[1], cls_pred_reshape_shape[2], cls_pred_reshape_shape[3] ], name="cls_prob") return bbox_pred, cls_pred, cls_prob
def _extract_proposal_features(self, preprocessed_inputs, scope): if len(preprocessed_inputs.get_shape().as_list()) != 4: raise ValueError( '`preprocessed_inputs` must be 4 dimensional, got a ' 'tensor of shape %s' % preprocessed_inputs.get_shape()) shape_assert = tf.Assert( tf.logical_and( tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), ['image size must at least be 33 in both height and width.']) with tf.control_dependencies([shape_assert]): with slim.arg_scope( vgg.vgg_arg_scope(weight_decay=self._weight_decay)): with tf.variable_scope(self._architecture, reuse=self._reuse_weights) as var_scope: _, endpoints = self._vgg_model( preprocessed_inputs, final_endpoint='conv5', trainable=self._is_training, freeze_layer=self._freeze_layer, scope=var_scope) handle = self._base_features return endpoints[handle]
def _extract_box_classifier_features(self, proposal_feature_maps, scope): with tf.variable_scope(self._architecture, reuse=self._reuse_weights): with slim.arg_scope( vgg.vgg_arg_scope(weight_decay=self._weight_decay)): proposal_classifier_features = tf.identity( proposal_feature_maps) return proposal_classifier_features
def build(self, cost, model, train): if model == "MobilenetV1": with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()): self.logits, self.end_points = mobilenet_v1.mobilenet_v1( self.input, num_classes=self.numb_logits, dropout_keep_prob=self.kp, is_training=train) elif model == "vgg_16": with slim.arg_scope(vgg.vgg_arg_scope()): self.logits, self.end_points = vgg.vgg_16( self.input, num_classes=self.numb_logits, dropout_keep_prob=self.kp, is_training=train) self.prob = tf.nn.softmax(self.logits, name="prob") self.loss = tf.reduce_mean( tf.reduce_sum(tf.pow(self.prob - self.target, 2), axis=1)) tf.summary.scalar('loss', self.loss) if cost == "mse": self.cost = self.loss else: self.xtarget = self.target * (1 - 1e-11) + 1e-12 assert self.xtarget.get_shape().as_list()[1] == self.numb_logits self.xprob = self.prob * (1 - 1e-11) + 1e-12 assert self.xprob.get_shape().as_list()[1] == self.numb_logits self.cost = tf.reduce_mean( tf.reduce_sum(self.xtarget * tf.log(self.xtarget / self.xprob), axis=1)) tf.summary.scalar('cost_kl', self.cost)
def build_cnn(self): with tf.contrib.slim.arg_scope(vgg.vgg_arg_scope()): _, end_points = vgg.vgg_19(inputs=self.images) net = end_points['vgg_19/fc7'] # shape = [batch size, 1, 1, 4096] with tf.variable_scope('mlc'): net = tf.contrib.slim.dropout(net, self.dropout_keep_prob, is_training=self.is_training, scope='dropout7') net = tf.contrib.slim.conv2d( net, 1024, [1, 1], activation_fn=tf.nn.relu, normalizer_fn=None, scope='fc8') # shape = [batch size, 1, 1, 1024] net = tf.contrib.slim.dropout(net, self.dropout_keep_prob, is_training=self.is_training, scope='dropout8') net = tf.contrib.slim.conv2d( net, self.label_num, [1, 1], activation_fn=None, normalizer_fn=None, scope='fc9') # shape = [batch size, 1, 1, 15] logits = tf.squeeze(net, [1, 2]) # shape = [batch size, 15] self.logits = logits self.predictions = tf.nn.sigmoid(logits) self.conv5_3_feats = end_points['vgg_19/conv5/conv5_3'] print('cnn built.')
def model(image): image = _p_shape(image, "initial input") image = mean_image_subtraction(image) with slim.arg_scope(vgg.vgg_arg_scope()): # the resulting feature map is (m/16 x n/16 x 512) vgg_fc2 = vgg.vgg16(image) vgg_fc2 = _p_shape(vgg_fc2, "output of VGG conv5_3") vgg_fc2 = tf.squeeze(vgg_fc2, [1, 2]) # collapse [1,1,4096,4096] => [4096,4096]; squeeze dims [1,2] rather than [0,1,2] because dim 0 is the batch logger.debug("vgg_fc2:%r", vgg_fc2.get_shape()) # commented out for now init_weights = tf.contrib.layers.variance_scaling_initializer(factor=0.01, mode='FAN_AVG', uniform=False) init_biases = tf.constant_initializer(0.0) w_fc1 = tf.get_variable("w_fc1", [4096,256], initializer=init_weights) w_b1 = tf.get_variable("w_b1", [256], initializer=init_biases) w_fc2 = tf.get_variable("w_fc2", [256, 4], initializer=init_weights) w_b2 = tf.get_variable("w_b2", [4],initializer=init_biases) # attach two fully-connected layers fc1 = tf.add(tf.matmul(vgg_fc2,w_fc1),w_b1) fc1 = tf.nn.relu(fc1) fc1 = tf.nn.dropout(fc1, keep_prob=0.75) fc2 = tf.add(tf.matmul(fc1,w_fc2),w_b2) fc2 = tf.nn.relu(fc2) fc2 = _p_shape(fc2,"fc2 shape:\t") classes = tf.argmax(tf.nn.softmax(fc2),axis=1) classes = _p_shape(classes, "classes shape:\t") return fc2,classes
def _extract_box_classifier_features(self, proposal_feature_maps, scope): """Extracts second stage box classifier features. Args: proposal_feature_maps: A 4-D float tensor with shape [batch_size * self.max_num_proposals, crop_height, crop_width, depth] representing the feature map cropped to each proposal. scope: A scope name (unused). Returns: proposal_classifier_features: A 4-D float tensor with shape [batch_size * self.max_num_proposals, height, width, depth] representing box classifier features for each proposal. """ net = proposal_feature_maps with tf.variable_scope(self._architecture, reuse=self._reuse_weights): with slim.arg_scope( vgg.vgg_arg_scope(weight_decay=self._weight_decay)): net = slim.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') net = slim.dropout(net, 1, is_training=True, scope='dropout6') net = slim.conv2d(net, 4096, [1, 1], scope='fc7') net = slim.dropout(net, 1, is_training=True, scope='dropout7') net = slim.conv2d(net, 3, [1, 1], activation_fn=None, normalizer_fn=None, scope='fc8') proposal_classifier_features = net return proposal_classifier_features
def build_train_op(image_tensor, label_tensor, is_training): vgg_argscope = vgg_arg_scope(weight_decay=FLAGS.weight_decay) global_step = tf.get_variable(name="global_step", shape=[], dtype=tf.int32, trainable=False) with slim.arg_scope(vgg_argscope): logits, end_points = vgg_16(image_tensor, is_training=is_training, num_classes=10) loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label_tensor)) accuracy = tf.reduce_sum( tf.cast( tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), label_tensor), tf.int32)) end_points['loss'], end_points['accuracy'] = loss, accuracy if is_training: optimizer = tf.train.AdadeltaOptimizer( learning_rate=FLAGS.learning_rate) train_op = optimizer.minimize(loss, global_step=global_step) return train_op, end_points else: return None, end_points
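# Hedged driver sketch for build_train_op (not from the original source): assumes
# an input pipeline that yields `image_tensor`/`label_tensor` batches and that the
# FLAGS used above are defined.
import tensorflow as tf

def run_training(image_tensor, label_tensor, num_steps=1000):
    train_op, end_points = build_train_op(image_tensor, label_tensor, is_training=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for step in range(num_steps):
            _, loss_val, correct = sess.run(
                [train_op, end_points['loss'], end_points['accuracy']])
            if step % 100 == 0:
                # 'accuracy' above is a per-batch count of correct predictions
                print('step %d: loss=%.4f correct_in_batch=%d' % (step, loss_val, correct))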
def __call__(self, x_input): if (self.build): tf.get_variable_scope().reuse_variables() else: self.build = True inception_imags = (x_input / 255.0 - 0.5) * 2 resized_images_vgg = tf.image.resize_images( x_input, [224, 224]) - tf.constant([123.68, 116.78, 103.94]) with slim.arg_scope(vgg.vgg_arg_scope()): logits_vgg16, _ = self.network_fn_vgg16( resized_images_vgg, num_classes=self.num_classes, is_training=False) resized_images_res = ( tf.image.resize_images(x_input, [224, 224]) / 255.0 - 0.5) * 2 with slim.arg_scope(resnet_v2.resnet_arg_scope()): logits_res, _ = self.network_fn_res(resized_images_res, num_classes=self.num_classes + 1, is_training=False) logits_res = tf.reshape(logits_res, (-1, 1001)) logits_res = tf.slice(logits_res, [0, 1], [FLAGS.batch_size, self.num_classes]) with slim.arg_scope(inception_utils.inception_arg_scope()): logits_incepv3, _ = self.network_fn_incepv3( inception_imags, num_classes=self.num_classes + 1, is_training=False) logits_incepv3 = tf.slice(logits_incepv3, [0, 1], [FLAGS.batch_size, self.num_classes]) with slim.arg_scope(inception_utils.inception_arg_scope()): logits_incepv4, _ = self.network_fn_incepv4( inception_imags, num_classes=self.num_classes + 1, is_training=False) logits_incepv4 = tf.slice(logits_incepv4, [0, 1], [FLAGS.batch_size, self.num_classes]) with slim.arg_scope( inception_resnet_v2.inception_resnet_v2_arg_scope()): logits_incep_res, _ = self.network_fn_incep_res( inception_imags, num_classes=self.num_classes + 1, is_training=False) logits_incep_res = tf.slice(logits_incep_res, [0, 1], [FLAGS.batch_size, self.num_classes]) alex_images = tf.image.resize_images(x_input, [256, 256]) alex_images = tf.reverse(alex_images, axis=[-1]) alex_mean_npy = np.load('model/alex_mean.npy').swapaxes(0, 1).swapaxes( 1, 2).astype(np.float32) alex_mean_images = tf.constant(alex_mean_npy) alex_images = alex_images[:, ] - alex_mean_images alex_images = tf.slice(alex_images, [0, 14, 14, 0], [FLAGS.batch_size, 227, 227, 3]) _, logits_alex = self.network_fn_alex(alex_images) logits = [ logits_vgg16, logits_res, logits_incepv3, logits_incepv4, logits_incep_res, logits_alex ] ensemble_logits = tf.reduce_mean(tf.stack(logits), 0) return ensemble_logits
def graph(x, y, i, x_max, x_min, grad): eps = FLAGS.max_epsilon num_iter = FLAGS.num_iter alpha = eps / num_iter momentum = FLAGS.momentum num_classes = 1000 with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_16(x, num_classes=num_classes, is_training=False) pred = tf.argmax(logits, 1) first_round = tf.cast(tf.equal(i, 0), tf.int64) y = first_round * pred + (1 - first_round) * y one_hot = tf.one_hot(y, num_classes) cross_entropy = tf.losses.softmax_cross_entropy(one_hot, logits, label_smoothing=0.0, weights=1.0) noise = tf.gradients(cross_entropy, x)[0] noise = tf.nn.depthwise_conv2d(noise, stack_kernel, strides=[1, 1, 1, 1], padding='SAME') noise = noise / tf.reduce_mean(tf.abs(noise), [1, 2, 3], keep_dims=True) noise = momentum * grad + noise x = x + alpha * tf.sign(noise) x = tf.clip_by_value(x, x_min, x_max) i = tf.add(i, 1) return x, y, i, x_max, x_min, noise
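# Hedged sketch of the usual driver for graph() (standard MI-FGSM-style loop; the
# `stack_kernel` used above and the x_input/x_max/x_min tensors are assumed to be
# built elsewhere, as in the original attack scripts).
import tensorflow as tf

def stop(x, y, i, x_max, x_min, grad):
    return tf.less(i, FLAGS.num_iter)

def build_attack(x_input, x_max, x_min, batch_size):
    y = tf.zeros([batch_size], dtype=tf.int64)  # replaced by predicted labels on the first iteration
    i = tf.constant(0)
    grad = tf.zeros_like(x_input)
    x_adv, _, _, _, _, _ = tf.while_loop(stop, graph, [x_input, y, i, x_max, x_min, grad])
    return x_adv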
def train_vgg16(log_steps,save_summaries_sec,save_interval_secs,num_iterations = num_iterations_vgg): with tf.Graph().as_default(): tf.logging.set_verbosity(tf.logging.INFO) summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) global_step = slim.get_or_create_global_step() #dataset = plantclef2015.get_split('train', plant_data_dir) dataset = plantclef2015_all_labels.get_split('train', plant_data_dir) images,labels = load_batch(dataset, batch_size = batch_size, k=num_patches_vgg, r=r_rotations_vgg) # Add Images to summaries summaries.add(tf.summary.image("input_images", images, batch_size)) # Create the models with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)): logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False) # Specify the loss function: one_hot_labels = slim.one_hot_encoding(labels, dataset.num_classes) loss(logits, one_hot_labels) #slim.losses.softmax_cross_entropy(logits, one_hot_labels) #tf.losses.softmax_cross_entropy(one_hot_labels, logits) total_loss = slim.losses.get_total_loss() # Create some summaries to visualize the training process: for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) summaries.add(tf.summary.scalar('losses/Total_Loss', total_loss)) # Specify the optimizer and create the train op: learning_rate = tf.train.exponential_decay(start_learning_rate, global_step,updating_iteration_for_learning_rate, updating_gamma, staircase=True) optimizer = tf.train.MomentumOptimizer(learning_rate= learning_rate, momentum=momentum) train_op = slim.learning.create_train_op(total_loss, optimizer) summaries.add(tf.summary.scalar('training/Learning_Rate', learning_rate)) summary_op = tf.summary.merge(list(summaries), name='summary_op') # Run the training: final_loss = slim.learning.train( train_op, logdir=train_vgg16_dir, log_every_n_steps=log_steps, global_step=global_step, number_of_steps= num_iterations, summary_op=summary_op, init_fn=get_init_fn_vgg(), save_summaries_secs=save_summaries_sec, save_interval_secs=save_interval_secs) print('Finished training. Last batch loss %f' % final_loss)
def vgg_19(inputs): with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_19( inputs, num_classes=None, is_training=False, fc_conv_padding='VALID', global_pool=True) return logits, end_points, vgg_19_ckpt_path
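# Hedged usage sketch for the vgg_19 wrapper above: restores weights from the
# returned checkpoint path with slim's helper (assumes `images` is a float batch
# shaped [batch, height, width, 3] and that the checkpoint file exists).
import tensorflow as tf
import tensorflow.contrib.slim as slim

def build_and_restore(images, sess):
    net, end_points, ckpt_path = vgg_19(images)
    init_fn = slim.assign_from_checkpoint_fn(ckpt_path, slim.get_model_variables('vgg_19'))
    init_fn(sess)
    return net, end_points  # `net` is the globally pooled fc7 feature, shape [batch, 1, 1, 4096]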
def extract_features(input_file_path): image_names = os.listdir(input_file_path) print('len image list', len(image_names)) image_list = list() num_images = len(image_names) for i in range(num_images): img = image_names[i] image_list.append(os.path.join(input_file_path, img)) print(image_list[:10]) images = image_list slim = tf.contrib.slim # Get the image size that vgg_19 accepts image_size = vgg.vgg_19.default_image_size preprocessed_images = list() out_features_list = [] total_count = 0 batch = 0 while total_count < len(images): batch += 1 print('batch number', batch) preprocessed_images = list() with tf.Graph().as_default(): # This allows for default parameters with slim.arg_scope(vgg.vgg_arg_scope()): for c in range(10): if total_count >= len(images): break ## print total_count print(images[total_count]) image = tf.read_file(image_list[total_count]) decoded_image = tf.image.decode_jpeg(image, channels=3) preprocessed_images.append(preproc.preprocess_image(decoded_image, image_size, image_size, is_training=True)) total_count += 1 stacked_images = tf.stack(preprocessed_images) print('stacked images', stacked_images) _, end_points = vgg.vgg_19(stacked_images, is_training=False) with tf.Session() as sess: print('inside tf sess') sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() # run the fc7 end point rather than the raw input stack; restore VGG-19 weights via saver.restore(...) first for meaningful features out_features = sess.run(end_points['vgg_19/fc7']) out_features_list.extend(out_features) print('accumulated features array') print(np.array(out_features_list).shape)
def grad_cam(x_input, sess, image): image = (image + 1.0) * 0.5 * 255.0 img_vgg = preprocess_for_model(x_input, 'vgg_16') with slim.arg_scope(vgg.vgg_arg_scope()): logits_vgg_16, end_points_vgg_16 = vgg.vgg_16(img_vgg, num_classes=110, is_training=True, scope='vgg_16', reuse=True) end_points_vgg_16['logits'] = end_points_vgg_16['vgg_16/fc8'] end_points_vgg_16['pool5'] = end_points_vgg_16['vgg_16/pool5'] end_points_vgg_16['probs'] = tf.nn.softmax(end_points_vgg_16['logits']) predict = tf.argmax(end_points_vgg_16['probs'], 1) logits = end_points_vgg_16['logits'] before_fc = end_points_vgg_16['pool5'] probs = end_points_vgg_16['probs'] nb_classes = 110 conv_layer = before_fc one_hot = tf.one_hot(predict, 110) signal = tf.multiply(logits, one_hot) loss = tf.reduce_mean(signal) #loss = tf.losses.softmax_cross_entropy(one_hot, # logits, # label_smoothing=0.0, # weights=1.0) grads = tf.gradients(loss, conv_layer)[0] norm_grads = tf.div( grads, tf.sqrt(tf.reduce_mean(tf.square(grads))) + tf.constant(1e-5)) output, grads_val = sess.run([conv_layer, norm_grads], feed_dict={x_input: image}) grads_val = grads_val[0] output = output[0] weights = np.mean(grads_val, axis=(0, 1)) # [512] cam = np.ones(output.shape[0:2], dtype=np.float32) # [7,7] # Taking a weighted average for i, w in enumerate(weights): cam += w * output[:, :, i] # Passing through ReLU #cam = imresize(cam, (224,224)) cam = np.maximum(cam, 0) cam = cam / np.max(cam) cam = imresize(cam, (224, 224)) # Converting grayscale to 3-D cam3 = np.expand_dims(cam, axis=2) cam3 = np.tile(cam3, [1, 1, 3]) img = image[0] img = img / np.max(img) # Superimposing the visualization with the image. new_img = img + cam3 new_img = new_img / np.max(new_img) #new_img = new_img.astype(np.uint8) return cam3
def evaluate_vgg16(batch_size): with tf.Graph().as_default(): tf.logging.set_verbosity(tf.logging.INFO) global_step = slim.get_or_create_global_step() dataset = plantclef2015.get_split('validation', plant_data_dir) images,labels = load_batch(dataset, batch_size = batch_size, k=num_patches_vgg, r=r_rotations_vgg, is_training =False) with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)): logits, _ = vgg.vgg_16(images, num_classes=1000, is_training=False) total_output = [] total_labels = [] total_images = [] with tf.Session() as sess: coord = tf.train.Coordinator() saver = tf. train.Saver() saver.restore(sess, tf.train.latest_checkpoint(train_vgg16_dir)) threads = tf.train.start_queue_runners(sess=sess, coord=coord) for i in range(batch_size): print('step: %d/%d' % (i, batch_size)) o, l , image= sess.run([logits, labels, images[0]]) o = tf.reduce_sum(o, 0)/float(40) total_output.append(o) total_labels.append(l[0]) total_images.append(image) coord.request_stop() coord.join(threads) total_output = tf.stack(total_output,0) total_output = tf.nn.softmax(total_output) labels = tf.constant(total_labels) total_images = sess.run(tf.stack(total_images,0)) top1_op = tf.nn.in_top_k(total_output, labels, 1) top1_acc = sess.run(tf.reduce_mean(tf.cast(top1_op, tf.float32))) print(top1_acc) top5_op = tf.nn.in_top_k(total_output, labels, 5) top5_acc = sess.run(tf.reduce_mean(tf.cast(top5_op, tf.float32))) print(top5_acc) accuracy1_sum = tf.summary.scalar('top1_accuracy', top1_acc) accuracy5_sum = tf.summary.scalar('top5_accuracy', top5_acc) images_sum = tf.summary.image("input_images", total_images, batch_size) accuracy1, accuracy5, image_batch, step = sess.run([accuracy1_sum,accuracy5_sum,images_sum, global_step]) writer = tf.summary.FileWriter(eval_vgg16_dir) writer.add_summary(accuracy1, step) writer.add_summary(accuracy5, step) writer.add_summary(image_batch)
def vgg_16(inputs, is_training, opts): with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=opts.weight_decay)): return vgg.vgg_16( inputs, num_classes=opts.num_classes, dropout_keep_prob=opts.dropout_keep_prob, spatial_squeeze=opts.spatial_squeeze, is_training=is_training, fc_conv_padding='VALID', global_pool=opts.global_pool)
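# Hedged usage sketch for the vgg_16 wrapper above; the surrounding project's
# `opts` object is not shown, so a SimpleNamespace with typical values stands in.
from types import SimpleNamespace
import tensorflow as tf

opts = SimpleNamespace(weight_decay=0.0005, num_classes=1000,
                       dropout_keep_prob=0.5, spatial_squeeze=True, global_pool=False)
images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = vgg_16(images, is_training=False, opts=opts)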
def extract_features(self, preprocessed_inputs): """Extract features from preprocessed inputs. Args: preprocessed_inputs: a [batch, height, width, channels] float tensor representing a batch of images. Returns: feature_maps: a list of tensors where the ith tensor has shape [batch, height_i, width_i, depth_i] """ preprocessed_inputs = shape_utils.check_min_image_dim( 33, preprocessed_inputs) feature_map_layout = { 'from_layer': ['block4', 'block7', 'block8', 'block9', 'block10', 'block11'], 'layer_depth': [-1, -1, -1, -1, -1, -1], 'use_explicit_padding': self._use_explicit_padding, 'use_depthwise': self._use_depthwise, } ''' with slim.arg_scope(self._conv_hyperparams_fn()): with tf.variable_scope('InceptionV2', reuse=self._reuse_weights) as scope: _, image_features = inception_v2.inception_v2_base( ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple), final_endpoint='Mixed_5c', min_depth=self._min_depth, depth_multiplier=self._depth_multiplier, scope=scope) ''' with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=0.0)): with tf.variable_scope('vgg16', reuse=self._reuse_weights) as var_scope: _, image_features = vgg.vgg_16_ssd(preprocessed_inputs, num_classes=3, is_training=True, dropout_keep_prob=0.9, spatial_squeeze=False, scope=var_scope, fc_conv_padding='VALID', global_pool=False, end_point='pool5') feature_maps = feature_map_generators.multi_resolution_feature_maps( feature_map_layout=feature_map_layout, depth_multiplier=self._depth_multiplier, min_depth=self._min_depth, insert_1x1_conv=True, image_features=image_features) return feature_maps.values()
def create_network(self): with tf.contrib.slim.arg_scope(vgg_arg_scope()): logits, end_points = vgg_19(self.img, num_classes=self.nb_class, is_training=self.is_training, fc_conv_padding='SAME', global_pool=True) self.logits = logits self.probabilities = tf.nn.sigmoid(self.logits) self.predictions = tf.cast( self.probabilities >= self.prediction_threshold, tf.float32)
def vgg_net(image, reuse=tf.AUTO_REUSE, keep_prop=0.5): image = tf.reshape(image, [-1, 224, 224, 3]) with tf.variable_scope(name_or_scope='VGG16', reuse=reuse): arg_scope = vgg.vgg_arg_scope() with slim.arg_scope(arg_scope): logits, end_point = vgg.vgg_16(image, 1000, is_training=True, dropout_keep_prob=keep_prop) probs = tf.nn.softmax(logits) # probabilities return logits, probs, end_point
def eval(params): batch_size = params['batch_size'] num_examples = len(params['test_files'][0]) with tf.Graph().as_default(): batch = dut.distorted_inputs(params,is_training=is_training) with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_19(batch[0], num_classes=params['n_output'], is_training=is_training) init_fn=ut.get_init_fn(slim,params) config = tf.ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = params['per_process_gpu_memory_fraction'] with tf.Session(config=config) as sess: # sess.run(tf.initialize_all_variables()) sess.run(tf.initialize_local_variables()) coord = tf.train.Coordinator() threads = [] for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS): threads.extend(qr.create_threads(sess, coord=coord, daemon=True, start=True)) init_fn(sess) num_iter = int(math.ceil(num_examples / batch_size)) print('%s: Testing started.' % (datetime.now())) step = 0 loss_lst=[] run_lst=[] run_lst.append(logits) [run_lst.append(lst) for lst in batch[1:len(batch)]] while step < num_iter and not coord.should_stop(): try: batch_res= sess.run(run_lst) except tf.errors.OutOfRangeError: print ('Testing finished....%d'%step) break if(params['write_est']==True): ut.write_est(params,batch_res) est=batch_res[0] gt=batch_res[1] loss= ut.get_loss(params,gt,est) loss_lst.append(loss) s ='VAL --> batch %i/%i | error %f'%(step,num_iter,loss) ut.log_write(s,params) # joint_list=['/'.join(p1.split('/')[0:-1]).replace('joints','img').replace('.cdf','')+'/frame_'+(p1.split('/')[-1].replace('.txt','')).zfill(5)+'.png' for p1 in image_names] # print ('List equality check:') # print len(label_names) == len(set(label_names)) # print sum(joint_list==label_names)==(len(est)) # print(len(label_names)) step += 1 coord.request_stop() coord.join(threads) return np.mean(loss_lst)
def get_network_logits_and_endpoints(network, images): if(network == 'inceptionV1'): with slim.arg_scope(inception.inception_v1_arg_scope(weight_decay=weight_decay)): logits, endpoints = inception.inception_v1(images, num_classes=1000, is_training=False) elif(network == 'vgg16'): with slim.arg_scope(vgg.vgg_arg_scope(weight_decay=weight_decay)): logits, endpoints = vgg.vgg_16(images, num_classes=1000, is_training=False) return logits,endpoints
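# Hedged usage sketch for get_network_logits_and_endpoints (assumes the module-level
# `weight_decay` referenced above is defined and that `images` are already
# preprocessed for the selected backbone).
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, endpoints = get_network_logits_and_endpoints('vgg16', images)
probabilities = tf.nn.softmax(logits)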
async def process_image(image_path): image_size = vgg.vgg_16.default_image_size with tf.Graph().as_default(): # Read the .JPEG image from the given path tf_img_string = tf.read_file(str(image_path)) image = tf.image.decode_jpeg(tf_img_string, channels=3) processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False) processed_images = tf.expand_dims(processed_image, 0) # Create the model, use the default arg scope to configure the batch norm parameters. with slim.arg_scope(vgg.vgg_arg_scope()): # 1000 classes instead of 1001. logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False) probabilities = tf.nn.softmax(logits) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'vgg_16.ckpt'), slim.get_model_variables('vgg_16')) with tf.Session() as sess: init_fn(sess) np_image, probabilities = sess.run([image, probabilities]) probabilities = probabilities[0, 0:] sorted_inds = [ i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1]) ] names = imagenet.create_readable_names_for_imagenet_labels() animals_found = [] for i in range(5): index = sorted_inds[i] # Shift the index of a class name by one. # print('Probability %0.2f%% => [%s]' % (probabilities[index] * 100, names[index+1])) animals_found.append(names[index + 1]) return animals_found
def classify(url): with tf.Graph().as_default(): image_string = req.urlopen(url).read() image = tf.image.decode_jpeg(image_string, channels=3) processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False) processed_images = tf.expand_dims(processed_image, 0) # Create the model, use the default arg scope to configure # the batch norm parameters. arg_scope is a very conveniet # feature of slim library -- you can define default # parameters for layers -- like stride, padding etc. with slim.arg_scope(vgg.vgg_arg_scope()): logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False) # In order to get probabilities we apply softmax on the output. probabilities = tf.nn.softmax(logits) # Create a function that reads the network weights # from the checkpoint file that you downloaded. # We will run it in session later. init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'vgg_16.ckpt'), slim.get_model_variables('vgg_16')) with tf.Session() as sess: writer = tf.summary.FileWriter("/temp/logs", sess.graph) # Load weights init_fn(sess) # We want to get predictions, image as numpy matrix # and resized and cropped piece that is actually # being fed to the network. np_image, network_input, probabilities = sess.run( [image, processed_image, probabilities]) probabilities = probabilities[0, 0:] sorted_inds = [ i[0] for i in sorted(enumerate(-probabilities), key=lambda x: x[1]) ] rst = "" for i in range(5): index = sorted_inds[i] pos = probabilities[index] name = names[str(index + 1)] rst += (name + ":" + str(pos) + "\n") return rst
def VGG_16(image_batch_tensor, is_training): ''' Returns the VGG16 model definition for use within the FCN model. Parameters ---------- image_batch_tensor : [batch_size, height, width, channels] Tensor Tensor containing a batch of input images. is_training : bool True if network is being trained, False otherwise. This controls whether dropout layers should be enabled. (Dropout is only enabled during training.) Returns ------- conv7_features: Features with a stride length of 32 (The coarsest layer in the VGG16 network). The layer is referred to as 'fc7' in the original VGG16 network. These features feed into the fc8 logits layer in the original network; however the 'fc8' layer has been removed in this implementation. pool4_features: Features with a stride length of 16. (Output of the 'pool4' layer.) pool3_features: Features with a stride length of 8. (Output of the 'pool3' layer.) ''' # Convert image to float32 before subtracting the mean pixel values image_batch_float = tf.to_float(image_batch_tensor) # Subtract the mean pixel value from each pixel mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN] with slim.arg_scope(vgg.vgg_arg_scope()): # By setting num_classes to 0 the logits layer is omitted and the input # features to the logits layer are returned instead. This logits layer # will be added as part of the FCN_32s model. (Note: Some FCN # implementations choose to use the 'fc8' logits layer that is already # present in the VGG16 network instead.) # fc_conv_padding = 'SAME' is necessary to ensure that downsampling/ # upsampling work as expected. So, if an image with dimensions that are # multiples of 32 is fed into the network, the resultant FCN pixel # classification will have the same dimensions as the original image. conv7_features, end_points = vgg.vgg_16(mean_centered_image_batch, num_classes=0, is_training=is_training, spatial_squeeze=False, fc_conv_padding='SAME') return conv7_features, end_points['vgg_16/pool4'], end_points[ 'vgg_16/pool3']
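# Hedged usage sketch for VGG_16 above: the three returned tensors are the
# stride-32, stride-16 and stride-8 feature maps that an FCN decoder consumes.
import tensorflow as tf

image_batch = tf.placeholder(tf.uint8, [None, None, None, 3])
conv7_feats, pool4_feats, pool3_feats = VGG_16(image_batch, is_training=False)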
def VGG16(image, reuse=tf.AUTO_REUSE): preprocess = lambda x: preprocess_image(x, 224, 224, is_training=False) preprocessed = tf.map_fn(preprocess, elems=image) # preprocessed = preprocess_for_eval(image, 224, 224, 256) arg_scope = vgg.vgg_arg_scope(weight_decay=0.0) with tf.variable_scope(name_or_scope='', reuse=reuse): with slim.arg_scope(arg_scope): logits, end_point = vgg.vgg_16(preprocessed, 1000, is_training=False, dropout_keep_prob=1.0) probs = tf.nn.softmax(logits) # probabilities return logits, probs, end_point
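# Hedged usage sketch for the VGG16 wrapper above (assumes the `preprocess_image`
# imported in that module handles resizing/cropping, so raw float images can be
# fed directly).
import tensorflow as tf

raw_images = tf.placeholder(tf.float32, [None, 256, 256, 3])
logits, probs, end_points = VGG16(raw_images)
top5_probs, top5_classes = tf.nn.top_k(probs, k=5)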
# the network. processed_image = vgg_preprocessing.preprocess_image(image, image_size, image_size, is_training=False) # Networks accept images in batches. # The first dimension usually represents the batch size. # In our case the batch size is one. processed_images = tf.expand_dims(processed_image, 0) # Create the model, use the default arg scope to configure # the batch norm parameters. arg_scope is a very convenient # feature of the slim library -- you can define default # parameters for layers -- like stride, padding etc. with slim.arg_scope(vgg.vgg_arg_scope()): logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False) # In order to get probabilities we apply softmax on the output. probabilities = tf.nn.softmax(logits) # Just focus on the top predictions top_pred = tf.nn.top_k(tf.squeeze(probabilities), 5, name="top_predictions") output_nodes = [probabilities, top_pred.indices, top_pred.values] # Create the saver with g.as_default():
def FCN_32s(image_batch_tensor, number_of_classes, is_training): """Returns the FCN-32s model definition. The function returns the model definition of a network that was described in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al. The network subsamples the input by a factor of 32 and uses the bilinear upsampling kernel to upsample prediction by a factor of 32. This means that if the image size is not of the factor 32, the prediction of different size will be delivered. To adapt the network for an any size input use adapt_network_for_any_size_input(FCN_32s, 32). Note: the upsampling kernel is fixed in this model definition, because it didn't give significant improvements according to aforementioned paper. Parameters ---------- image_batch_tensor : [batch_size, height, width, depth] Tensor Tensor specifying input image batch number_of_classes : int An argument specifying the number of classes to be predicted. For example, for PASCAL VOC it is 21. is_training : boolean An argument specifying if the network is being evaluated or trained. It affects the work of underlying dropout layer of VGG-16. Returns ------- upsampled_logits : [batch_size, height, width, number_of_classes] Tensor Tensor with logits representing predictions for each class. Be careful, the output can be of different size compared to input, use adapt_network_for_any_size_input to adapt network for any input size. Otherwise, the input images sizes should be of multiple 32. vgg_16_variables_mapping : dict {string: variable} Dict which maps the FCN-32s model's variables to VGG-16 checkpoint variables names. We need this to initilize the weights of FCN-32s model with VGG-16 from checkpoint file. Look at ipython notebook for examples. """ with tf.variable_scope("fcn_32s") as fcn_32s_scope: upsample_factor = 32 # Convert image to float32 before subtracting the # mean pixel value image_batch_float = tf.to_float(image_batch_tensor) # Subtract the mean pixel value from each pixel mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN] upsample_filter_np = bilinear_upsample_weights(upsample_factor, number_of_classes) upsample_filter_tensor = tf.constant(upsample_filter_np) # TODO: make pull request to get this custom vgg feature accepted # to avoid using custom slim repo. with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_16(mean_centered_image_batch, num_classes=number_of_classes, is_training=is_training, spatial_squeeze=False, fc_conv_padding='SAME') downsampled_logits_shape = tf.shape(logits) # Calculate the ouput size of the upsampled tensor upsampled_logits_shape = tf.pack([ downsampled_logits_shape[0], downsampled_logits_shape[1] * upsample_factor, downsampled_logits_shape[2] * upsample_factor, downsampled_logits_shape[3] ]) # Perform the upsampling upsampled_logits = tf.nn.conv2d_transpose(logits, upsample_filter_tensor, output_shape=upsampled_logits_shape, strides=[1, upsample_factor, upsample_factor, 1]) # Map the original vgg-16 variable names # to the variables in our model. This is done # to make it possible to use assign_from_checkpoint_fn() # while providing this mapping. 
# TODO: make it cleaner vgg_16_variables_mapping = {} vgg_16_variables = slim.get_variables(fcn_32s_scope) for variable in vgg_16_variables: # Here we remove the part of a name of the variable # that is responsible for the current variable scope # original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.original_name_scope):-2] # Updated: changed .name_scope to .name because name_scope only affects operations # and variable scope is actually represented by .name original_vgg_16_checkpoint_string = variable.name[len(fcn_32s_scope.name)+1:-2] vgg_16_variables_mapping[original_vgg_16_checkpoint_string] = variable return upsampled_logits, vgg_16_variables_mapping
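# The FCN models in this section call a bilinear_upsample_weights helper that is
# not shown here. A minimal sketch, matching the usual FCN reference
# implementation, builds a fixed bilinear interpolation kernel for
# tf.nn.conv2d_transpose:
import numpy as np

def bilinear_upsample_weights(factor, number_of_classes):
    """Returns [size, size, number_of_classes, number_of_classes] float32 weights
    that perform bilinear upsampling by `factor`, one class channel at a time."""
    filter_size = 2 * factor - factor % 2
    center = factor - 1 if filter_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:filter_size, :filter_size]
    upsample_kernel = ((1 - abs(og[0] - center) / float(factor)) *
                       (1 - abs(og[1] - center) / float(factor)))
    weights = np.zeros((filter_size, filter_size, number_of_classes, number_of_classes),
                       dtype=np.float32)
    for i in range(number_of_classes):
        weights[:, :, i, i] = upsample_kernel
    return weights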
def FCN_8s(image_batch_tensor, number_of_classes, is_training): """Returns the FCN-8s model definition. The function returns the model definition of a network that was described in 'Fully Convolutional Networks for Semantic Segmentation' by Long et al. The network subsamples the input by a factor of 32 and uses three bilinear upsampling layers to upsample prediction by a factor of 32. This means that if the image size is not of the factor 32, the prediction of different size will be delivered. To adapt the network for an any size input use adapt_network_for_any_size_input(FCN_8s, 32). Note: the upsampling kernel is fixed in this model definition, because it didn't give significant improvements according to aforementioned paper. Parameters ---------- image_batch_tensor : [batch_size, height, width, depth] Tensor Tensor specifying input image batch number_of_classes : int An argument specifying the number of classes to be predicted. For example, for PASCAL VOC it is 21. is_training : boolean An argument specifying if the network is being evaluated or trained. It affects the work of underlying dropout layer of VGG-16. Returns ------- upsampled_logits : [batch_size, height, width, number_of_classes] Tensor Tensor with logits representing predictions for each class. Be careful, the output can be of different size compared to input, use adapt_network_for_any_size_input to adapt network for any input size. Otherwise, the input images sizes should be of multiple 32. fcn_16s_variables_mapping : dict {string: variable} Dict which maps the FCN-8s model's variables to FCN-16s checkpoint variables names. We need this to initilize the weights of FCN-8s model with FCN-16s from checkpoint file. Look at ipython notebook for examples. """ # Convert image to float32 before subtracting the # mean pixel value image_batch_float = tf.to_float(image_batch_tensor) # Subtract the mean pixel value from each pixel mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN] upsample_filter_factor_2_np = bilinear_upsample_weights(factor=2, number_of_classes=number_of_classes) upsample_filter_factor_8_np = bilinear_upsample_weights(factor=8, number_of_classes=number_of_classes) upsample_filter_factor_2_tensor = tf.constant(upsample_filter_factor_2_np) upsample_filter_factor_8_tensor = tf.constant(upsample_filter_factor_8_np) with tf.variable_scope("fcn_8s", reuse = None) as fcn_8s_scope: # Define the model that we want to use -- specify to use only two classes at the last layer # TODO: make pull request to get this custom vgg feature accepted # to avoid using custom slim repo. 
with slim.arg_scope(vgg.vgg_arg_scope()): ## Original FCN-32s model definition last_layer_logits, end_points = vgg.vgg_16(mean_centered_image_batch, num_classes=number_of_classes, is_training=is_training, spatial_squeeze=False, fc_conv_padding='SAME') last_layer_logits_shape = tf.shape(last_layer_logits) # Calculate the ouput size of the upsampled tensor last_layer_upsampled_by_factor_2_logits_shape = tf.stack([ last_layer_logits_shape[0], last_layer_logits_shape[1] * 2, last_layer_logits_shape[2] * 2, last_layer_logits_shape[3] ]) # Perform the upsampling last_layer_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(last_layer_logits, upsample_filter_factor_2_tensor, output_shape=last_layer_upsampled_by_factor_2_logits_shape, strides=[1, 2, 2, 1]) ## Adding the skip here for FCN-16s model # We created vgg in the fcn_8s name scope -- so # all the vgg endpoints now are prepended with fcn_8s name pool4_features = end_points['fcn_8s/vgg_16/pool4'] # We zero initialize the weights to start training with the same # accuracy that we ended training FCN-32s pool4_logits = slim.conv2d(pool4_features, number_of_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=tf.zeros_initializer, scope='pool4_fc') fused_last_layer_and_pool4_logits = pool4_logits + last_layer_upsampled_by_factor_2_logits fused_last_layer_and_pool4_logits_shape = tf.shape(fused_last_layer_and_pool4_logits) # Calculate the ouput size of the upsampled tensor fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape = tf.stack([ fused_last_layer_and_pool4_logits_shape[0], fused_last_layer_and_pool4_logits_shape[1] * 2, fused_last_layer_and_pool4_logits_shape[2] * 2, fused_last_layer_and_pool4_logits_shape[3] ]) # Perform the upsampling fused_last_layer_and_pool4_upsampled_by_factor_2_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits, upsample_filter_factor_2_tensor, output_shape=fused_last_layer_and_pool4_upsampled_by_factor_2_logits_shape, strides=[1, 2, 2, 1]) ## Adding the skip here for FCN-8s model pool3_features = end_points['fcn_8s/vgg_16/pool3'] # We zero initialize the weights to start training with the same # accuracy that we ended training FCN-32s pool3_logits = slim.conv2d(pool3_features, number_of_classes, [1, 1], activation_fn=None, normalizer_fn=None, weights_initializer=tf.zeros_initializer, scope='pool3_fc') fused_last_layer_and_pool4_logits_and_pool_3_logits = pool3_logits + \ fused_last_layer_and_pool4_upsampled_by_factor_2_logits fused_last_layer_and_pool4_logits_and_pool_3_logits_shape = tf.shape(fused_last_layer_and_pool4_logits_and_pool_3_logits) # Calculate the ouput size of the upsampled tensor fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape = tf.stack([ fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[0], fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[1] * 8, fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[2] * 8, fused_last_layer_and_pool4_logits_and_pool_3_logits_shape[3] ]) # Perform the upsampling fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits = tf.nn.conv2d_transpose(fused_last_layer_and_pool4_logits_and_pool_3_logits, upsample_filter_factor_8_tensor, output_shape=fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits_shape, strides=[1, 8, 8, 1]) fcn_16s_variables_mapping = {} fcn_8s_variables = slim.get_variables(fcn_8s_scope) for variable in fcn_8s_variables: # We only need FCN-16s variables to resture from checkpoint # Variables of FCN-8s should be 
initialized if 'pool3_fc' in variable.name: continue # Here we remove the part of a name of the variable # that is responsible for the current variable scope original_fcn_16s_checkpoint_string = 'fcn_16s/' + variable.name[len(fcn_8s_scope.original_name_scope):-2] fcn_16s_variables_mapping[original_fcn_16s_checkpoint_string] = variable return fused_last_layer_and_pool4_logits_and_pool_3_upsampled_by_factor_8_logits, fcn_16s_variables_mapping