def _build_model(self):
    """Build a ResNet-50 graph with slim and expose its placeholders.

    Returns:
        A pair ``(output, network)``. ``output`` is the ResNet-50 output with
        axes 1 and 2 squeezed away. ``network`` is the end-points dict
        returned by slim, extended with the ``'input'``, ``'is_training'``
        and ``'keep_prob'`` placeholders created here.
    """
    input_shape = [None, self.height, self.width, self.channels]
    images_ph = tf.placeholder(tf.float32, input_shape, name='visual_images')
    training_ph = tf.placeholder(tf.bool, name='is_training')
    # Created and exported for callers; not consumed inside this function.
    keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=5e-4)):
        logits, end_points = resnet_v1.resnet_v1_50(
            images_ph,
            num_classes=self.num_classes,
            is_training=training_ph)
        logits = tf.squeeze(logits, [1, 2])

    end_points['input'] = images_ph
    end_points['is_training'] = training_ph
    end_points['keep_prob'] = keep_prob_ph
    return logits, end_points
def det_lesion_resnet(inputs, is_training_option=False, scope='det_lesion'):
    """Define the lesion-detection network: ResNet-50 plus a sigmoid unit.

    Args:
        inputs: Tensor (typically a placeholder) holding the input image.
        is_training_option: Forwarded to ``resnet_v1_50`` as ``is_training``.
        scope: Variable-scope name for the network.

    Returns:
        net: Output tensor of the final single-unit sigmoid layer.
        end_points: Dict built from the ``<scope>_end_points`` collection.
            Note that it replaces the dict returned by ``resnet_v1_50``.
    """
    with tf.variable_scope(scope, 'det_lesion', [inputs]) as var_scope:
        collection_name = var_scope.name + '_end_points'
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(
                inputs, is_training=is_training_option)
            net = slim.flatten(net, scope='flatten5')
            net = slim.fully_connected(
                net,
                1,
                activation_fn=tf.nn.sigmoid,
                weights_initializer=initializers.xavier_initializer(),
                scope='output')
            utils.collect_named_outputs(collection_name,
                                        'det_lesion/output', net)
    end_points = slim.utils.convert_collection_to_dict(collection_name)
    return net, end_points
def feature_extractor(patch):
    """Map an image patch to a 512-d glimpse feature through ResNet-50.

    The function reads ``self.train_mode`` and ``self.drop1`` although it
    takes no ``self`` parameter, so it must be defined where ``self`` is in
    scope (e.g. nested inside a method).

    Args:
        patch: Image tensor fed to ``resnet_v1_50``.

    Returns:
        Output of the ``glimpse_feature/fc`` dense layer (512 units, ReLU).
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            patch, 1000, is_training=self.train_mode, reuse=tf.AUTO_REUSE)

    # Spatially average the block4 activations, then flatten to [-1, 2048].
    block4 = end_points['resnet_v1_50/block4']
    pooled = tf.reduce_mean(block4, [1, 2], keepdims=True)
    pooled = tf.squeeze(pooled)
    pooled = tf.reshape(pooled, [-1, 2048])

    dropped = tf.layers.dropout(pooled,
                                rate=self.drop1,
                                training=self.train_mode)
    return tf.layers.dense(
        inputs=dropped,
        units=512,
        activation=tf.nn.relu,
        kernel_initializer=tf.glorot_uniform_initializer(),
        bias_initializer=tf.constant_initializer(0.1),
        # kernel_regularizer=tf.nn.l2_loss,
        name='glimpse_feature/fc',
        reuse=tf.AUTO_REUSE)
def build_net(self, x, is_training):
    """Build a ResNet-50 feature extractor followed by a 2-way classifier.

    Args:
        x: Input image tensor.
        is_training: Forwarded to ``resnet_v1_50`` as ``is_training``.

    Returns:
        A pair ``(logits, z)``. ``z`` is the output of the 7x7 VALID
        ``bottleneck_layer`` convolution with ``self.config.dim_z`` channels.
        ``logits`` is the output of the 1x1 ``logit_layer`` convolution
        applied to ``z``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            x, num_classes=2, is_training=is_training)

    with slim.arg_scope([slim.conv2d], activation_fn=tf.nn.relu):
        # Last bottleneck block before the ResNet logits layer.
        block4 = end_points['resnet_v1_50/block4']
        with tf.variable_scope('resnet_v1_50'):
            z = slim.conv2d(block4,
                            self.config.dim_z, [7, 7],
                            padding='VALID',
                            activation_fn=tf.nn.relu,
                            scope='bottleneck_layer')
            logits = slim.conv2d(z,
                                 2, [1, 1],
                                 activation_fn=None,
                                 scope='logit_layer')
    return logits, z
def build_graph(self, orig_image):
    """Build a guided-ReLU ResNet-50 graph and its ``saliency`` output.

    Args:
        orig_image: A single image tensor. The ``resnet_v1_50/mean_rgb``
            variable is subtracted from it and a batch axis is added before
            it is fed to the network.
    """
    mean_rgb = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    with guided_relu():
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            batched = tf.expand_dims(orig_image - mean_rgb, 0)
            logits, _ = resnet_v1.resnet_v1_50(batched, 1000)
        saliency_map(logits, orig_image, name="saliency")
def Eval(x_img_224, x_img_299, y):
    """Evaluate ResNet-50 and VGG-16 on the first image of a batch.

    Both networks are fed the same mean-subtracted ``x_img_224``;
    ``x_img_299`` is accepted but not used.

    Args:
        x_img_224: Image batch fed to both models.
        x_img_299: Unused.
        y: Label tensor; only ``y[0]`` is read.

    Returns:
        ``(res_label, vgg_label, y_r, y_v)``: each model's arg-max class for
        the first image, and each model's probability for class ``y[0]``.
    """
    channel_means = tf.constant([123.68, 116.78, 103.94])
    input_image = x_img_224 - tf.reshape(channel_means, [1, 1, 1, 3])

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_points = resnet_v1.resnet_v1_50(
            input_image,
            num_classes=110,
            is_training=False,
            scope='resnet_v1_50',
            reuse=tf.AUTO_REUSE)
    res_points['logits'] = tf.squeeze(
        res_points['resnet_v1_50/logits'], [1, 2])
    res_points['probs'] = tf.nn.softmax(res_points['logits'])
    res_label = tf.argmax(res_points['probs'][0], -1)
    y_r = res_points['probs'][0][y[0]]

    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_points = vgg.vgg_16(
            input_image,
            num_classes=110,
            is_training=False,
            scope='vgg_16',
            reuse=tf.AUTO_REUSE)
    vgg_points['logits'] = vgg_points['vgg_16/fc8']
    vgg_points['probs'] = tf.nn.softmax(vgg_points['logits'])
    vgg_label = tf.argmax(vgg_points['probs'][0], -1)
    y_v = vgg_points['probs'][0][y[0]]

    return res_label, vgg_label, y_r, y_v
def single_stream(self, images, modality, is_training, reuse=False):
    """Build one ResNet-50 stream under the ``modality`` variable scope.

    Args:
        images: Input image tensor.
        modality: Name of the enclosing variable scope; also used to look up
            the ``<modality>/resnet_v1_50/block4`` end point.
        is_training: Forwarded to ``resnet_v1_50``.
        reuse: Variable-reuse flag for every scope opened here.

    Returns:
        * the block4 activations when ``'autoencoder'`` is in ``self.mode``;
        * ``(logits, bottleneck)`` when ``self.mode`` contains
          ``'train_hallucination'``, ``'test_disc'`` or ``'train_eccv'``;
        * the logits alone otherwise.
    """
    with tf.variable_scope(modality, reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            _, end_points = resnet_v1.resnet_v1_50(
                images,
                self.no_classes,
                is_training=is_training,
                reuse=reuse)

    # Last bottleneck block before the ResNet logits layer.
    net = end_points[modality + '/resnet_v1_50/block4']
    if 'autoencoder' in self.mode:
        return net

    with tf.variable_scope(modality + '/resnet_v1_50', reuse=reuse):
        bottleneck = slim.conv2d(net,
                                 self.hidden_repr_size, [7, 7],
                                 padding='VALID',
                                 activation_fn=tf.nn.relu,
                                 scope='f_repr')
        net = slim.conv2d(bottleneck,
                          self.no_classes, [1, 1],
                          activation_fn=None,
                          scope='_logits_')

    modes_needing_bottleneck = ('train_hallucination', 'test_disc',
                                'train_eccv')
    if any(tag in self.mode for tag in modes_needing_bottleneck):
        return net, bottleneck
    return net
def eval(x, num_classes=110):
    """Predict classes with Inception-v1, ResNet-50 and VGG-16.

    Args:
        x: Image batch. It is fed directly to Inception-v1 and rescaled with
            ``(x + 1) * 0.5 * 255`` before the model-specific preprocessing
            for ResNet and VGG.
        num_classes: Number of output classes for all three models.

    Returns:
        ``[pred1, pred2, pred3]``: the arg-max predictions of Inception-v1,
        ResNet-50 and VGG-16, in that order.
    """
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False,
            scope='InceptionV1')
    pred1 = tf.argmax(inc_points['Predictions'], 1)

    # Rescale pixel range from [-1, 1] to [0, 255] for ResNet-v1 and VGG.
    image = ((x + 1.0) * 0.5) * 255.0

    res_inputs = preprocess_for_model(image, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_points = resnet_v1.resnet_v1_50(
            res_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_points['logits'] = tf.squeeze(
        res_points['resnet_v1_50/logits'], [1, 2])
    res_points['probs'] = tf.nn.softmax(res_points['logits'])
    pred2 = tf.argmax(res_points['probs'], 1)

    vgg_inputs = preprocess_for_model(image, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_points = vgg.vgg_16(
            vgg_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_points['logits'] = vgg_points['vgg_16/fc8']
    vgg_points['probs'] = tf.nn.softmax(vgg_points['logits'])
    pred3 = tf.argmax(vgg_points['probs'], 1)

    return [pred1, pred2, pred3]
def localizationNet(input, is_train=False, reuse=False, scope='resnet_v1_50'):
    """Regress 50 transformation parameters from an image with ResNet-50.

    Args:
        input: Image tensor; passed through ``scale_RGB`` before the network.
        is_train: Forwarded to ``resnet_v1_50`` as ``is_training``.
        reuse: Variable-reuse flag for the enclosing scope and the ResNet.
        scope: Name of the enclosing variable scope.

    Returns:
        Output tensor of the last dense layer (``df/dense4``, 50 units).
    """
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            features, _ = resnet_v1.resnet_v1_50(
                scale_RGB(input),
                global_pool=True,
                is_training=is_train,
                reuse=reuse)

        # Linear (identity-activated) dense stack on the pooled features.
        layer = tl.layers.InputLayer(features)
        layer = tl.layers.FlattenLayer(layer, name='flatten')
        layer = tl.layers.DenseLayer(layer, n_units=2048, act=tf.identity,
                                     name='df/dense1')
        layer = tl.layers.DenseLayer(layer, n_units=1024, act=tf.identity,
                                     name='df/dense2')
        layer = tl.layers.DenseLayer(layer, n_units=512, act=tf.identity,
                                     name='df/dense3')
        layer = tl.layers.DenseLayer(layer, n_units=50, act=tf.identity,
                                     name='df/dense4')
        return layer.outputs
def feature_extractor(mode, features, labels, config, params):
    """Fully convolutional feature extractor for semantic segmentation.

    First the base feature extractor is created: a ResNet-50 without logits
    layer and without global pooling, so that it produces dense output.
    Then an optional extension is created in series with the base, to deal
    with the feature dimensions and the receptive field of the feature
    representation.

    Args:
        mode: A ``tf.estimator.ModeKeys`` value; in TRAIN mode the ResNet
            arg scope receives weight decay and batch-norm decay.
        features: Input tensor fed to ResNet-50.
        labels: Unused (deleted on entry).
        config: Unused (deleted on entry).
        params: Object providing ``regularization_weight``,
            ``batch_norm_decay``, ``batch_norm_istraining``,
            ``stride_feature_extractor``, ``feature_dims_decreased``,
            ``fov_expansion_kernel_rate`` and ``fov_expansion_kernel_size``.

    Returns:
        ``(fe, end_points, resnet_scope_args)``: the final feature tensor,
        the ResNet end points, and the kwargs used for ``resnet_arg_scope``.
    """
    # delete unused arguments from local namescope
    del labels, config

    with tf.variable_scope('feature_extractor'):
        # resnet base feature extractor scope arguments
        resnet_scope_args = {}
        if mode == tf.estimator.ModeKeys.TRAIN:
            resnet_scope_args['weight_decay'] = params.regularization_weight
            resnet_scope_args['batch_norm_decay'] = params.batch_norm_decay

        # build base of feature extractor
        with tf.variable_scope('base'):
            with slim.arg_scope(resnet_arg_scope(**resnet_scope_args)):
                # when num_classes=None no logits layer is created,
                # when global_pool=False model is used for dense output
                fe, end_points = resnet_v1.resnet_v1_50(
                    features,
                    num_classes=None,
                    is_training=params.batch_norm_istraining,
                    global_pool=False,
                    output_stride=params.stride_feature_extractor)

        # build extension to feature extractor
        #   decrease feature dimensions and increase field of view of
        #   feature extractor in a memory and computational efficient way
        #   hf/sfe x wf/sfe x 2048 8/32 (??) -->
        #   hf/sfe x wf/sfe x projection_dims 8/32 -->
        #   hf/sfe x wf/sfe x projection_dims 8/XX
        # TODO: add to end_points the outputs of next layers
        with tf.variable_scope('extension'):
            # WARNING: this scope assumes that slim.conv2d uses
            # slim.batch_norm for the batch normalization, which holds at
            # least up to TF v1.4
            with slim.arg_scope([slim.batch_norm],
                                is_training=params.batch_norm_istraining):
                with slim.arg_scope(resnet_arg_scope(**resnet_scope_args)):
                    if params.feature_dims_decreased > 0:
                        fe = slim.conv2d(
                            fe,
                            num_outputs=params.feature_dims_decreased,
                            kernel_size=1,
                            scope='decrease_fdims')
                    expand_fov = (params.fov_expansion_kernel_rate > 0
                                  and params.fov_expansion_kernel_size > 0)
                    if expand_fov:
                        fe = slim.conv2d(
                            fe,
                            num_outputs=fe.shape[-1],
                            kernel_size=params.fov_expansion_kernel_size,
                            rate=params.fov_expansion_kernel_rate,
                            scope='increase_fov')

    return fe, end_points, resnet_scope_args
def non_target_graph(x, y, i, x_max, x_min, grad):
    """Run one non-targeted attack step against a three-model ensemble.

    The averaged logits of Inception-v1, ResNet-50 and VGG-16 define a
    cross-entropy loss. Its gradient w.r.t. ``x`` is normalised by its mean
    absolute value, combined with ``momentum * grad``, and ``x`` is moved by
    ``alpha`` along the sign of the result, then clipped to
    ``[x_min, x_max]``.

    Reads the module-level names ``max_epsilon``, ``num_iter`` and
    ``momentum``.

    Args:
        x: Current image batch.
        y: Labels; on the first iteration (``i == 0``) they are replaced by
            the ensemble's own prediction.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        ``(x, y, i, x_max, x_min, noise)`` with updated ``x``, ``y``, ``i``
        and the new accumulated gradient ``noise``.
    """
    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False,
            scope='InceptionV1')

    # Rescale pixel range from [-1, 1] to [0, 255] for ResNet-v1 and VGG.
    image = ((x + 1.0) * 0.5) * 255.0

    res_inputs = preprocess_for_model(image, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_points = resnet_v1.resnet_v1_50(
            res_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_points['logits'] = tf.squeeze(
        res_points['resnet_v1_50/logits'], [1, 2])
    res_points['probs'] = tf.nn.softmax(res_points['logits'])

    vgg_inputs = preprocess_for_model(image, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_points = vgg.vgg_16(
            vgg_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_points['logits'] = vgg_points['vgg_16/fc8']
    vgg_points['probs'] = tf.nn.softmax(vgg_points['logits'])

    # Using model predictions as ground truth to avoid label leaking.
    pred = tf.argmax(
        inc_points['Predictions'] + res_points['probs'] +
        vgg_points['probs'], 1)
    first_round = tf.cast(tf.equal(i, 0), tf.int64)
    y = first_round * pred + (1 - first_round) * y
    one_hot = tf.one_hot(y, num_classes)

    logits = (inc_points['Logits'] + res_points['logits'] +
              vgg_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = noise / tf.reduce_mean(
        tf.abs(noise), [1, 2, 3], keep_dims=True)
    noise = momentum * grad + noise

    x = x + alpha * tf.sign(noise)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def _build_graph(self, inputs):
    """Build an inference-mode guided-ReLU ResNet-50 and its saliency map.

    Args:
        inputs: Sequence whose first element is a single image tensor. The
            ``resnet_v1_50/mean_rgb`` variable is subtracted from it and a
            batch axis is added before it is fed to the network.
    """
    orig_image = inputs[0]
    mean_rgb = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    with guided_relu():
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
            batched = tf.expand_dims(orig_image - mean_rgb, 0)
            logits, _ = resnet_v1.resnet_v1_50(batched, 1000)
        tp.symbolic_functions.saliency_map(logits, orig_image,
                                           name="saliency")
def forward_network(self, input_, scope="resnet50", reuse=False):
    """Run ResNet-50 and return its flattened block4 activations.

    Args:
        input_: Input image tensor.
        scope: Name of the enclosing variable scope; also the prefix of the
            ``<scope>/resnet_v1_50/block4`` end point that is read.
        reuse: Variable-reuse flag for the enclosing scope.

    Returns:
        ``(output_, variables)``: the block4 activations reshaped to
        ``[-1, H * W * C]``, and the variables found under the scope via
        ``tf.contrib.framework.get_variables``.
    """
    with tf.variable_scope(scope, reuse=reuse) as vs:
        _, end_points = resnet_v1.resnet_v1_50(
            input_, 1000, is_training=self.is_training)
        net = end_points[scope + '/resnet_v1_50/block4']
        # The static spatial and channel dimensions must be known here.
        _, height, width, depth = net.get_shape().as_list()
        # The original reshaped an undefined name (`net0`), which raised
        # NameError; the tensor to flatten is `net`.
        output_ = tf.reshape(net, [-1, height * width * depth],
                             name='reshape')
    variables = tf.contrib.framework.get_variables(vs)
    return output_, variables
def resnet(X, scope=None, reuse=True):
    """Return the globally pooled 2-class ResNet-50 output for ``X``.

    Args:
        X: Input image tensor.
        scope: Optional scope name forwarded to ``resnet_v1_50``.
        reuse: Variable-reuse flag forwarded to ``resnet_v1_50``; note that
            it defaults to ``True``.

    Returns:
        The first element of the pair returned by ``resnet_v1_50``.
    """
    print("USING RESNET")
    outputs = resnet_v1.resnet_v1_50(
        X, num_classes=2, global_pool=True, scope=scope, reuse=reuse)
    return outputs[0]
def _build_graph(self, inputs):
    """Build an inference-mode guided-ReLU ResNet-50 and its saliency map.

    Args:
        inputs: Sequence whose first element is a single image tensor. It is
            centred with the ``resnet_v1_50/mean_rgb`` variable and given a
            leading batch axis before being fed to the network.
    """
    orig_image = inputs[0]
    rgb_mean = tf.get_variable('resnet_v1_50/mean_rgb', shape=[3])
    with tp.symbolic_functions.guided_relu():
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=False)):
            net_input = tf.expand_dims(orig_image - rgb_mean, 0)
            logits, _ = resnet_v1.resnet_v1_50(net_input, 1000)
        tp.symbolic_functions.saliency_map(logits, orig_image,
                                           name="saliency")
def net_graph(inputs_X):
    """Build a ResNet-50 with four side encoders feeding one regressor.

    The outputs of the last unit of each ResNet block are looked up by
    tensor name, passed through a small conv encoder, globally averaged,
    concatenated, and mapped to a single output by a fully connected layer.

    Args:
        inputs_X: Input image tensor fed to ``resnet_v1_50``.

    Returns:
        ``(predictions, current_epoch)``: the fully connected output (also
        added to the ``"predictions"`` collection) and an integer variable
        named ``current_epoch`` initialised to 0.
    """

    def encoder(tensor_name, layer_name):
        """Encode the graph tensor ``tensor_name`` into a 256-d vector."""
        with tf.variable_scope(layer_name):
            encoder_tensor = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [1, 1],
                stride=1,
                padding='VALID',
                scope="conv1",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                weights_regularizer=layers_lib.l2_regularizer(1e-4))
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [3, 3],
                stride=1,
                padding='VALID',
                scope="conv3",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                weights_regularizer=layers_lib.l2_regularizer(1e-4))
            # Global average pooling over the two spatial axes.
            out_tensor = math_ops.reduce_mean(encoder_tensor, [1, 2],
                                              name='gap',
                                              keepdims=False)
        return out_tensor

    # Only the graph side effects are needed; the encoders fetch their
    # inputs by tensor name.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(inputs_X, is_training=True)

    with tf.variable_scope("encoder"):
        encoder1 = encoder(
            "resnet_v1_50/block1/unit_3/bottleneck_v1/Relu:0", "encoder1")
        encoder2 = encoder(
            "resnet_v1_50/block2/unit_4/bottleneck_v1/Relu:0", "encoder2")
        encoder3 = encoder(
            "resnet_v1_50/block3/unit_6/bottleneck_v1/Relu:0", "encoder3")
        encoder4 = encoder(
            "resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0", "encoder4")

    concat = tf.concat([encoder1, encoder2, encoder3, encoder4], -1,
                       name='concat')
    # `fully_connected` has no `name` parameter; the layer is named through
    # `scope` (passing `name=` raised TypeError).
    predictions = layers_lib.fully_connected(
        concat,
        1,
        scope="fintune_FC",
        weights_regularizer=layers_lib.l2_regularizer(1e-4))
    tf.add_to_collection("predictions", predictions)
    current_epoch = tf.Variable(0, name="current_epoch")
    return predictions, current_epoch
def target_graph(x, y, i, x_max, x_min, grad):
    """Run one targeted attack step with input diversity and a centre patch.

    The averaged logits of Inception-v1, ResNet-50 and VGG-16 define a
    cross-entropy loss towards ``y``. Its gradient w.r.t. ``x`` is smoothed
    with ``stack_kernel``, std-normalised, combined with ``momentum * grad``
    and normalised again. The result is resized to 140x140, rounded and
    clipped to ``[-2, 2]``, scaled by ``alpha``, zero-padded by 42 pixels on
    each spatial side, and subtracted from ``x``.

    Each model receives its own ``input_diversity(x)`` call. Reads the
    module-level names ``max_epsilon``, ``num_iter``, ``momentum``,
    ``batch_size`` and ``stack_kernel``.

    Args:
        x: Current image batch.
        y: Target labels.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        ``(x, y, i, x_max, x_min, noise)`` with updated ``x`` and ``i`` and
        the new accumulated (full-resolution) gradient ``noise``.
    """

    def _divide_by_std(tensor):
        """Divide each example of ``tensor`` by its own std deviation."""
        flat = tf.reshape(tensor, [batch_size, -1])
        std = tf.contrib.keras.backend.std(flat, axis=1)
        return tensor / tf.reshape(std, [batch_size, 1, 1, 1])

    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    # input image size [224, 224, 3]
    images3 = tf.image.resize_bilinear(input_diversity(x), [224, 224],
                                       align_corners=False)
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_points = inception.inception_v1(
            images3, num_classes=num_classes, is_training=False,
            scope='InceptionV1')

    # Rescale pixel range from [-1, 1] to [0, 255] for ResNet-v1 and VGG.
    image1 = ((input_diversity(x) + 1.0) * 0.5) * 255.0
    res_inputs = preprocess_for_model(image1, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_points = resnet_v1.resnet_v1_50(
            res_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_points['logits'] = tf.squeeze(
        res_points['resnet_v1_50/logits'], [1, 2])
    res_points['probs'] = tf.nn.softmax(res_points['logits'])

    image2 = ((input_diversity(x) + 1.0) * 0.5) * 255.0
    vgg_inputs = preprocess_for_model(image2, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_points = vgg.vgg_16(
            vgg_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_points['logits'] = vgg_points['vgg_16/fc8']
    vgg_points['probs'] = tf.nn.softmax(vgg_points['logits'])

    one_hot = tf.one_hot(y, num_classes)
    logits = (inc_points['Logits'] + res_points['logits'] +
              vgg_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = tf.nn.depthwise_conv2d(noise, stack_kernel,
                                   strides=[1, 1, 1, 1], padding='SAME')
    noise = _divide_by_std(noise)
    noise = momentum * grad + noise
    noise = _divide_by_std(noise)

    noise1 = tf.image.resize_images(
        noise, [140, 140], method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
    print("noise shape:", noise.shape)
    noise1 = alpha * tf.clip_by_value(tf.round(noise1), -2, 2)
    # Zero padding restricts the perturbation to the central 140x140 patch.
    noise_paded = tf.pad(noise1, [[0, 0], [42, 42], [42, 42], [0, 0]],
                         constant_values=0.)

    x = x - noise_paded
    x = tf.clip_by_value(x, x_min, x_max)
    print("x.shape:", x.shape)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def target_graph(x, y, i, x_max, x_min, grad):
    """Run one targeted attack step against a three-model ensemble.

    The averaged logits of Inception-v1, ResNet-50 and VGG-16 define a
    cross-entropy loss towards ``y``. Its gradient w.r.t. ``x`` is
    std-normalised per example, combined with ``momentum * grad`` and
    normalised again. ``x`` is then moved by ``alpha`` against the rounded
    gradient (clipped to ``[-2, 2]``) and clipped to ``[x_min, x_max]``.

    Reads the module-level names ``max_epsilon``, ``num_iter``,
    ``momentum`` and ``batch_size``.

    Args:
        x: Current image batch.
        y: Target labels.
        i: Iteration counter tensor.
        x_max: Upper clipping bound for ``x``.
        x_min: Lower clipping bound for ``x``.
        grad: Accumulated gradient from the previous iteration.

    Returns:
        ``(x, y, i, x_max, x_min, noise)`` with updated ``x`` and ``i`` and
        the new accumulated gradient ``noise``.
    """

    def _divide_by_std(tensor):
        """Divide each example of ``tensor`` by its own std deviation."""
        flat = tf.reshape(tensor, [batch_size, -1])
        std = tf.contrib.keras.backend.std(flat, axis=1)
        return tensor / tf.reshape(std, [batch_size, 1, 1, 1])

    eps = 2.0 * max_epsilon / 255.0
    alpha = eps / num_iter
    num_classes = 110

    with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, inc_points = inception.inception_v1(
            x, num_classes=num_classes, is_training=False,
            scope='InceptionV1')

    # Rescale pixel range from [-1, 1] to [0, 255] for ResNet-v1 and VGG.
    image = ((x + 1.0) * 0.5) * 255.0

    res_inputs = preprocess_for_model(image, 'resnet_v1_50')
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, res_points = resnet_v1.resnet_v1_50(
            res_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='resnet_v1_50')
    res_points['logits'] = tf.squeeze(
        res_points['resnet_v1_50/logits'], [1, 2])
    res_points['probs'] = tf.nn.softmax(res_points['logits'])

    vgg_inputs = preprocess_for_model(image, 'vgg_16')
    with slim.arg_scope(vgg.vgg_arg_scope()):
        _, vgg_points = vgg.vgg_16(
            vgg_inputs,
            num_classes=num_classes,
            is_training=False,
            scope='vgg_16')
    vgg_points['logits'] = vgg_points['vgg_16/fc8']
    vgg_points['probs'] = tf.nn.softmax(vgg_points['logits'])

    one_hot = tf.one_hot(y, num_classes)
    logits = (inc_points['Logits'] + res_points['logits'] +
              vgg_points['logits']) / 3.0
    cross_entropy = tf.losses.softmax_cross_entropy(
        one_hot, logits, label_smoothing=0.0, weights=1.0)

    noise = tf.gradients(cross_entropy, x)[0]
    noise = _divide_by_std(noise)
    noise = momentum * grad + noise
    noise = _divide_by_std(noise)

    x = x - alpha * tf.clip_by_value(tf.round(noise), -2, 2)
    x = tf.clip_by_value(x, x_min, x_max)
    i = tf.add(i, 1)
    return x, y, i, x_max, x_min, noise
def create_model(self, frames):
    """Extract ResNet-50 features from ``frames`` (no logits layer).

    Args:
        frames: Image tensor; cast to ``float32`` before the network.

    Returns:
        The first output of ``resnet_v1_50`` called with
        ``num_classes=None`` and ``is_training=self.is_training``.
    """
    with tf.variable_scope("video_model", reuse=tf.AUTO_REUSE):
        with slim.arg_scope(slim.nets.resnet_utils.resnet_arg_scope()):
            float_frames = tf.cast(frames, tf.float32)
            outputs = resnet_v1.resnet_v1_50(float_frames, None,
                                             self.is_training)
    return outputs[0]
def net_graph_debug(inputs_X):
    """Debug variant of the ResNet-50 multi-encoder regressor.

    Differs from the regular variant in the encoder convolutions (stride 2,
    SAME padding, no weight regulariser) and in the explicit ReLU on the
    final fully connected layer.

    Args:
        inputs_X: Input image tensor fed to ``resnet_v1_50``.

    Returns:
        ``(predictions, current_epoch)``: the fully connected output and an
        integer variable named ``current_epoch`` initialised to 0.
    """

    def encoder(tensor_name, layer_name):
        """Encode the graph tensor ``tensor_name`` into a 256-d vector."""
        with tf.variable_scope(layer_name):
            encoder_tensor = tf.get_default_graph().get_tensor_by_name(
                tensor_name)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [1, 1],
                stride=2,
                padding='SAME',
                scope="conv1",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                trainable=True)
            encoder_tensor = layers_lib.conv2d(
                encoder_tensor,
                256, [3, 3],
                stride=2,
                padding='SAME',
                scope="conv3",
                activation_fn=tf.nn.relu,
                normalizer_fn=layers.batch_norm,
                trainable=True)
            # Global average pooling over the two spatial axes.
            out_tensor = math_ops.reduce_mean(encoder_tensor, [1, 2],
                                              name='gap',
                                              keepdims=False)
        return out_tensor

    # Only the graph side effects are needed; the encoders fetch their
    # inputs by tensor name.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(inputs_X, is_training=True)

    with tf.variable_scope("encoder"):
        encoder1 = encoder(
            "resnet_v1_50/block1/unit_3/bottleneck_v1/Relu:0", "encoder1")
        encoder2 = encoder(
            "resnet_v1_50/block2/unit_4/bottleneck_v1/Relu:0", "encoder2")
        encoder3 = encoder(
            "resnet_v1_50/block3/unit_6/bottleneck_v1/Relu:0", "encoder3")
        encoder4 = encoder(
            "resnet_v1_50/block4/unit_3/bottleneck_v1/Relu:0", "encoder4")

    concat = tf.concat([encoder1, encoder2, encoder3, encoder4], -1,
                       name='concat')
    predictions = layers_lib.fully_connected(concat,
                                             1,
                                             activation_fn=tf.nn.relu,
                                             scope="fintune_FC")
    current_epoch = tf.Variable(0, name="current_epoch")
    # The original had a second, unreachable
    # `return end_points, current_epoch` after this one; it was dead code.
    return predictions, current_epoch
def build_single_resnet(train_tfdata,
                        is_train,
                        name_scope='resnet_v1_50',
                        variable_scope=''):
    """Build a ResNet-50 and return its output and flattened pool5 tensor.

    Args:
        train_tfdata: Input image tensor.
        is_train: Forwarded to ``resnet_arg_scope`` as ``is_training``.
        name_scope: Name-scope part of the ``pool5`` tensor name.
        variable_scope: Prefix prepended verbatim to ``name_scope`` when the
            ``pool5`` tensor is looked up.

    Returns:
        ``(identity, feature)``: the network output for
        ``FLAGS.num_class`` classes, and the flattened
        ``<variable_scope><name_scope>/pool5:0`` tensor.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=is_train)):
        identity, _ = resnet_v1.resnet_v1_50(
            train_tfdata, num_classes=FLAGS.num_class, global_pool=True)
        pool5_name = '%s%s/pool5:0' % (variable_scope, name_scope)
        pool5 = tf.get_default_graph().get_tensor_by_name(pool5_name)
        feature = slim.flatten(pool5)
    return identity, feature
def _build_video_network(self, visual_images, is_training):
    """Build a ResNet-50 with a 1024-way output layer using slim.

    Args:
        visual_images: Input image tensor.
        is_training: Forwarded to ``resnet_v1_50`` as ``is_training``.

    Returns:
        The ``(output, end_points)`` pair returned by ``resnet_v1_50``.
    """
    arg_scope = resnet_v1.resnet_arg_scope(weight_decay=5e-4)
    with slim.arg_scope(arg_scope):
        return resnet_v1.resnet_v1_50(visual_images,
                                      num_classes=1024,
                                      is_training=is_training)
def resnet_feature(self, images, scope_name, train_mode=True):
    """Return ResNet-50 block4 activations and their spatial average.

    Args:
        images: Input image tensor.
        scope_name: Prefix of the ``<scope_name>/resnet_v1_50/block4`` end
            point; the caller is expected to have opened that scope.
        train_mode: Forwarded to ``resnet_v1_50`` as ``is_training``.

    Returns:
        ``(resnet_block_4, resnet_feature)``: the raw block4 activations and
        their mean over axes 1 and 2, reshaped to ``[-1, 2048]``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            images, 1000, is_training=train_mode, reuse=tf.AUTO_REUSE)

    block4 = end_points[scope_name + '/resnet_v1_50/block4']
    pooled = tf.reduce_mean(block4, [1, 2], keepdims=True)
    pooled = tf.squeeze(pooled)
    pooled = tf.reshape(pooled, [-1, 2048])
    return block4, pooled
def resnet_v1_50(X, is_training, num_classes):
    """Build a dense-prediction ResNet-50 with a transposed-conv upscaler.

    Args:
        X: Input image tensor.
        is_training: Forwarded to ``resnet_v1.resnet_v1_50``.
        num_classes: Number of output channels of both the ResNet and the
            ``upscale`` transposed convolution.

    Returns:
        ``(net, 16)``: the tensor named ``logits`` and the constant 16,
        which equals both the output stride used for the ResNet and the
        stride of the upscaling layer.
    """
    same_padded_ops = [slim.conv2d, slim.conv2d_transpose, slim.max_pool2d]
    with slim.arg_scope(same_padded_ops, padding='SAME'):
        dense_logits, _ = resnet_v1.resnet_v1_50(
            X,
            num_classes=num_classes,
            is_training=is_training,
            global_pool=False,
            output_stride=16)
        upscaled = slim.conv2d_transpose(dense_logits, num_classes, 31, 16,
                                         scope='upscale')
    return tf.identity(upscaled, 'logits'), 16
def resnet_model(image, reuse):
    """Build ResNet-50 features followed by a 256-d embedding and logits.

    Reads the module-level name ``num_classes``.

    Args:
        image: Input image tensor.
        reuse: Variable-reuse flag for the ``model`` scope.

    Returns:
        ``(outputs, logits)``: the 256-unit fully connected embedding and
        the linear class logits computed from it.
    """
    with tf.variable_scope("model", reuse=reuse):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            features, _ = resnet_v1.resnet_v1_50(image)
        flat = slim.flatten(features)
        embedding = slim.fully_connected(flat, 256)
        logits = slim.fully_connected(embedding, num_classes,
                                      activation_fn=None)
    return embedding, logits
def create_model(self, frames):
    """Extract per-frame ResNet-50 features from a 4-D frame batch.

    Args:
        frames: Tensor with a fully defined static 4-D shape
            ``(batch, height, width, channels)``.

    Returns:
        ResNet-50 features (no logits layer) reshaped to
        ``(batch, -1, depth)``, where ``depth`` is the size of the last axis
        of the network output.
    """
    with tf.variable_scope("video_model"):
        with slim.arg_scope(slim.nets.resnet_utils.resnet_arg_scope()):
            batch_size, height, width, channels = (
                frames.get_shape().as_list())
            # The original reshaped `video_frames`, a name that is not
            # defined in this function; the tensor in scope is `frames`.
            video_input = tf.reshape(frames,
                                     (batch_size, height, width, channels))
            video_input = tf.cast(video_input, tf.float32)
            features, _ = resnet_v1.resnet_v1_50(video_input, None,
                                                 self.is_training)
            # `seq_length` is likewise undefined here. Letting reshape infer
            # that axis gives the same shape whenever a valid explicit value
            # would.
            depth = int(features.get_shape()[3])
            features = tf.reshape(features, (batch_size, -1, depth))
    return features
def make_net(Xhinted, training):
    """Build a ResNet-50 based classifier with a weakly-supervised map.

    The input is scaled with ``x / 127.5 - 1``, passed through a ResNet-50
    without logits or global pooling (output stride 16), and then through
    ``conv_conv_pool`` with 128 filters and no pooling. One shared 1x1
    convolution named ``final`` is applied both to the globally averaged
    features and to the full feature map.

    Args:
        Xhinted: Input image tensor.
        training: Training flag passed to the ResNet and ``conv_conv_pool``.

    Returns:
        ``(cla, wsl_out)``: the sigmoid classification score reshaped to
        ``[-1, 1]``, and the sigmoid of the per-location response map.
    """
    scaled = Xhinted / 127.5 - 1
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        backbone, _ = resnet_v1.resnet_v1_50(scaled,
                                             None,
                                             is_training=training,
                                             global_pool=False,
                                             reuse=False,
                                             output_stride=16)

    conv5_cla = conv_conv_pool(backbone, [128, 128], training, name=5,
                               pool=False)

    # Global average pooling; axes are kept so the 1x1 conv still applies.
    mp = tf.reduce_mean(conv5_cla, axis=[1, 2], keepdims=True)
    print('Output of GAP: ', mp)

    cla = tf.layers.conv2d(mp,
                           1, (1, 1),
                           name='final',
                           activation=None,
                           padding='same')
    # reuse=True shares the 'final' kernel with the classification head.
    wsl_out = tf.layers.conv2d(conv5_cla,
                               1, (1, 1),
                               name='final',
                               activation=None,
                               padding='same',
                               reuse=True)

    cla_prob = tf.nn.sigmoid(tf.reshape(cla, [-1, 1]))
    wsl_prob = tf.nn.sigmoid(wsl_out)
    return cla_prob, wsl_prob
def E(self, images, reuse=False, make_preds=False, is_training=False):
    """Encoder: ResNet-50 with a tanh bottleneck and an optional classifier.

    In ``'features'`` mode ``images`` is treated as pre-computed hidden
    representations: it is reshaped to
    ``[-1, 1, 1, self.hidden_repr_size]`` and only the ``_logits_`` 1x1
    convolution is applied.

    Args:
        images: Input images, or hidden features in ``'features'`` mode.
        reuse: Variable-reuse flag for the ResNet and the extra layers.
        make_preds: Force the ``_logits_`` layer regardless of mode.
        is_training: Forwarded to ``resnet_v1_50``.

    Returns:
        The class logits when ``self.mode`` is ``'features'``,
        ``'pretrain'`` or ``'test'``, or when ``make_preds`` is set;
        otherwise the output of the ``f_repr`` bottleneck.
    """
    conv_defaults = dict(
        activation_fn=tf.nn.relu,
        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
        weights_regularizer=slim.l2_regularizer(0.0005))

    if self.mode == 'features':
        with tf.variable_scope('resnet_v1_50', reuse=reuse):
            with slim.arg_scope([slim.conv2d], **conv_defaults):
                images = tf.reshape(images,
                                    [-1, 1, 1, self.hidden_repr_size])
                return slim.conv2d(images,
                                   self.no_classes, [1, 1],
                                   activation_fn=None,
                                   scope='_logits_')

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               self.no_classes,
                                               is_training=is_training,
                                               reuse=reuse)

    wants_logits = (self.mode == 'pretrain' or self.mode == 'test'
                    or make_preds)
    with slim.arg_scope([slim.conv2d], **conv_defaults):
        # Last bottleneck block before the ResNet logits layer.
        net = end_points['resnet_v1_50/block4']
        with tf.variable_scope('resnet_v1_50', reuse=reuse):
            net = slim.conv2d(net,
                              self.hidden_repr_size, [7, 7],
                              padding='VALID',
                              activation_fn=tf.tanh,
                              scope='f_repr')
            if wants_logits:
                net = slim.conv2d(net,
                                  self.no_classes, [1, 1],
                                  activation_fn=None,
                                  scope='_logits_')
    return net
def video_model(video_frames=None, audio_frames=None):
    """Extract per-frame ResNet-50 features from a video tensor.

    Args:
        video_frames: Tensor with a fully defined static 5-D shape
            ``(batch, seq_length, height, width, channels)``.
        audio_frames: Unused.

    Returns:
        ResNet-50 features (no logits layer) reshaped to
        ``(batch, seq_length, depth)``, where ``depth`` is the size of the
        last axis of the network output.
    """
    shape = video_frames.get_shape().as_list()
    batch_size, seq_length, height, width, channels = shape

    # Fold the time axis into the batch axis so each frame is one example.
    flat_frames = tf.reshape(
        video_frames, (batch_size * seq_length, height, width, channels))
    flat_frames = tf.cast(flat_frames, tf.float32)

    features, _ = resnet_v1.resnet_v1_50(flat_frames, None)
    depth = int(features.get_shape()[3])
    return tf.reshape(features, (batch_size, seq_length, depth))
def video_model(video_frames=None, audio_frames=None):
    """Extract per-frame ResNet-50 features under the ``video_model`` scope.

    Args:
        video_frames: Tensor with a fully defined static 5-D shape
            ``(batch, seq_length, height, width, channels)``.
        audio_frames: Unused.

    Returns:
        ResNet-50 features (no logits layer) reshaped to
        ``(batch, seq_length, depth)``, where ``depth`` is the size of the
        last axis of the network output.
    """
    with tf.variable_scope("video_model"):
        dims = video_frames.get_shape().as_list()
        batch_size, seq_length, height, width, channels = dims

        # Merge batch and time so the 2-D network sees one frame per row.
        per_frame_shape = (batch_size * seq_length, height, width, channels)
        frames_2d = tf.reshape(video_frames, per_frame_shape)
        frames_2d = tf.cast(frames_2d, tf.float32)

        features, _ = resnet_v1.resnet_v1_50(frames_2d, None)
        depth = int(features.get_shape()[3])
        features = tf.reshape(features, (batch_size, seq_length, depth))
    return features
def test_resnet(self):
    """Build ResNet-50, restore its checkpoint and record Grad-CAM state.

    Sets ``self.sess``, ``self.graph_origin``, ``self.target_op_name`` (the
    last candidate feature-map op reported by darkon) and
    ``self.model_name``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(self.inputs,
                               self.nbclasses,
                               is_training=False)

    saver = tf.train.Saver(tf.global_variables())
    session = tf.InteractiveSession()
    saver.restore(session, 'test/data/resnet_v1_50.ckpt')

    self.sess = session
    self.graph_origin = tf.get_default_graph()
    candidates = darkon.Gradcam.candidate_featuremap_op_names(
        session, self.graph_origin)
    self.target_op_name = candidates[-1]
    self.model_name = 'resnet'
def setUp(self):
    """Create a fresh ResNet-50 graph, restore weights, and build Grad-CAM.

    Sets ``self.nbclasses``, ``self.graph_origin`` (a GraphDef snapshot
    taken before the Grad-CAM object is created), ``self.insp`` and
    ``self.sess``.
    """
    tf.reset_default_graph()
    self.nbclasses = 1000
    image_ph = tf.placeholder(tf.float32, [1, 224, 224, 3])

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        resnet_v1.resnet_v1_50(image_ph, self.nbclasses, is_training=False)

    saver = tf.train.Saver(tf.global_variables())
    session = tf.InteractiveSession()
    saver.restore(session, 'test/data/resnet_v1_50.ckpt')

    last_conv_op = 'resnet_v1_50/block4/unit_3/bottleneck_v1/Relu'
    self.graph_origin = tf.get_default_graph().as_graph_def()
    self.insp = darkon.Gradcam(image_ph, self.nbclasses, last_conv_op)
    self.sess = session
def build_single_resnet(train_tfdata, is_train, name_scope='resnet_v1_50',
                        variable_scope=''):
    """Build a ResNet-50; return its output and the flattened pool5 tensor.

    Args:
        train_tfdata: Input image tensor.
        is_train: Forwarded to ``resnet_arg_scope`` as ``is_training``.
        name_scope: Name-scope part of the ``pool5`` tensor name.
        variable_scope: Prefix prepended verbatim to ``name_scope`` when the
            ``pool5`` tensor is looked up.

    Returns:
        ``(identity, feature)``: the network output for
        ``FLAGS.num_class`` classes, and the flattened
        ``<variable_scope><name_scope>/pool5:0`` tensor.
    """
    arg_scope = resnet_v1.resnet_arg_scope(is_training=is_train)
    with slim.arg_scope(arg_scope):
        outputs = resnet_v1.resnet_v1_50(train_tfdata,
                                         num_classes=FLAGS.num_class,
                                         global_pool=True)
        identity = outputs[0]
        graph = tf.get_default_graph()
        pooled = graph.get_tensor_by_name(
            '%s%s/pool5:0' % (variable_scope, name_scope))
        feature = slim.flatten(pooled)
    return identity, feature