def res_fcn_32s(inputs, num_classes, is_training):
    """Build an FCN-32s style head on top of a ResNet-v1-50 backbone.

    Args:
        inputs: image batch forwarded to ``resnet_v1.resnet_v1_50``.
        num_classes: number of output classes; also the channel count of the
            bilinear upsampling filter.
        is_training: forwarded to the backbone.

    Returns:
        A tuple ``(output, res_variables)`` where ``output`` is the backbone
        output upsampled 32x in both spatial dimensions and
        ``res_variables`` maps each variable name (with the leading
        ``'res_fcn_32s/'`` scope and the trailing ``':0'`` removed) to the
        variable itself.
    """
    scope_name = 'res_fcn_32s'
    with tf.variable_scope(scope_name):
        # Reuse the structure of the ResNet-v1-50 classification network.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, _ = resnet_v1.resnet_v1_50(inputs,
                                            num_classes,
                                            is_training=is_training,
                                            global_pool=False,
                                            output_stride=32)

        # The backbone is built with output_stride=32, so a single
        # transposed convolution with stride 32 restores the input
        # resolution (hence "32s").  The filter is a fixed bilinear
        # interpolation kernel, not a trainable variable.
        upsample_filter = upsampling.bilinear_upsample_weights(32, num_classes)
        upsample_filter_tensor = tf.constant(upsample_filter)
        shape = tf.shape(net)
        output = tf.nn.conv2d_transpose(
            net,
            upsample_filter_tensor,
            output_shape=tf.stack(
                [shape[0], shape[1] * 32, shape[2] * 32, shape[3]]),
            strides=[1, 32, 32, 1])

    variables = slim.get_variables(scope_name)

    # Key each variable by its name without the enclosing scope and without
    # the ':0' output suffix.  Presumably this lets the backbone variables be
    # initialised from a pre-trained ResNet checkpoint -- confirm at the
    # call site.  The prefix length is derived from the scope name instead
    # of a hard-coded 12 so the two cannot drift apart.
    prefix_len = len(scope_name) + 1  # +1 for the '/' separator
    res_variables = {
        variable.name[prefix_len:-2]: variable for variable in variables
    }
    return output, res_variables
def predict(self, preprocessed_inputs):
    """Predict prediction tensors from inputs tensor.

    Outputs of this function can be passed to loss or postprocess functions.

    Args:
        preprocessed_inputs: A float32 tensor with shape [batch_size,
            height, width, num_channels] representing a batch of images.

    Returns:
        prediction_dict: A dictionary holding prediction tensors to be
            passed to the Loss or Postprocess functions.  It contains the
            single key ``'logits'``.
    """
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        # NOTE(review): is_training is hard-coded to True, so the backbone
        # is always built in training mode, even when this method is used
        # for evaluation -- confirm that this is intended.
        net, _ = resnet_v1.resnet_v1_50(preprocessed_inputs,
                                        num_classes=None,
                                        is_training=True)
    # Drop the two spatial axes, which must both have size 1 here.
    net = tf.squeeze(net, axis=[1, 2])
    logits = slim.fully_connected(net,
                                  num_outputs=self.num_classes,
                                  activation_fn=None,
                                  scope='Predict')
    return {'logits': logits}
def main():
    """Freeze a ResNet-v1-50 checkpoint up to ``block4`` and run it once.

    Builds the network, restores ``./resnet_v1_50.ckpt``, converts the
    variables to constants, re-imports the frozen graph into a fresh
    ``tf.Graph`` and prints the shape of the ``block4`` output for a dummy
    batch of ones.
    """
    ckpt_path = './resnet_v1_50.ckpt'

    X = tf.placeholder(tf.float32, shape=[None, 96, 96, 3], name='input')
    with slim.arg_scope(resnet_arg_scope()):
        _, end_points = resnet_v1_50(X, num_classes=1000, is_training=False)
    final_layer_to_load = end_points['resnet_v1_50/block4']

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)
        # Node names have no ':<output index>' suffix, tensor names do.
        frozen_graph_def = convert_variables_to_constants(
            sess,
            sess.graph_def,
            output_node_names=[final_layer_to_load.name.split(':')[0]])

    frozen_graph = tf.Graph()
    with frozen_graph.as_default():
        # name='' keeps the original node names so the tensors can be
        # fetched and fed by the same names as in the source graph.
        tf.import_graph_def(frozen_graph_def, name='')

    # Use a context manager so the second session is released as well.
    with tf.Session(graph=frozen_graph) as sess:
        res = sess.run(final_layer_to_load.name,
                       {'input:0': np.ones(shape=[12, 96, 96, 3])})
    print("out shape: {}".format(res.shape))
def create_trunk(self, images):
    """Build the ResNet trunk and hand its output to ``add_extra_layers``.

    The input is scaled by 255, its three channels are reversed and
    ``MEAN_COLOR`` is subtracted before it is fed to ``resnet_v1.resnet_v1``.
    The resulting end points are stored in ``self.outputs``.

    Args:
        images: 4-D image tensor whose last axis holds three channels
            (assumed to be ordered R, G, B, as the local names imply).
    """
    # Reverse the channel order and remove the mean colour.
    red, green, blue = tf.split(images * 255, 3, axis=3)
    bgr = tf.concat([blue, green, red], 3)
    centered = bgr - MEAN_COLOR

    def _units(depth, inner_depth, first_stride, num_follow_up):
        # One leading unit followed by `num_follow_up` stride-1 units.
        head = [(depth, inner_depth, first_stride)]
        tail = [(depth, inner_depth, 1)] * num_follow_up
        return head + tail

    trunk_scope = resnet_v1.resnet_arg_scope(
        is_training=self.training,
        weight_decay=self.weight_decay,
        batch_norm_decay=args.bn_decay)
    with slim.arg_scope(trunk_scope):
        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               _units(256, 64, 1, 2)),
            resnet_utils.Block('block2', bottleneck,
                               _units(512, 128, 2, 3)),
            resnet_utils.Block('block3', bottleneck,
                               _units(1024, 256, 2, self.num_block3)),
            resnet_utils.Block('block4', bottleneck,
                               _units(2048, 512, 2, 2)),
        ]
        net, endpoints = resnet_v1.resnet_v1(centered,
                                             blocks,
                                             global_pool=False,
                                             reuse=self.reuse,
                                             scope=self.scope)
    self.outputs = endpoints
    self.add_extra_layers(net)
def predict(self, inputs_dict):
    """Build the classification logits for a batch of images.

    Outputs of this function can be passed to loss or postprocess functions.

    Args:
        inputs_dict: a dictionary with two entries:
            inputs: A float32 placeholder or tensor with shape
                [batch_size, height, width, num_channels], e.g.
                tf.placeholder(tf.float32, shape=[None, 224, 224, 3],
                               name='inputs')
            is_training: e.g. tf.placeholder(tf.bool, name='is_training')

    Returns:
        prediction_dict: A dictionary holding prediction tensors to be
            passed to the Loss or Postprocess functions (key ``'logits'``).
    """
    images = inputs_dict['inputs']
    training_flag = inputs_dict['is_training']

    # NOTE(review): the backbone is `resnet_v1_35`, while the comment this
    # code shipped with spoke of resnet50 -- confirm which one is meant.
    backbone_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(backbone_scope):
        features, _ = resnet_v1.resnet_v1_35(images,
                                             num_classes=None,
                                             is_training=training_flag)

    with tf.variable_scope('Logits'):
        # Remove the two spatial axes (both must be of size 1) so that the
        # features become a [batch, channels] matrix.
        flat = tf.squeeze(features, axis=[1, 2])
        regularised = slim.dropout(flat,
                                   keep_prob=0.5,
                                   is_training=training_flag,
                                   scope='scope')
        logits = slim.fully_connected(regularised,
                                      num_outputs=self.num_classes,
                                      activation_fn=None,
                                      scope='fc')
    return {'logits': logits}
def predict(self, preprocessed_inputs):
    """Build the classification logits on top of ResNet-v1-50.

    Args:
        preprocessed_inputs: image batch fed to ``resnet_v1.resnet_v1_50``.

    Returns:
        A dictionary with the single key ``'logits'``.
    """
    backbone_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(backbone_scope):
        features, _ = resnet_v1.resnet_v1_50(
            preprocessed_inputs,
            num_classes=None,
            is_training=self._is_training)
    # Drop the two spatial axes, which must both have size 1 here.
    flat_features = tf.squeeze(features, axis=[1, 2])
    logits = slim.fully_connected(flat_features,
                                  num_outputs=self.num_classes,
                                  activation_fn=None,
                                  scope='Predict')
    return {'logits': logits}
def endpoints(image, is_training):
    """Build ResNet-v1-50 and return its end points plus the scope name.

    Args:
        image: rank-4 image tensor; ``_RGB_MEAN`` is subtracted from it.
        is_training: forwarded to ``resnet_v1_50``.

    Returns:
        ``(end_points, 'resnet_v1_50')``.  Both ``'model_output'`` and
        ``'global_pool'`` are set to the spatial mean of the ``block4``
        activations.

    Raises:
        ValueError: if ``image`` is not rank 4.
    """
    rank = image.get_shape().ndims
    if rank != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    rgb_mean = tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))
    image = image - rgb_mean

    backbone_scope = resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)
    with tf.contrib.slim.arg_scope(backbone_scope):
        _, end_points = resnet_v1_50(image,
                                     num_classes=None,
                                     is_training=is_training,
                                     global_pool=True)

    # Recompute the pooled feature from block4 and publish it under both
    # names, overwriting whatever 'global_pool' entry the network produced.
    pooled = tf.reduce_mean(end_points['resnet_v1_50/block4'], [1, 2],
                            name='pool5')
    end_points['model_output'] = pooled
    end_points['global_pool'] = pooled

    return end_points, 'resnet_v1_50'
def build_FPN(images, config, is_training, backbone='resnet50'):
    """Build a feature pyramid network on top of a ResNet backbone.

    Args:
        images: [batch, h, w, channels] image tensor.
        config: object providing ``TOP_DOWN_PYRAMID_SIZE``, the channel
            count of every pyramid level.
        is_training: forwarded to the backbone.
        backbone: ``'resnet50'`` or ``'resnet101'``.

    Returns:
        pyramid_feature: dict with keys ``'P2'`` .. ``'P5'`` holding the
        feature maps of the different pyramid levels, each
        [batch, height, width, channels].

    Raises:
        ValueError: if ``backbone`` is not supported.  (Previously a message
            was printed and the function then failed with a KeyError.)
    """
    # backbone -> (slim constructor / scope name, last unit of block3)
    backbone_specs = {
        'resnet50': ('resnet_v1_50', 'unit_5'),
        'resnet101': ('resnet_v1_101', 'unit_22'),
    }
    if backbone not in backbone_specs:
        raise ValueError('Unknown backbone: %r (expected one of %s)' %
                         (backbone, sorted(backbone_specs)))
    net_name, c4_unit = backbone_specs[backbone]

    # Build the backbone network and pick the bottom-up feature maps.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        build_backbone = getattr(resnet_v1, net_name)
        _, end_points = build_backbone(images,
                                       is_training=is_training,
                                       scope=net_name)
    pyramid = {
        'C2': end_points[net_name + '/block1/unit_2/bottleneck_v1'],
        'C3': end_points[net_name + '/block2/unit_3/bottleneck_v1'],
        'C4': end_points[net_name + '/block3/' + c4_unit + '/bottleneck_v1'],
        'C5': end_points[net_name + '/block4/unit_3/bottleneck_v1'],
    }

    # Build the top-down pathway with lateral connections.
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(
                pyramid['C5'], config.TOP_DOWN_PYRAMID_SIZE, 1)
            for i in range(4, 1, -1):
                lateral = pyramid['C%d' % i]
                # Resize the coarser level to the lateral map's spatial
                # size, merge by addition, then smooth with a 3x3 conv.
                upshape = tf.shape(lateral)
                u = tf.image.resize_bilinear(
                    pyramid_feature['P%d' % (i + 1)],
                    size=(upshape[1], upshape[2]))
                c = slim.conv2d(lateral, config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
def main(args):
    """Collect validation logits of a trained ResNet and fit a temperature.

    Args:
        args: namespace providing ``batch_size``, ``data_dir`` and
            ``save_dir`` (directory holding the checkpoint to restore).
    """
    dataset = CifarDataSet(args.batch_size, args.data_dir)
    dataset.make_batch_valid_or_test()
    num_classes = 100 if 'cifar-100' in args.data_dir else 10

    # It is actually a 112-layer net: there are 2 additional 1x1 convs for
    # the shortcuts.
    model = resnet_v1.resnet_v1_110
    print("Data loaded! Building model...")

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = model(dataset.images_vt, num_classes, is_training=False)
    logits = tf.squeeze(net, [1, 2])

    restorer = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(
                                force_gpu_compatible=True,
                                allow_growth=True))
    sess = tf.Session(config=config)
    try:
        sess.run(dataset.iterator_vt.initializer,
                 feed_dict={dataset.validation: True})
        restorer.restore(sess, tf.train.latest_checkpoint(args.save_dir))
        print("Model built! Getting logits...")

        num_eval_batches = (dataset.images_np['valid'].shape[0] //
                            dataset.eval_batch_size)
        # NOTE(review): the floor division drops a trailing partial batch,
        # while all of labels_np['valid'] is passed on below -- confirm the
        # validation set size is a multiple of eval_batch_size.
        logits_nps = np.concatenate(
            [sess.run(logits) for _ in range(num_eval_batches)])

        print("Logits get! Do temperature scaling...")
        print("=" * 80)
        # Use temp_var with your logits to get calibrated output.
        temp_var = temp_scaling(logits_nps, dataset.labels_np['valid'], sess)
        print("=" * 80)
        print("Done!")
    finally:
        # The session was previously never released.
        sess.close()
def get_network(name, image, weight_decay=0.000005, is_training=False,
                reuse=False):
    """Build the named backbone network on ``image``.

    Args:
        name: backbone identifier; only ``'resnet50'`` is supported.
        image: input tensor fed to the network.
        weight_decay: forwarded to ``resnet_v1.resnet_arg_scope``.
        is_training: forwarded to the network constructor.
        reuse: forwarded to the network constructor.

    Returns:
        ``(logits, end_points)`` as returned by the network, with the input
        tensor additionally stored under ``end_points['input']``.

    Raises:
        ValueError: if ``name`` is not a supported network.  (Previously
            this surfaced as an UnboundLocalError.)
    """
    if name != 'resnet50':
        raise ValueError(
            "Unknown network %r; only 'resnet50' is supported" % (name,))

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet50(image,
                                      num_classes=None,
                                      is_training=is_training,
                                      reuse=reuse)
    end_points['input'] = image
    return logits, end_points
def resnet_forward(self, x, layer, scope):
    """Run ``x`` through ResNet-v1-50 and return one named end point.

    Args:
        x: image batch; rescaled by ``255 * 0.5 * (x + 1)`` before the mean
            is subtracted (presumably x lies in [-1, 1] -- confirm).
        layer: end point name relative to ``'resnet_v1_50/'``.
        scope: unused by this method.

    Returns:
        The end point ``'resnet_v1_50/' + layer``.
    """
    rescaled = 255.0 * (0.5 * (x + 1.0))

    # Per-channel means, broadcast over batch and spatial axes.
    channel_means = tf.constant([123.68, 116.779, 103.939],
                                dtype=tf.float32,
                                shape=[1, 1, 1, 3],
                                name='img_mean')
    centered = rescaled - channel_means

    with slim.arg_scope(resnet_arg_scope()):
        _, layers = resnet_v1_50(centered,
                                 num_classes=1000,
                                 is_training=False,
                                 reuse=self.reuse_resnet)
    # Every later call shares the variables created by the first one.
    self.reuse_resnet = True
    end_point_name = 'resnet_v1_50/' + layer
    return layers[end_point_name]
def create_trunk(images, rois=None, reuse=False, fc_layers=True,
                 weight_decay=0.0005):
    """Build the ``resnet_frcnn`` trunk in inference mode.

    The input is scaled by 255, its three channels are reversed and
    ``RESNET_MEAN`` is subtracted before the network is applied.

    Args:
        images: 4-D image tensor whose last axis holds three channels
            (assumed to be ordered R, G, B, as the local names imply).
        rois: forwarded to ``resnet_frcnn``.
        reuse: forwarded to ``resnet_frcnn``.
        fc_layers: forwarded to ``resnet_frcnn``.
        weight_decay: forwarded to ``resnet_v1.resnet_arg_scope``.

    Returns:
        ``(net, endpoints)`` as returned by ``resnet_frcnn``.
    """
    red, green, blue = tf.split(images * 255, 3, axis=3)
    bgr = tf.concat([blue, green, red], 3)
    centered = bgr - RESNET_MEAN

    trunk_scope = resnet_v1.resnet_arg_scope(is_training=False,
                                             weight_decay=weight_decay)
    with slim.arg_scope(trunk_scope):
        result = resnet_frcnn(centered,
                              rois=rois,
                              global_pool=True,
                              fc_layers=fc_layers,
                              reuse=reuse)
    net, endpoints = result
    return net, endpoints
def __init__(self, images):
    """Build ResNet-v1-101 on ``images`` and expose its four block outputs.

    Args:
        images: image batch fed to ``resnet_v1.resnet_v1_101``.

    The first value returned by the network is treated as a sequence of
    block outputs; the first four are stored in ``self.layer`` under
    ``'block1'`` .. ``'block4'``.
    """
    self.layer = {}
    self.images = images

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        self.nets, _ = resnet_v1.resnet_v1_101(self.images,
                                               1000,
                                               is_training=False,
                                               spatial_squeeze=False,
                                               global_pool=False,
                                               output_stride=16)

    # Report how many outputs there are and the static shape of each.
    print(len(self.nets))
    for position, block_output in enumerate(self.nets, start=1):
        print("resnet_bolck_%d" % position)
        print(block_output.get_shape())

    for slot in range(4):
        self.layer['block%d' % (slot + 1)] = self.nets[slot]
def inference(self, x, is_training=True, reuse=False):
    """Build the backbone plus a linear classifier and return the logits.

    Args:
        x: image batch fed to ``self.resnet_build_func``.
        is_training: forwarded to the backbone.
        reuse: forwarded to the backbone.

    Returns:
        The logits tensor with ``self.num_classes`` outputs.
    """
    backbone_scope = resnet_v1.resnet_arg_scope()
    with slim.arg_scope(backbone_scope):
        features, _ = self.resnet_build_func(inputs=x,
                                             num_classes=None,
                                             is_training=is_training,
                                             reuse=reuse,
                                             scope=self.network_name)

    with tf.variable_scope('Logits'):
        # Remove the two spatial axes (both must be of size 1) so that the
        # features become a [batch, channels] matrix.
        flat_features = tf.squeeze(features, axis=[1, 2])
        logits = slim.fully_connected(flat_features,
                                      num_outputs=self.num_classes,
                                      activation_fn=None,
                                      scope='fc')
    return logits
def model(x, H, reuse, is_training=True):
    """Build the slim backbone selected by ``H`` and return two feature maps.

    Args:
        x: image batch fed to the backbone.
        H: hyper-parameter dict.  Reads ``'slim_basename'``
            (``'resnet_v1_101'`` or ``'InceptionV1'``), ``'slim_top_lname'``,
            ``'later_feat_channels'`` and, optionally,
            ``'slim_attention_lname'``.
        reuse: forwarded to the backbone.
        is_training: forwarded to the backbone.

    Returns:
        ``(coarse_feat, early_feat)``: the top-layer end point truncated to
        its first ``H['later_feat_channels']`` channels, and the end point
        named by ``H['slim_attention_lname']``.

    Raises:
        ValueError: if ``H['slim_basename']`` is not supported.  (Previously
            this surfaced as an UnboundLocalError.)
    """
    basename = H['slim_basename']
    if basename == 'resnet_v1_101':
        with slim.arg_scope(resnet.resnet_arg_scope()):
            _, T = resnet.resnet_v1_101(x,
                                        is_training=is_training,
                                        num_classes=1000,
                                        reuse=reuse)
    elif basename == 'InceptionV1':
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            _, T = inception.inception_v1(x,
                                          is_training=is_training,
                                          num_classes=1001,
                                          spatial_squeeze=False,
                                          reuse=reuse)
    else:
        raise ValueError(
            "Unsupported slim_basename %r; expected 'resnet_v1_101' or "
            "'InceptionV1'" % (basename,))

    coarse_feat = T[H['slim_top_lname']][:, :, :, :H['later_feat_channels']]
    # Sanity check: the top layer must have at least that many channels.
    assert coarse_feat.op.outputs[0].get_shape()[3] == H['later_feat_channels']

    # The fine feature can be used to reinspect the input.
    # NOTE(review): the default 'Mixed_3b' looks like an Inception end point
    # name -- confirm that ResNet configurations always set
    # 'slim_attention_lname'.
    attention_lname = H.get('slim_attention_lname', 'Mixed_3b')
    early_feat = T[attention_lname]

    return coarse_feat, early_feat
def create_trunk(self, images):
    """Build the ResNet trunk and hand its output to ``add_extra_layers``.

    The input is scaled by 255, its three channels are reversed and
    ``MEAN_COLOR`` is subtracted before it is fed to ``resnet_v1.resnet_v1``.
    The resulting end points are stored in ``self.outputs``.

    Args:
        images: 4-D image tensor whose last axis holds three channels
            (assumed to be ordered R, G, B, as the local names imply).
    """
    # Reverse the channel order and remove the mean colour.
    red, green, blue = tf.split(images * 255, 3, axis=3)
    bgr = tf.concat([blue, green, red], 3)
    centered = bgr - MEAN_COLOR

    def _units(depth, inner_depth, first_stride, num_follow_up):
        # One leading unit followed by `num_follow_up` stride-1 units.
        head = [(depth, inner_depth, first_stride)]
        tail = [(depth, inner_depth, 1)] * num_follow_up
        return head + tail

    trunk_scope = resnet_v1.resnet_arg_scope(
        is_training=self.training,
        weight_decay=self.weight_decay,
        batch_norm_decay=args.bn_decay)
    with slim.arg_scope(trunk_scope):
        blocks = [
            resnet_utils.Block('block1', bottleneck,
                               _units(256, 64, 1, 2)),
            resnet_utils.Block('block2', bottleneck,
                               _units(512, 128, 2, 3)),
            resnet_utils.Block('block3', bottleneck,
                               _units(1024, 256, 2, self.num_block3)),
            resnet_utils.Block('block4', bottleneck,
                               _units(2048, 512, 2, 2)),
        ]
        net, endpoints = resnet_v1.resnet_v1(centered,
                                             blocks,
                                             global_pool=False,
                                             reuse=self.reuse,
                                             scope=self.scope)
    self.outputs = endpoints
    self.add_extra_layers(net)
def model(images, text_scale=512, weight_decay=1e-5, is_training=True):
    """Define the model; slim's implementation of ResNet is the backbone.

    Args:
        images: image batch; ``mean_image_subtraction`` is applied first.
        text_scale: factor applied to the sigmoid-activated geometry maps.
        weight_decay: L2 regularisation strength for backbone and head.
        is_training: forwarded to the backbone and to batch normalisation.

    Returns:
        ``(F_score, F_geometry)``: a 1-channel score map and a 5-channel
        geometry map (4 box channels followed by 1 rotation-angle channel).
    """
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               is_training=is_training,
                                               scope='resnet_v1_50')

    # `values` expects the tensors themselves; the bound method
    # `end_points.values` was previously passed without being called.
    with tf.variable_scope('feature_fusion',
                           values=list(end_points.values())):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            # Backbone feature maps, listed from pool5 to pool2.
            f = [
                end_points['pool5'], end_points['pool4'],
                end_points['pool3'], end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    # Merge the unpooled previous stage with the current
                    # backbone feature, then fuse with 1x1 and 3x3 convs.
                    c1_1 = slim.conv2d(
                        tf.concat([g[i - 1], f[i]], axis=-1),
                        num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # Here we use a slightly different way for the regression part:
            # a sigmoid first limits the regression range; the same is done
            # for the angle map.
            F_score = slim.conv2d(g[3], 1, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel rotation angle.
            geo_map = slim.conv2d(g[3], 4, 1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * text_scale
            # Sigmoid output in (0, 1) -> angle in (-pi/4, pi/4),
            # i.e. between -45 and 45 degrees.
            angle_map = (slim.conv2d(g[3], 1, 1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
def add_extra_layers(self, net):
    """Append the extra SSD blocks and the top-down ``tail_att`` decoder.

    Extends the trunk output ``net`` with residual blocks ``block5`` ..
    ``block7`` (plus ``block8`` for 512-pixel inputs), then walks back up
    through ``tail_att`` skip connections (``block_rev7`` .. ``block_rev2``,
    optionally ``block_rev1``).  All new end points are merged into
    ``self.outputs``, and a 1x1 spatial summary is stored under
    ``DEFAULT_SSD_SCOPE + '/pool6'``.

    Earlier decoder variants that lived here as commented-out code and
    no-op string literals (attention, sub_pixel_skip, noconcat,
    bottleneck_skip) have been removed; only the active ``tail_att`` path
    remains.

    Args:
        net: output tensor of the trunk network.
    """
    # NOTE(review): the source reached review with its indentation
    # flattened; the nesting below is reconstructed from the data flow
    # (e.g. 'block_rev6' must exist when image_size != 512) -- confirm
    # against version control.
    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(is_training=self.training,
                                       weight_decay=self.weight_decay,
                                       batch_norm_decay=args.bn_decay)):
        block_depth = 2
        num_fm = 2048
        half_fm = num_fm // 2
        follow_up = [(half_fm, half_fm, 1)] * (block_depth - 1)
        blocks = [
            resnet_utils.Block('block5', bottleneck,
                               [(half_fm, half_fm, 2)] + follow_up),
            resnet_utils.Block('block6', bottleneck,
                               [(half_fm, half_fm, 2)] + follow_up),
            # block7 keeps the spatial resolution (stride 1 throughout).
            resnet_utils.Block('block7', bottleneck,
                               [(half_fm, half_fm, 1)] + follow_up),
        ]
        if args.image_size == 512:
            blocks += [
                resnet_utils.Block(
                    'block8', bottleneck,
                    [(num_fm, num_fm // 4, 2)] +
                    [(num_fm, num_fm // 4, 1)] * (block_depth - 1)),
            ]

        net, endpoints = resnet_v1.resnet_v1(net,
                                             blocks,
                                             global_pool=False,
                                             include_root_block=False,
                                             reuse=self.reuse,
                                             scope=DEFAULT_SSD_SCOPE)
        self.outputs.update(endpoints)

        with tf.variable_scope(DEFAULT_SSD_SCOPE + "_back",
                               reuse=self.reuse):
            end_points_collection = "reverse_ssd_end_points"
            with slim.arg_scope([slim.conv2d, tail_att],
                                outputs_collections=end_points_collection):
                top_fm = args.top_fm
                int_fm = top_fm // 4

                if args.image_size == 512:
                    # The higher resolution adds one more downsampling
                    # block, so one extra decoder layer is prepended.
                    net = tail_att(
                        net, self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                        top_fm, int_fm, scope='block_rev7')

                # Skip connections from block6 down to block2.  Note that
                # these pass top_fm as the fourth argument, while
                # block_rev7 and block_rev1 pass int_fm.
                skip_sources = (
                    (DEFAULT_SSD_SCOPE, 6),
                    (DEFAULT_SSD_SCOPE, 5),
                    (self.scope, 4),
                    (self.scope, 3),
                    (self.scope, 2),
                )
                for source_scope, index in skip_sources:
                    net = tail_att(
                        net,
                        self.outputs[source_scope + '/block%d' % index],
                        top_fm, top_fm, scope='block_rev%d' % index)

                if args.x4:
                    # To provide stride 4 we add one more layer with
                    # upsampling.
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block1'],
                                   top_fm, int_fm, scope='block_rev1')

                endpoints = slim.utils.convert_collection_to_dict(
                    end_points_collection)
        self.outputs.update(endpoints)

        # Create an output of spatial resolution 1x1 with the conventional
        # name 'pool6'.  The literal 'ssd_back/...' keys presumably rely on
        # DEFAULT_SSD_SCOPE being 'ssd' -- confirm.
        if args.image_size == 512:
            pool_source = self.outputs['ssd_back/block_rev7/shortcut']
        else:
            pool_source = self.outputs['ssd_back/block_rev6/shortcut']
        self.outputs[DEFAULT_SSD_SCOPE + '/pool6'] = tf.reduce_mean(
            pool_source, [1, 2], name='pool6', keep_dims=True)
logits, end_points = inception_resnet_v2.inception_resnet_v2(scaledInputBatchImages, is_training=False) elif options.model == "ResNet": if options.useImageMean: imageMean = tf.reduce_mean(inputBatchImagesPlaceholder, axis=[1, 2], keep_dims=True) print ("Image mean shape: %s" % str(imageMean.shape)) processedInputBatchImages = inputBatchImagesPlaceholder - imageMean else: channels = tf.split(axis=3, num_or_size_splits=options.imageChannels, value=inputBatchImagesPlaceholder) for i in range(options.imageChannels): channels[i] -= IMAGENET_MEAN[i] processedInputBatchImages = tf.concat(axis=3, values=channels) print (processedInputBatchImages.shape) # Create model arg_scope = resnet_v1.resnet_arg_scope() with slim.arg_scope(arg_scope): # logits, end_points = resnet_v1.resnet_v1_152(processedInputBatchImages, is_training=options.trainModel, num_classes=numClasses) logits, end_points = resnet_v1.resnet_v1_152(processedInputBatchImages, is_training=False) elif options.model == "NAS": scaledInputBatchImages = tf.scalar_mul((1.0 / 255.0), inputBatchImagesPlaceholder) scaledInputBatchImages = tf.subtract(scaledInputBatchImages, 0.5) scaledInputBatchImages = tf.multiply(scaledInputBatchImages, 2.0) # Create model arg_scope = nasnet.nasnet_large_arg_scope() with slim.arg_scope(arg_scope): # logits, end_points = nasnet.build_nasnet_large(scaledInputBatchImages, is_training=options.trainModel, num_classes=numClasses) logits, end_points = nasnet.build_nasnet_large(scaledInputBatchImages, is_training=False, num_classes=options.numClasses)
def main(args):
    """Train a ResNet-v1-101 embedding network with triplet loss.

    Creates time-stamped log and model directories, builds the input
    queue and the training graph, optionally restores a pre-trained model,
    then alternates training epochs, checkpointing and (if a validation
    directory is given) evaluation.

    Args:
        args: parsed command-line arguments.

    Returns:
        The directory the model checkpoints were written to.
    """
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(log_dir):
        # Create the log directory if it doesn't exist.
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(model_dir):
        # Create the model directory if it doesn't exist.
        os.makedirs(model_dir)

    # Write arguments to a text file.
    facenet.write_arguments_to_file(args,
                                    os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory.
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    seed = random.SystemRandom().randint(0, 10240)
    np.random.seed(seed=seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' %
              os.path.expanduser(args.pretrained_model))

    if args.validation_dir:
        print('Validation directory: %s' % args.validation_dir)
        # Read the file containing the pairs used for testing.
        pairs = read_pairs(os.path.expanduser(args.validation_pairs))
        # Get the paths for the corresponding images.
        validation_paths, actual_issame = get_paths(
            os.path.expanduser(args.validation_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(seed)

        # Placeholders fed on every training / evaluation step.
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 3),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 3),
                                            name='labels')

        # Each queue element holds the three image paths and the three
        # labels of one triplet.
        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3, ), (3, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        image_size = resnet_v1.resnet_v1_101.default_image_size
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                processed_image = vgg_preprocessing.preprocess_image(
                    image, image_size, image_size,
                    is_training=False, bgr=True)
                if args.random_flip:
                    processed_image = tf.image.random_flip_left_right(
                        processed_image)
                images.append(processed_image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(image_size, image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph.
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=args.weight_decay)):
            val_logits, _ = resnet_v1.resnet_v1_101_triplet(
                image_batch,
                embedding_size=DIM_HASHCODE,
                is_training=phase_train_placeholder)

        # Created before the global step and optimizer variables exist, so
        # this saver only covers the variables present at this point (the
        # ones a pre-trained model is restored into below).
        loader = tf.train.Saver()

        embeddings = tf.squeeze(val_logits['triplet_pre_embeddings'], [1, 2],
                                name='feat_embeddings/squeezed')
        global_step = tf.Variable(0, trainable=False)

        # Split embeddings into anchor, positive and negative and calculate
        # the triplet loss.
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, DIM_HASHCODE]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative,
                                            args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total loss.
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses,
                              name='total_loss')

        # Build a graph that trains the model with one batch of examples
        # and updates the model parameters.
        train_op, _ = facenet.train(total_loss, global_step, args.optimizer,
                                    learning_rate, args.moving_average_decay,
                                    tf.trainable_variables())

        # Saver used for the periodic checkpoints.
        saver = tf.train.Saver(max_to_keep=3)

        # Build the summary operation based on the TF collection of
        # summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        coord = tf.train.Coordinator()
        summary_writer = None
        try:
            # Initialize variables.
            sess.run(tf.global_variables_initializer(),
                     feed_dict={phase_train_placeholder: True})
            sess.run(tf.local_variables_initializer(),
                     feed_dict={phase_train_placeholder: True})

            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            tf.train.start_queue_runners(coord=coord, sess=sess)

            with sess.as_default():
                if args.pretrained_model:
                    print('Restoring pretrained model: %s' %
                          args.pretrained_model)
                    loader.restore(sess,
                                   os.path.expanduser(args.pretrained_model))

                # Training and validation loop.
                # NOTE(review): `epoch` is refreshed only after the loop
                # condition has been checked, so the body appears to run
                # once more after max_nrof_epochs is reached -- confirm
                # whether that extra epoch is intended.
                epoch = 0
                while epoch < args.max_nrof_epochs:
                    step = sess.run(global_step, feed_dict=None)
                    epoch = step // args.epoch_size
                    # Train for one epoch.
                    train(args, sess, train_set, epoch,
                          image_paths_placeholder, labels_placeholder,
                          labels_batch, batch_size_placeholder,
                          learning_rate_placeholder, phase_train_placeholder,
                          enqueue_op, input_queue, global_step, embeddings,
                          total_loss, train_op, summary_op, summary_writer,
                          args.learning_rate_schedule_file, DIM_HASHCODE,
                          anchor, positive, negative, triplet_loss)

                    # Save variables and the metagraph if it doesn't exist
                    # already.
                    save_variables_and_metagraph(sess, saver, summary_writer,
                                                 model_dir, subdir, step)

                    # Evaluate on the validation data set.
                    if args.validation_dir:
                        evaluate(sess, validation_paths, embeddings,
                                 labels_batch, image_paths_placeholder,
                                 labels_placeholder, batch_size_placeholder,
                                 learning_rate_placeholder,
                                 phase_train_placeholder, enqueue_op,
                                 actual_issame, args.batch_size,
                                 args.validation_nrof_folds, log_dir, step,
                                 summary_writer, DIM_HASHCODE)
        finally:
            # Previously none of these were released: ask the queue-runner
            # threads to stop, flush pending summaries and free the
            # session, also when training fails half-way.
            coord.request_stop()
            if summary_writer is not None:
                summary_writer.close()
            sess.close()

    return model_dir
PB = sys.argv[2] OUT = sys.argv[3] print(IN, PB, OUT) else: exit(0) # 初期設定 tf.reset_default_graph() tf_config = tf.ConfigProto() tf_config.gpu_options.allow_growth = True sess = tf.Session(config=tf_config) # モデルの構築 images = tf.placeholder(tf.float32, (None, 224, 224, 3), name='images') labels = tf.placeholder(tf.int32, (None, 1, 1, 8), name='labels') with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=False, num_classes=8) # チェックポイントの読み込み saver = tf.train.Saver() saver.restore(save_path=IN, sess=sess) # freeze graph output_nodes = ['resnet_v1_50/SpatialSqueeze'] frozen_graph = tf.graph_util.convert_variables_to_constants( sess, sess.graph.as_graph_def(), output_node_names=output_nodes) from convert_relu6 import convertRelu6 frozen_graph = convertRelu6(frozen_graph)
def __init__(self, **kwargs):
    """Configure the base class for ResNet-v1-101.

    Passes the checkpoint file name, the network name, the slim arg scope,
    the network constructor and the constant ``0`` to the parent
    initialiser, followed by any extra keyword arguments.
    """
    checkpoint_name = 'resnet101.ckpt'
    network_name = 'resnet_v1_101'
    arg_scope = resnet_v1.resnet_arg_scope()
    network_fn = resnet_v1.resnet_v1_101
    super().__init__(checkpoint_name, network_name, arg_scope, network_fn,
                     0, **kwargs)
def net_arg_scope():
    """Return the slim arg scope matching ``net_type``.

    ``net_type`` is read from the enclosing scope.  ``'resnet'`` selects
    ``resnet_v1.resnet_arg_scope()``, ``'vgg'`` selects
    ``vgg.vgg_arg_scope(False)``; any other value yields ``None``.
    """
    if net_type == 'resnet':
        selected_scope = resnet_v1.resnet_arg_scope()
    elif net_type == 'vgg':
        selected_scope = vgg.vgg_arg_scope(False)
    else:
        selected_scope = None
    return selected_scope
def _feature_extractor(self, input, mode, scope=None, relu_leakiness=0.1):
    """Build the ResNet-50 + pyramid feature extractor.

    A ResNet-50 backbone is built under the ``FeatureX1`` scope, a feature
    pyramid is derived from its end points, pyramid levels P2-P4 are each run
    through a stack of stride-2 residual units, concatenated with P5 along
    axis 3, reduced by two more residual units, averaged over axes 1 and 2
    and finally L2-normalised.

    Args:
        input: Tensor used as the default value of the ``input_image``
            placeholder, which is declared with shape (None, 300, 300, 3).
        mode: Forwarded unchanged to ``resnet_utils.residual``. Note that the
            backbone's ``is_training`` flag is derived from ``self.mode``,
            not from this argument.
        scope: Optional prefix; when given, ``scope + "/"`` is prepended to
            every end-point name in the pyramid map.
        relu_leakiness: Forwarded unchanged to ``resnet_utils.residual``.

    Returns:
        A ``(feature, extra_train_ops)`` tuple: the normalised feature tensor
        and the train-op list threaded through ``resnet_utils.residual``.
    """
    # NOTE(review): the original indentation of this function was lost; the
    # scope nesting below (what lives inside "FeatureX1" / "FeatureX2") is a
    # reconstruction and should be confirmed against existing checkpoints.
    image = tf.placeholder_with_default(input, (None, 300, 300, 3),
                                        'input_image')

    pyramid_map = {
        'C1': 'FeatureX1/resnet_v1_50/conv1/Relu:0',
        'C2': 'FeatureX1/resnet_v1_50/block1/unit_2/bottleneck_v1',
        'C3': 'FeatureX1/resnet_v1_50/block2/unit_3/bottleneck_v1',
        'C4': 'FeatureX1/resnet_v1_50/block3/unit_5/bottleneck_v1',
        'C5': 'FeatureX1/resnet_v1_50/block4/unit_3/bottleneck_v1',
    }
    if scope is not None:
        # dict.iteritems() does not exist on Python 3; .items() works on both
        # major versions, and building a fresh dict avoids mutating the
        # mapping while it is being iterated.
        pyramid_map = {
            key: scope + "/" + value for key, value in pyramid_map.items()
        }

    with tf.variable_scope("FeatureX1"):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=0.000005)):
            logits, end_points = resnet_v1.resnet_v1_50(
                image, 1000, is_training=self.mode == 'train')
        pyramid = pyramid_network.build_pyramid(pyramid_map, end_points)

    def _residual_stack(x, train_ops, specs):
        """Apply one stride-2 residual unit per spec under scopes block_<i>."""
        for index, (depth_in, depth_out, activate_first) in enumerate(specs):
            with tf.variable_scope("block_%d" % index):
                x, train_ops = resnet_utils.residual(
                    x, depth_in, depth_out, resnet_utils.stride_arr(2), mode,
                    train_ops, relu_leakiness,
                    activate_before_residual=activate_first)
        return x, train_ops

    extra_train_ops = []
    py_features = [pyramid['P5']]

    # Each entry is (pyramid level, residual unit specs); shallower levels
    # get more stride-2 units.
    level_specs = (
        ('2', [(256, 64, True), (64, 64, False), (64, 64, False)]),
        ('3', [(256, 64, True), (64, 64, False)]),
        ('4', [(256, 64, True)]),
    )

    with tf.variable_scope("FeatureX2"):
        for level, specs in level_specs:
            with tf.variable_scope("pyramid_" + level):
                x, extra_train_ops = _residual_stack(
                    pyramid['P' + level], extra_train_ops, specs)
            py_features.append(x)

        x = tf.concat(py_features, axis=3, name='concat')
        x, extra_train_ops = _residual_stack(
            x, extra_train_ops, [(448, 256, True), (256, 256, False)])

        global_avg = tf.reduce_mean(x, [1, 2], name='global_avg')
        # NOTE(review): normalisation runs along axis 0 of ``global_avg``;
        # confirm this is intended rather than a per-row (axis 1) norm.
        feature = tf.nn.l2_normalize(global_avg, 0, name='Feature')

    return feature, extra_train_ops
def build_pspnet(inputs, label_size, num_classes, preset_model='PSPNet-Res50',
                 pooling_type="MAX", weight_decay=1e-5,
                 upscaling_method="bilinear", is_training=True,
                 pretrained_dir="models"):
    """Assemble the PSPNet segmentation graph on top of a ResNet backbone.

    Arguments:
      inputs: The input tensor.
      label_size: Size of the final label tensor; each entry is divided by 8
        to obtain the feature-map shape given to the pyramid pooling module,
        and the whole value is passed to ``Upsampling`` for bilinear
        upscaling.
      num_classes: Number of output channels of the final 1x1 convolution.
      preset_model: One of 'PSPNet-Res50', 'PSPNet-Res101', 'PSPNet-Res152'.
      pooling_type: Forwarded to ``PyramidPoolingModule``.
      weight_decay: Forwarded to ``resnet_v1.resnet_arg_scope``.
      upscaling_method: "conv" or "bilinear" (case-insensitive); any other
        value leaves the feature map at its current resolution.
      is_training: Forwarded to the ResNet builder.
      pretrained_dir: Directory holding the ``<backbone>.ckpt`` file.

    Returns:
      ``(net, init_fn)``: the logits tensor and a function that restores the
      backbone variables from the pre-trained checkpoint.

    Raises:
      ValueError: if ``preset_model`` is not one of the supported presets.
    """
    presets = (
        ('PSPNet-Res50', 'resnet_v1_50'),
        ('PSPNet-Res101', 'resnet_v1_101'),
        ('PSPNet-Res152', 'resnet_v1_152'),
    )

    inputs = mean_image_subtraction(inputs)

    backbone = next(
        (name for preset, name in presets if preset_model == preset), None)
    if backbone is None:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        build_backbone = getattr(resnet_v1, backbone)
        logits, end_points = build_backbone(
            inputs, is_training=is_training, scope=backbone)
    # PSPNet requires pre-trained ResNet weights
    checkpoint_path = os.path.join(pretrained_dir, backbone + '.ckpt')
    init_fn = slim.assign_from_checkpoint_fn(
        checkpoint_path, slim.get_model_variables(backbone))

    pooled = [
        end_points[key] for key in ('pool5', 'pool4', 'pool3', 'pool2')
    ]

    feature_map_shape = [int(dim / 8.0) for dim in label_size]
    # Only the 'pool3' end point feeds the pyramid pooling module.
    psp = PyramidPoolingModule(pooled[2],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net)
    net = tf.nn.relu(net)

    method = upscaling_method.lower()
    if method == "conv":
        for width in (256, 128, 64):
            net = ConvUpscaleBlock(net, width, kernel_size=[3, 3], scale=2)
            net = ConvBlock(net, width)
    elif method == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.dropout(net, keep_prob=(0.9))
    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                      scope='logits')

    return net, init_fn
def res_v1_101_lstm(input_imgs, input_seqs, input_masks, batch_size,
                    embedding_size, vocab_size, is_training,
                    lstm_dropout_keep_prob):
    """Build a ResNet-101 image encoder followed by an LSTM word decoder.

    When ``is_training is True`` the training graph is built and the function
    returns ``(output_logits, res_variables)``, where ``res_variables`` maps
    each ResNet variable name (outer scope prefix and ``:0`` suffix removed)
    to the variable. Otherwise a 30-step greedy decoding graph is built and
    the stacked predicted words are returned.
    """
    # NOTE(review): the original indentation was lost; the scope nesting here
    # is a reconstruction -- confirm variable names against checkpoints.
    with tf.variable_scope('res_v1_101_lstm'):
        # Sequence embedding layer
        with tf.variable_scope("seq_embedding"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(minval=-0.08,
                                                          maxval=0.08))

        # Image feature extraction layer
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(trainable=is_training)):
            # is_training=False keeps the batch-norm running mean/variance
            # fixed.
            image_feature, _ = resnet_v1.resnet_v1_101(
                input_imgs, None, is_training=False, output_stride=32)

        # Image embedding layer
        image_feature = tf.squeeze(image_feature, axis=[1, 2])
        image_embedding = slim.fully_connected(
            image_feature,
            embedding_size,
            activation_fn=None,
            weights_initializer=tf.truncated_normal_initializer(0, 0.01),
            biases_initializer=tf.zeros_initializer,
            weights_regularizer=slim.l2_regularizer(0.0005),
            scope='image_embedding')

        # LSTM layer
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=embedding_size,
                                                 state_is_tuple=True)

        # Training process
        if is_training is True:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(
                lstm_cell,
                input_keep_prob=lstm_dropout_keep_prob,
                output_keep_prob=lstm_dropout_keep_prob)
            seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

            with tf.variable_scope(
                    "lstm",
                    initializer=tf.random_uniform_initializer(
                        minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embeddings to set the initial LSTM state.
                start_state = lstm_cell.zero_state(batch_size=batch_size,
                                                   dtype=tf.float32)
                _, initial_state = lstm_cell(image_embedding, start_state)
                lstm_scope.reuse_variables()

                sequence_length = tf.reduce_sum(input_masks, 1)
                lstm_outputs, _ = tf.nn.dynamic_rnn(
                    cell=lstm_cell,
                    inputs=seq_embeddings,
                    sequence_length=sequence_length,
                    initial_state=initial_state,
                    dtype=tf.float32,
                    scope=lstm_scope)

            lstm_outputs = tf.reshape(lstm_outputs,
                                      [-1, lstm_cell.output_size])

            # Word logits layer
            output_logits = slim.fully_connected(
                lstm_outputs,
                vocab_size,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(0, 0.01),
                biases_initializer=tf.zeros_initializer,
                weights_regularizer=slim.l2_regularizer(0.0005),
                scope='logits')

            # Key each ResNet variable by its name with the first 16
            # characters and the last 2 characters sliced off.
            res_variables = {
                var.name[16:-2]: var
                for var in slim.get_variables('res_v1_101_lstm')
                if 'resnet_v1_101' in var.name
            }
            return output_logits, res_variables

        # Inference process
        weights = tf.get_variable("logits/weights",
                                  [embedding_size, vocab_size])
        biases = tf.get_variable("logits/biases", [vocab_size])

        with tf.variable_scope(
                "lstm",
                initializer=tf.random_uniform_initializer(
                    minval=-0.08, maxval=0.08)) as lstm_scope:
            # Feed the image embeddings to set the initial LSTM state.
            start_state = lstm_cell.zero_state(batch_size=batch_size,
                                               dtype=tf.float32)
            _, initial_state = lstm_cell(image_embedding, start_state)
            lstm_scope.reuse_variables()

            memory_state = initial_state
            output_words = [input_seqs[0]]
            current_words = input_seqs
            # TODO: replace the end condition of the loop with meeting the
            # end word
            for _ in range(30):
                word_embeddings = tf.nn.embedding_lookup(embedding_map,
                                                         current_words)
                cell_output, memory_state = lstm_cell(word_embeddings,
                                                      memory_state)
                step_logits = tf.matmul(cell_output, weights) + biases
                predicted = tf.argmax(step_logits, -1)
                output_words.append(predicted[0])
                # Greedy decoding: the prediction becomes the next input.
                current_words = predicted

        output_words = tf.stack(output_words)
        return output_words
def build_refinenet(inputs, num_classes, preset_model='RefineNet-Res101',
                    weight_decay=1e-5, is_training=True,
                    upscaling_method="bilinear", pretrained_dir="models",
                    label_size=None):
    """Builds the RefineNet model on top of a pre-trained ResNet backbone.

    Arguments:
      inputs: The input tensor.
      num_classes: Number of output channels of the final 1x1 convolution.
      preset_model: One of 'RefineNet-Res50', 'RefineNet-Res101',
        'RefineNet-Res152'.
      weight_decay: Forwarded to ``resnet_v1.resnet_arg_scope``.
      is_training: Forwarded to the ResNet builder.
      upscaling_method: "conv" or "bilinear" (case-insensitive); any other
        value leaves the refined feature map at its current resolution.
      pretrained_dir: Directory holding the ``<backbone>.ckpt`` file.
      label_size: Target size passed to ``Upsampling`` when
        ``upscaling_method`` is "bilinear". Required for that method.

    Returns:
      ``(net, init_fn)``: the logits tensor and a function that restores the
      backbone variables from the pre-trained checkpoint.

    Raises:
      ValueError: if ``preset_model`` is unsupported, or if bilinear
        upscaling is requested without ``label_size``.
    """
    presets = (
        ('RefineNet-Res50', 'resnet_v1_50'),
        ('RefineNet-Res101', 'resnet_v1_101'),
        ('RefineNet-Res152', 'resnet_v1_152'),
    )

    inputs = mean_image_subtraction(inputs)

    backbone = next(
        (name for preset, name in presets if preset_model == preset), None)
    if backbone is None:
        # The message previously omitted ResNet 50 although it is accepted.
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports "
            "ResNet 50, ResNet 101, and ResNet 152" % (preset_model))

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = getattr(resnet_v1, backbone)(
            inputs, is_training=is_training, scope=backbone)
    # RefineNet requires pre-trained ResNet weights
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(pretrained_dir, backbone + '.ckpt'),
        slim.get_model_variables(backbone))

    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    # Project every backbone stage to 256 channels before refinement.
    h = [slim.conv2d(stage, 256, 1) for stage in f]

    # Chain the refine blocks from the coarsest stage to the finest.
    refined = RefineBlock(high_inputs=None, low_inputs=h[0])
    for low in h[1:]:
        refined = RefineBlock(refined, low)

    # The upscaling branches previously read ``net`` before it was assigned
    # and the final convolution ignored their result; seed ``net`` with the
    # last refine block output and feed the upscaled tensor to the logits.
    net = refined

    method = upscaling_method.lower()
    if method == "conv":
        # NOTE(review): three x2 stages are kept as written; confirm the
        # resulting resolution matches the labels.
        for width in (256, 128, 64):
            net = ConvUpscaleBlock(net, width, kernel_size=[3, 3], scale=2)
            net = ConvBlock(net, width)
    elif method == "bilinear":
        if label_size is None:
            raise ValueError(
                "upscaling_method='bilinear' requires label_size to be "
                "provided")
        net = Upsampling(net, label_size)

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                      scope='logits')

    return net, init_fn
def export():
    """Export a ResNet-50 classifier as a TensorFlow SavedModel.

    Reads the synset list and synset->description metadata, builds an
    inference graph that parses a float feature ``'x'`` from serialized
    ``tf.Example`` protos, runs ``resnet_v1_50`` on it, maps the top-k
    indices to human-readable descriptions, restores the weights from
    ``resnet_v1_50.ckpt`` and writes a SavedModel with a classification and a
    prediction signature under ``FLAGS.output_dir/FLAGS.model_version``.

    Relies on module-level names not defined in this function (SYNSET_FILE,
    METADATA_FILE, NUM_TOP_CLASSES, pre_trained_model_dir, model_name,
    FLAGS).
    """
    # Create index->synset mapping
    synsets = []
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()
    # Create synset->metadata mapping; every line must be "<synset>\t<text>".
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # This function was adapted from the TensorFlow Inception export
        # example; that version parsed JPEG bytes from an 'image/encoded'
        # string feature, whereas this one reads a float feature 'x'.
        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'x': tf.FixedLenFeature(shape=[], dtype=tf.float32),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        # reshape the input image to its original dimension
        # NOTE(review): the feature is declared with shape=[] yet reshaped to
        # (1, 224, 224, 3) -- confirm how clients are expected to encode 'x'.
        tf_example['x'] = tf.reshape(tf_example['x'], (1, 224, 224, 3))
        input_tensor = tf.identity(
            tf_example['x'], name='x')  # use tf.identity() to assign name

        # Run inference.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                     1000,
                                                     is_training=False)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(net, NUM_TOP_CLASSES)

        # Create a constant string Tensor where the i'th element is
        # the human readable class description for the i'th index.
        # NOTE(review): index 0 is reserved for 'unused background' (a
        # convention inherited from the Inception example) while the network
        # above is built with 1000 outputs -- confirm the index alignment.
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore variables from the pre-trained checkpoint. (The Inception
        # example restored exponential-moving-average shadow variables from
        # the latest training checkpoint instead.)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            saver.restore(
                sess, os.path.join(pre_trained_model_dir,
                                   "resnet_v1_50.ckpt"))
            print("Model", model_name, "restored.")

            # Export inference model.
            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Build the signature_def_map.
            # Classification signature: serialized examples in, class
            # strings and scores out.
            classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                serialized_tf_example)
            classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                classes)
            scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                values)

            classification_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={
                        tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                        classify_inputs_tensor_info
                    },
                    outputs={
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
                        classes_output_tensor_info,
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                        scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    CLASSIFY_METHOD_NAME))

            # Prediction signature: the reshaped image tensor in, the same
            # class strings and scores out.
            predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                input_tensor)
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_inputs_tensor_info},
                    outputs={
                        'classes': classes_output_tensor_info,
                        'scores': scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    PREDICT_METHOD_NAME))

            # main_op initialises the index->string lookup table when the
            # SavedModel is loaded.
            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images':
                    prediction_signature,
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
                },
                main_op=tf.tables_initializer(),
                strip_default_attrs=True)

            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)
def train(args):
    """Train ResNet-110 on CIFAR with per-epoch validation and periodic tests.

    Builds the training and evaluation graphs (sharing variables), then runs
    training steps until the current epoch exceeds ``args.epoch``. Losses are
    logged every ``args.log_every`` steps; at the end of each epoch a
    checkpoint is saved and validation accuracy printed; every
    ``args.eval_every`` epochs test accuracy is printed as well.
    """
    dataset = CifarDataSet(args.batch_size, args.data_dir)
    dataset.make_batch_train()
    dataset.make_batch_valid_or_test()

    num_classes = 100 if 'cifar-100' in args.data_dir else 10

    # it's actually a 112 since there are 2 additional 1x1 conv for shortcuts
    model = resnet_v1.resnet_v1_110
    print ("Data loaded! Building model...")

    # Training graph.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = model(dataset.images_train, num_classes)
        logits = tf.squeeze(net, [1, 2], name='SqueezedLogits')

    # Evaluation graph, re-using the training variables.
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net_eval, _ = model(dataset.images_vt, num_classes,
                            is_training=False, reuse=True)
        predictions = tf.argmax(tf.squeeze(net_eval, [1, 2]), axis=-1)

    cross_entropy_loss_op = tf.losses.sparse_softmax_cross_entropy(
        labels=dataset.labels_train, logits=logits)
    l2_loss_op = tf.losses.get_regularization_loss()
    loss_op = cross_entropy_loss_op + l2_loss_op

    num_train_batches = dataset.images_np['train'].shape[0] // args.batch_size
    gstep_op, lr_op, train_op, saver, sess = get_train_ops(
        args, num_train_batches, loss_op)

    def count_correct(split, use_validation):
        """Re-initialise the eval iterator and return (hits, split size)."""
        sess.run(dataset.iterator_vt.initializer,
                 feed_dict={dataset.validation: use_validation})
        total = dataset.images_np[split].shape[0]
        hits = 0
        for _ in range(total // dataset.eval_batch_size):
            preds, labels = sess.run([predictions, dataset.labels_vt])
            hits += np.sum(preds == labels)
        return hits, total

    fetches = [cross_entropy_loss_op, l2_loss_op, gstep_op, lr_op, train_op]
    separator = "=" * 80

    print ("Train ops get! Start training...")
    while True:
        cross_entropy_loss, l2_loss, gstep, lr, _ = sess.run(fetches)
        cur_epoch = gstep // num_train_batches + 1

        if gstep % args.log_every == 0:
            pieces = [
                "({:5d}/{:5d})".format(gstep, num_train_batches * args.epoch),
                " cross entropy loss: {:.4f}, l2 loss: {:.4f},".format(
                    cross_entropy_loss, l2_loss),
                " lr: {:.4f}".format(lr),
                " (ep: {:3d})".format(cur_epoch),
            ]
            print ("".join(pieces))

        # End of an epoch: checkpoint, then validate.
        if (gstep + 1) % num_train_batches == 0:
            print ("Saving .ckpt and evaluating with validation set...")
            saver.save(sess, os.path.join(args.save_dir, 'model.ckpt'),
                       global_step=cur_epoch)
            hits, total = count_correct('valid', True)
            print ("validation accuracy: {:.3f}% ({:4d}/{:4d})".format(
                100 * hits / total, hits, total))
            print (separator)

        # Every ``eval_every`` epochs: also score the test split.
        if (gstep + 1) % (num_train_batches * args.eval_every) == 0:
            print ("Evaluating with test set...")
            hits, total = count_correct('test', False)
            print ("test accuracy: {:.3f}% ({:5d}/{:5d})".format(
                100 * hits / total, hits, total))
            print (separator)

        if cur_epoch > args.epoch:
            break

    print ("Done!")
def build_gcn(inputs, num_classes, preset_model='GCN-Res101',
              weight_decay=1e-5, is_training=True,
              upscaling_method="bilinear", pretrained_dir="models"):
    """Builds the GCN model on top of a pre-trained ResNet backbone.

    Arguments:
      inputs: The input tensor.
      num_classes: Number of output channels of the final 1x1 convolution.
      preset_model: One of 'GCN-Res50', 'GCN-Res101', 'GCN-Res152'.
      weight_decay: Forwarded to ``resnet_v1.resnet_arg_scope``.
      is_training: Forwarded to the ResNet builder.
      upscaling_method: Accepted for interface compatibility; not used by
        this function.
      pretrained_dir: Directory holding the ``<backbone>.ckpt`` file.

    Returns:
      ``(net, init_fn)``: the logits tensor and a function that restores the
      backbone variables from the pre-trained checkpoint.

    Raises:
      ValueError: if ``preset_model`` is not one of the supported presets.
    """
    presets = (
        ('GCN-Res50', 'resnet_v1_50'),
        ('GCN-Res101', 'resnet_v1_101'),
        ('GCN-Res152', 'resnet_v1_152'),
    )

    inputs = mean_image_subtraction(inputs)

    backbone = next(
        (name for preset, name in presets if preset_model == preset), None)
    if backbone is None:
        # The message previously omitted ResNet 50 although it is accepted.
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports "
            "ResNet 50, ResNet 101, and ResNet 152" % (preset_model))

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = getattr(resnet_v1, backbone)(
            inputs, is_training=is_training, scope=backbone)
    # GCN requires pre-trained ResNet weights
    init_fn = slim.assign_from_checkpoint_fn(
        os.path.join(pretrained_dir, backbone + '.ckpt'),
        slim.get_model_variables(backbone))

    res = [end_points['pool5'], end_points['pool4'],
           end_points['pool3'], end_points['pool2']]

    # NOTE(review): the decoder width is fixed at 21 regardless of
    # ``num_classes`` -- confirm this is intended.
    width = 21

    # Coarsest stage: no skip connection to merge.
    down_5 = GlobalConvBlock(res[0], n_filters=width, size=3)
    down_5 = BoundaryRefinementBlock(down_5, n_filters=width, kernel_size=[3, 3])
    down_5 = ConvUpscaleBlock(down_5, n_filters=width, kernel_size=[3, 3], scale=2)

    # Each finer stage is refined, summed with the upscaled coarser result,
    # refined again and upscaled by 2.
    down_4 = GlobalConvBlock(res[1], n_filters=width, size=3)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=width, kernel_size=[3, 3])
    down_4 = tf.add(down_4, down_5)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=width, kernel_size=[3, 3])
    down_4 = ConvUpscaleBlock(down_4, n_filters=width, kernel_size=[3, 3], scale=2)

    down_3 = GlobalConvBlock(res[2], n_filters=width, size=3)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=width, kernel_size=[3, 3])
    down_3 = tf.add(down_3, down_4)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=width, kernel_size=[3, 3])
    down_3 = ConvUpscaleBlock(down_3, n_filters=width, kernel_size=[3, 3], scale=2)

    down_2 = GlobalConvBlock(res[3], n_filters=width, size=3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=width, kernel_size=[3, 3])
    down_2 = tf.add(down_2, down_3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=width, kernel_size=[3, 3])
    down_2 = ConvUpscaleBlock(down_2, n_filters=width, kernel_size=[3, 3], scale=2)

    net = BoundaryRefinementBlock(down_2, n_filters=width, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=width, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=width, kernel_size=[3, 3])

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
def add_extra_layers(self, net):
    """Append the extra SSD blocks and the reverse skip path to ``net``.

    Builds additional stride-2 bottleneck blocks (block5-block7, plus block8
    when ``args.image_size == 512``) under ``DEFAULT_SSD_SCOPE``, then a
    chain of ``bottleneck_skip`` layers that combine the running tensor with
    earlier block outputs taken from ``self.outputs``. All collected end
    points are merged into ``self.outputs``, and a 1x1 spatial mean is stored
    under ``DEFAULT_SSD_SCOPE + '/pool6'``. Nothing is returned.
    """
    # NOTE(review): the original indentation was lost; the nesting of the
    # scopes below is a reconstruction and should be confirmed.
    with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=self.training,
                                                   weight_decay=self.weight_decay,
                                                   batch_norm_decay=args.bn_decay)):
        block_depth = 2
        num_fm = 2048
        # Each block: one stride-2 unit followed by (block_depth - 1)
        # stride-1 units.
        blocks = [
            resnet_utils.Block(
                'block5', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            resnet_utils.Block(
                'block6', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            resnet_utils.Block(
                'block7', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
        ]
        if args.image_size == 512:
            blocks += [
                resnet_utils.Block(
                    'block8', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            ]

        net, endpoints = resnet_v1.resnet_v1(net, blocks,
                                             global_pool=False,
                                             include_root_block=False,
                                             reuse=self.reuse,
                                             scope=DEFAULT_SSD_SCOPE)
        self.outputs.update(endpoints)

        with tf.variable_scope(DEFAULT_SSD_SCOPE+"_back", reuse=self.reuse):
            # End points of the reverse path are gathered through this
            # collection and converted to a dict afterwards.
            end_points_collection = "reverse_ssd_end_points"
            with slim.arg_scope([slim.conv2d, bottleneck_skip],
                                outputs_collections=end_points_collection):
                top_fm = args.top_fm
                int_fm = top_fm//4
                if args.image_size == 512:
                    # as long as the number of pooling layers is bigger due to
                    # the higher resolution, an extra layer is appended
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                                          top_fm, int_fm, scope='block_rev7')

                net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block6'],
                                      top_fm, int_fm, scope='block_rev6')
                net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block5'],
                                      top_fm, int_fm, scope='block_rev5')
                # From here on the skip inputs come from the blocks stored
                # under ``self.scope``.
                net = bottleneck_skip(net, self.outputs[self.scope+'/block4'],
                                      top_fm, int_fm, scope='block_rev4')
                net = bottleneck_skip(net, self.outputs[self.scope+'/block3'],
                                      top_fm, int_fm, scope='block_rev3')
                net = bottleneck_skip(net, self.outputs[self.scope+'/block2'],
                                      top_fm, int_fm, scope='block_rev2')
                if args.x4:
                    # To provide stride 4 we add one more layer with upsampling
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block1'],
                                          top_fm, int_fm, scope='block_rev1')
            endpoints = slim.utils.convert_collection_to_dict(end_points_collection)
        self.outputs.update(endpoints)

        # Creating an output of spatial resolution 1x1 with conventional name 'pool6'
        # NOTE(review): the keys below hard-code the 'ssd_back' prefix, which
        # presumably equals DEFAULT_SSD_SCOPE + "_back" -- confirm.
        if args.image_size == 512:
            self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                    tf.reduce_mean(self.outputs['ssd_back/block_rev7/shortcut'],
                                   [1, 2], name='pool6', keep_dims=True)
        else:
            self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                    tf.reduce_mean(self.outputs['ssd_back/block_rev6/shortcut'],
                                   [1, 2], name='pool6', keep_dims=True)