def _build(self, weight_path, sess, input_type=InputType.BASE64_JPEG):
    self.input_tensor = None
    self.session = sess
    if input_type == InputType.TENSOR:
        self.input = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name="input")
        self.input_tensor = self.input
    elif input_type == InputType.BASE64_JPEG:
        self.input = tf.placeholder(tf.string, shape=(None,), name='input')
        self.input_tensor = load_base64_tensor(self.input)
    else:
        raise ValueError('invalid input type')

    # only load inference model
    with arg_scope(resnet_v1.resnet_arg_scope(activation_fn=tf.nn.relu,
                                              weight_decay=0.0001)):
        self.logits_val, end_points = resnet_v1.resnet_v1_101(
            self.input_tensor,
            num_classes=self.num_classes,
            is_training=False,
            reuse=tf.AUTO_REUSE)

    # self.predictions = tf.nn.softmax(self.logits_val, name='Softmax')
    self.predictions = end_points['predictions']
    self.output = tf.identity(self.predictions, name='outputs')

    if weight_path is not None:
        self.load_trained_weights(weight_path)
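# A minimal usage sketch for the inference wrapper above. The surrounding
# class and its constructor are not shown in the source, so `Classifier`
# is a hypothetical name; the web-safe base64 encoding is an assumption
# based on load_base64_tensor presumably using tf.decode_base64.
import base64
import tensorflow as tf

with tf.Session() as sess:
    clf = Classifier(num_classes=2)  # hypothetical constructor
    clf._build('./models/resnet_v1_101.ckpt', sess, input_type=InputType.BASE64_JPEG)
    with open('image.jpg', 'rb') as f:
        b64_bytes = base64.urlsafe_b64encode(f.read())
    probs = sess.run(clf.output, feed_dict={clf.input: [b64_bytes]})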
def test_resnet_v1_101(img_dir):
    """
    Test ResNet-V1-101 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images', shape=[None, 224, 224, 3], dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_101(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_101.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name('resnet_v1_101/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]
        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of ResNet-V1-101:', name, prob)
    return name, prob
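# Example call for the helper above; `label_dict` (an ImageNet index-to-name
# mapping, queried at pred + 1 because the 1000-class checkpoint has no
# background entry at index 0) is assumed to be defined by the surrounding
# script, as is the checkpoint under ./models/.
name, prob = test_resnet_v1_101('./images/cat.jpg')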
def single_tower(colors,
                 depths,
                 num_classes=3,
                 num_channels=1000,
                 is_training=True,
                 global_pool=False,
                 output_stride=16,
                 spatial_squeeze=False,
                 scope='arcnet'):
    inputs = tf.concat([colors, depths], axis=3)
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=scope + '_tower')
    with tf.variable_scope(scope, 'arcnet', [net]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # TODO: add batch normalization to the following conv layers.
        with slim.arg_scope([slim.conv2d], outputs_collections=end_points_collection):
            net = slim.conv2d(net, 512, [1, 1], scope='conv1')
            net = slim.conv2d(net, 128, [1, 1], scope='conv2')
            net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
            height, width = net.get_shape().as_list()[1:3]
            net = tf.image.resize_bilinear(net, [height * 2, width * 2],
                                           name='resize_bilinear')
            end_points = slim.utils.convert_collection_to_dict(end_points_collection)
            end_points['logits'] = net
    return net, end_points
def create_network(self):
    with tf.contrib.slim.arg_scope(resnet_arg_scope()):
        logits, end_points = resnet_v1_101(self.img,
                                           num_classes=self.nb_class,
                                           is_training=self.is_training,
                                           global_pool=True,
                                           spatial_squeeze=True)
    self.logits = logits
    self.probabilities = tf.nn.sigmoid(self.logits)
    self.predictions = tf.cast(self.probabilities >= self.prediction_threshold, tf.float32)
def model(model_type, images, weight_decay=1e-5, is_training=True):
    images = mean_image_subtraction(images)
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training,
                                                     scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {'decay': 0.997, 'epsilon': 1e-5, 'scale': True,
                             'is_training': is_training}
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            for i in range(4):
                h[i] = slim.conv2d(f[i], 256, 1)
            for i in range(4):
                print('Shape of h_{} {}'.format(i, h[i].shape))
            g[0] = RefineBlock(high_inputs=None, low_inputs=h[0])
            g[1] = RefineBlock(g[0], h[1])
            g[2] = RefineBlock(g[1], h[2])
            g[3] = RefineBlock(g[2], h[3])
            # g[3] = unpool(g[3], scale=4)
            output = g[3]
            if model_type == 'sesnet':
                in_shape = g[3].shape
                output = tf.expand_dims(g[3], axis=0)
                lstm_cell_1 = ConvLSTMCell([in_shape[1], in_shape[2]], in_shape[3] // 2, [3, 3])
                lstm_cell_2 = ConvLSTMCell([in_shape[1], in_shape[2]], in_shape[3] // 4, [3, 3])
                with tf.variable_scope('rnn_scope_0', reuse=tf.AUTO_REUSE):
                    output0, _ = tf.nn.dynamic_rnn(lstm_cell_1, output, dtype=output.dtype)
                with tf.variable_scope('rnn_scope_1', reuse=tf.AUTO_REUSE):
                    output1, _ = tf.nn.dynamic_rnn(lstm_cell_2, output0, dtype=output0.dtype)
                output = tf.squeeze(output1, axis=0)
            F_score = slim.conv2d(output, 2, 1, activation_fn=tf.nn.relu, normalizer_fn=None)
    return F_score
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1, 1, 1, 3))

    with slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_101(image, num_classes=None,
                                     is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_101/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_101'
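# A minimal sketch of pulling the embedding out of the endpoints() helper
# above, assuming the same imports (_RGB_MEAN, slim, resnet_v1_101) are in
# scope. block4 of ResNet-V1-101 has 2048 channels in the slim
# implementation, so the averaged 'model_output' is a 2048-d vector per image.
import tensorflow as tf

images_ph = tf.placeholder(tf.float32, [None, 256, 128, 3])
eps, scope_name = endpoints(images_ph, is_training=False)
embedding = eps['model_output']  # shape [batch, 2048]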
def resnet_v1_101(inputs, is_training, opts):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(
            weight_decay=opts.weight_decay,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon,
            activation_fn=tf.nn.relu)):
        return resnet_v1.resnet_v1_101(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)
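# A usage sketch for the wrapper above. `opts` only needs the attributes
# read inside the function, so a namedtuple (an assumption, not part of the
# source) is enough; the values below are illustrative defaults.
from collections import namedtuple

import tensorflow as tf

Opts = namedtuple('Opts', ['weight_decay', 'batch_norm_decay', 'batch_norm_epsilon',
                           'num_classes', 'global_pool', 'spatial_squeeze'])
opts = Opts(weight_decay=1e-4, batch_norm_decay=0.997, batch_norm_epsilon=1e-5,
            num_classes=1000, global_pool=True, spatial_squeeze=True)

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, end_points = resnet_v1_101(images, is_training=False, opts=opts)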
def model_resnet_v1_101(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images)
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training,
                                                     scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
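# A small shape check for the EAST-style heads above (a sketch assuming
# FLAGS.text_scale, mean_image_subtraction and unpool are defined, as in the
# training script this snippet comes from). g[3] sits at stride 4, so a
# 512x512 input yields 128x128 maps.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 512, 512, 3])
score, geometry = model_resnet_v1_101(images, is_training=False)
print(score.shape)     # (?, 128, 128, 1)
print(geometry.shape)  # (?, 128, 128, 5): 4 box distances + 1 angle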
def model(images, weight_decay=1e-5, is_training=True):
    images = mean_image_subtraction(images)
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training,
                                                     scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            for i in range(4):
                h[i] = slim.conv2d(f[i], 256, 1)
            for i in range(4):
                print('Shape of h_{} {}'.format(i, h[i].shape))
            g[0] = RefineBlock(high_inputs=None, low_inputs=h[0])
            g[1] = RefineBlock(g[0], h[1])
            g[2] = RefineBlock(g[1], h[2])
            g[3] = RefineBlock(g[2], h[3])
            # g[3] = unpool(g[3], scale=4)
            F_score = slim.conv2d(g[3], 21, 1, activation_fn=tf.nn.relu,
                                  normalizer_fn=None)
    return F_score
def build_FPN(images, config, is_training, backbone='resnet50'):
    # images: [batch, h, w, channels]
    # Return: pyramid_feature Dict{P2, P3, P4, P5} of feature maps from
    # different levels of the feature pyramid. Each is [batch, height, width, channels].
    pyramid = {}
    # build backbone network
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        if backbone == "resnet50":
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
            pyramid['C2'] = end_points['resnet_v1_50/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points['resnet_v1_50/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points['resnet_v1_50/block3/unit_5/bottleneck_v1']
            pyramid['C5'] = end_points['resnet_v1_50/block4/unit_3/bottleneck_v1']
        elif backbone == "resnet101":
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
            pyramid['C2'] = end_points['resnet_v1_101/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points['resnet_v1_101/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points['resnet_v1_101/block3/unit_22/bottleneck_v1']
            pyramid['C5'] = end_points['resnet_v1_101/block4/unit_3/bottleneck_v1']
        else:
            print("Unknown backbone : ", backbone)
    # build FPN
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(pyramid['C5'],
                                                config.TOP_DOWN_PYRAMID_SIZE, 1)
            for i in range(4, 1, -1):
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i + 1)],
                                             size=(upshape[1], upshape[2]))
                c = slim.conv2d(pyramid['C%d' % i], config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
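# A usage sketch for build_FPN. `config` only needs TOP_DOWN_PYRAMID_SIZE
# here, so a tiny stand-in class (an assumption, not part of the source) is
# enough; P2..P5 then sit at strides 4, 8, 16 and 32 of the input.
import tensorflow as tf

class FPNConfig(object):
    TOP_DOWN_PYRAMID_SIZE = 256  # illustrative value

images = tf.placeholder(tf.float32, [1, 512, 512, 3])
pyramid = build_FPN(images, FPNConfig(), is_training=False, backbone='resnet101')
for name in ('P2', 'P3', 'P4', 'P5'):
    print(name, pyramid[name].shape)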
def main():
    """
    You can also run these commands manually to generate the pb file
    1. git clone https://github.com/tensorflow/models.git
    2. export PYTHONPATH=Path_to_your_model_folder
    3. python alexnet.py
    """
    tf.set_random_seed(1)
    height, width = 224, 224
    inputs = tf.Variable(tf.random_uniform((2, height, width, 3)), name='input')
    inputs = tf.identity(inputs, "input_node")
    net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=True)
    print("nodes in the graph")
    for n in end_points:
        print(n + " => " + str(end_points[n]))
    net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x),
                      argv[2].split(','))
    run_model(net_outputs, argv[1], 'resnet_v1_101', argv[3] == 'True')
def main():
    """
    You can also run these commands manually to generate the pb file
    1. git clone https://github.com/tensorflow/models.git
    2. export PYTHONPATH=Path_to_your_model_folder
    3. python alexnet.py
    """
    height, width = 224, 224
    inputs = tf.Variable(tf.random_uniform((2, height, width, 3)), name='input')
    net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=True)
    print("nodes in the graph")
    for n in end_points:
        print(n + " => " + str(end_points[n]))
    net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x),
                      argv[2].split())
    run_model(net_outputs, argv[1])
def deep_cnn(input_imgs: tf.Tensor, is_training: bool, cnn_model='original_cnn',
             summaries: bool = True) -> tf.Tensor:
    input_tensor = input_imgs
    if input_tensor.shape[-1] == 1:
        input_channels = 1
    elif input_tensor.shape[-1] == 3:
        input_channels = 3
    else:
        raise NotImplementedError

    # Following source code, not paper
    if cnn_model == "resnet_50":
        with tf.variable_scope('resnet_50'):
            cnn_net, _ = resnet_v1_50(input_tensor, is_training=is_training,
                                      global_pool=False, on_text=True)
    elif cnn_model == "resnet_101":
        with tf.variable_scope('resnet_101'):
            cnn_net, _ = resnet_v1_101(input_tensor, is_training=is_training,
                                       global_pool=False, on_text=True)
    else:
        with tf.variable_scope('original_cnn'):
            cnn_net = original_cnn(input_tensor, input_channels,
                                   is_training=is_training, summaries=summaries)

    with tf.variable_scope('Reshaping_cnn'):
        shape = cnn_net.get_shape().as_list()  # [batch, height, width, features]
        transposed = tf.transpose(cnn_net, perm=[0, 2, 1, 3],
                                  name='transposed')  # [batch, width, height, features]
        conv_reshaped = tf.reshape(transposed, [shape[0], -1, shape[1] * shape[3]],
                                   name='reshaped')  # [batch, width, height x features]
    return conv_reshaped
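# A worked shape example for the 'Reshaping_cnn' step above (numbers are
# illustrative): a [batch=8, height=4, width=100, features=512] feature map
# is transposed to [8, 100, 4, 512] and flattened to [8, 100, 2048], i.e.
# one 2048-d feature vector per horizontal position, which is the sequence
# a downstream RNN decoder would consume.
import tensorflow as tf

x = tf.zeros([8, 4, 100, 512])
t = tf.transpose(x, perm=[0, 2, 1, 3])  # [8, 100, 4, 512]
seq = tf.reshape(t, [8, -1, 4 * 512])   # [8, 100, 2048]
print(seq.shape)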
def __call__(self, x_input):
    """Constructs the model and returns probabilities for the given input."""
    reuse = True if self.built else None
    x_input = image_normalize(x_input, normalization_method[5])
    x_input = tf.image.resize_images(x_input, [224, 224])
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_101(
            x_input, num_classes=self.num_classes - 1,
            is_training=False, reuse=reuse)
    self.built = True
    end_points['predictions'] = \
        tf.concat([tf.zeros([tf.shape(x_input)[0], 1]),
                   tf.reshape(end_points['predictions'], [-1, 1000])],
                  axis=1)
    output = end_points['predictions']
    # Strip off the extra reshape op at the output
    return output
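# Why the tf.concat above: slim's ResNet-V1 checkpoints are trained on 1000
# classes, while inception-style evaluation code expects 1001 labels with
# index 0 reserved for "background". Prepending a zero-probability column
# shifts the 1000 ResNet outputs to indices 1..1000. A standalone sketch of
# the same trick:
import tensorflow as tf

probs_1000 = tf.placeholder(tf.float32, [None, 1000])
probs_1001 = tf.concat(
    [tf.zeros([tf.shape(probs_1000)[0], 1]), probs_1000], axis=1)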
def fcn_res101(images, num_classes, is_training=True):
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(images, 2,
                                                  is_training=False,
                                                  global_pool=False,
                                                  spatial_squeeze=False,
                                                  output_stride=16)
    # nn.Conv2D(num_classes, kernel_size=1),
    # nn.Conv2DTranspose(num_classes, kernel_size=64, padding=16, strides=32)

    # pool4 = end_points['resnet_v1_101/pool4']
    # dconv1_out = pool4.get_shape().as_list()
    # deconv1 = slim.conv2d_transpose(net, dconv1_out[3], [4, 4], stride=2, scope='deconv1')
    # fu1 = tf.add(deconv1, pool4)
    #
    # pool3 = end_points['resnet_v1_101/pool3']
    # dconv2_out = pool3.get_shape().as_list()
    # deconv2 = slim.conv2d_transpose(fu1, dconv2_out[3], [4, 4], stride=2, scope='deconv2')
    # fu2 = tf.add(deconv2, pool3)

    logit = slim.conv2d_transpose(net, 2, [32, 32], stride=16, scope='deconv32')
    # 'dimension' is a deprecated alias for 'axis' in tf.argmax
    prediction = tf.argmax(logit, axis=3)  # , name="prediction")
    return logit, prediction
def processing(im_path, dimx, dimy):
    img = image.load_img(im_path, target_size=(dimx, dimy))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x


conv_out_train = []
with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.INFO)
    images = tf.placeholder(tf.float32, shape=(1, dimx, dimy, 3))
    labels = tf.placeholder(tf.uint8, shape=(1, 1))
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_101(images, num_classes=num_classes,
                                            is_training=False)
    probs = tf.argmax(tf.nn.softmax(logits), axis=1)
    one_hot_labels = slim.one_hot_encoding(labels, num_classes)
    gt = tf.argmax(one_hot_labels, axis=-1)
    accuracy = slim.metrics.accuracy(probs, gt)
    init_fn = get_init_fn(model_path)

    with tf.Session() as sess:
        # tf.initialize_all_variables() is deprecated
        sess.run(tf.global_variables_initializer())
        init_fn(sess)
        # conv_out_train = []
        for class_id, prod_per_class in enumerate(product_files_per_class):
            conv_out = []
            for prod in prod_per_class:
def _construct_model(model_type='resnet_v1_50'):
    """Constructs model for the desired type of CNN.

    Args:
      model_type: Type of model to be used.

    Returns:
      end_points: A dictionary from components of the network to the
        corresponding activations.

    Raises:
      ValueError: If the model_type is not supported.
    """
    # Placeholder input.
    images = array_ops.placeholder(dtypes.float32, shape=(1, None, None, 3),
                                   name=_INPUT_NODE)

    # Construct model.
    if model_type == 'inception_resnet_v2':
        _, end_points = inception.inception_resnet_v2_base(images)
    elif model_type == 'inception_resnet_v2-same':
        _, end_points = inception.inception_resnet_v2_base(images, align_feature_maps=True)
    elif model_type == 'inception_v2':
        _, end_points = inception.inception_v2_base(images)
    elif model_type == 'inception_v2-no-separable-conv':
        _, end_points = inception.inception_v2_base(images, use_separable_conv=False)
    elif model_type == 'inception_v3':
        _, end_points = inception.inception_v3_base(images)
    elif model_type == 'inception_v4':
        _, end_points = inception.inception_v4_base(images)
    elif model_type == 'alexnet_v2':
        _, end_points = alexnet.alexnet_v2(images)
    elif model_type == 'vgg_a':
        _, end_points = vgg.vgg_a(images)
    elif model_type == 'vgg_16':
        _, end_points = vgg.vgg_16(images)
    elif model_type == 'mobilenet_v1':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images)
    elif model_type == 'mobilenet_v1_075':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images, depth_multiplier=0.75)
    elif model_type == 'resnet_v1_50':
        _, end_points = resnet_v1.resnet_v1_50(images, num_classes=None,
                                               is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_101':
        _, end_points = resnet_v1.resnet_v1_101(images, num_classes=None,
                                                is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_152':
        _, end_points = resnet_v1.resnet_v1_152(images, num_classes=None,
                                                is_training=False, global_pool=False)
    elif model_type == 'resnet_v1_200':
        _, end_points = resnet_v1.resnet_v1_200(images, num_classes=None,
                                                is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_50':
        _, end_points = resnet_v2.resnet_v2_50(images, num_classes=None,
                                               is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_101':
        _, end_points = resnet_v2.resnet_v2_101(images, num_classes=None,
                                                is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_152':
        _, end_points = resnet_v2.resnet_v2_152(images, num_classes=None,
                                                is_training=False, global_pool=False)
    elif model_type == 'resnet_v2_200':
        _, end_points = resnet_v2.resnet_v2_200(images, num_classes=None,
                                                is_training=False, global_pool=False)
    else:
        raise ValueError('Unsupported model_type %s.' % model_type)

    return end_points
def compute_feature_of_batch_ts_with_cnn(file_path_of_ts, file_path_of_feature,
                                         cnn_model_name, file_path_of_pretrained_model):
    r'''
    compute features of some time series with a pretrained CNN
    :param file_path_of_ts: file path of time series
    :param file_path_of_feature: file path for saving features
    :param cnn_model_name: name of CNN model
    :param file_path_of_pretrained_model: file path of pretrained CNN
    :return: None
    '''
    # tf.reset_default_graph()
    # read data
    data = pd.read_csv(file_path_of_ts)
    # data = data.sample(20)
    # change dataframe to list
    id_list = data.iloc[:, 0].tolist()
    data_list = change_dataframe_to_dict_(data)

    model = cnn_model_name
    checkpoint_file = file_path_of_pretrained_model

    # I only have these because I thought some take in size of (299, 299), but maybe not
    if 'inception' in model:
        height, width, channels = 224, 224, 3
    if 'resnet' in model:
        height, width, channels = 224, 224, 3
    if 'vgg' in model:
        height, width, channels = 224, 224, 3
    if model == 'inception_resnet_v2':
        height, width, channels = 299, 299, 3

    x = tf.placeholder(tf.float32, shape=(1, height, width, channels))

    # load up model specific stuff
    if model == 'inception_v1':
        # from inception_v1 import *
        from nets import inception_v1
        arg_scope = inception_v1.inception_v1_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v1.inception_v1(x, is_training=False,
                                                           num_classes=None)
            features = end_points['AvgPool_0a_7x7']
            # print(logits.shape)
            # print(features.shape)
    elif model == 'inception_v2':
        # from inception_v2 import *
        from nets import inception_v2
        arg_scope = inception_v2.inception_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v2(x, is_training=False, num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_v3':
        # from inception_v3 import *
        from nets import inception_v3
        arg_scope = inception_v3.inception_v3_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v3(x, is_training=False, num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_resnet_v2':
        # from inception_resnet_v2 import *
        from nets import inception_resnet_v2
        arg_scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_resnet_v2(x, is_training=False,
                                                     num_classes=1001)
            features = end_points['PreLogitsFlatten']
    elif model == 'resnet_v1_50':
        # from resnet_v1 import *
        from nets import resnet_v1
        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_50(x, is_training=False,
                                                        num_classes=1000)
            features = end_points['global_pool']
    elif model == 'resnet_v1_101':
        # from resnet_v1 import *
        from nets import resnet_v1
        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_101(x, is_training=False,
                                                         num_classes=1000)
            features = end_points['global_pool']
    elif model == 'vgg_16':
        # from vgg import *
        from nets import vgg
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_16(x, is_training=False)
            features = end_points['vgg_16/fc8']
    elif model == 'vgg_19':
        # from vgg import *
        from nets import vgg
        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_19(x, is_training=False)
            features = end_points['vgg_19/fc8']

    # cpu_config = tf.ConfigProto(intra_op_parallelism_threads=8,
    #                             inter_op_parallelism_threads=8,
    #                             device_count={'CPU': 3})
    # sess = tf.Session(config=cpu_config)
    sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_file)

    feature_list = []
    count_temp = 0
    for i in range(len(data_list)):
        count_temp = count_temp + 1
        # imaging ts: render the time series as a recurrence plot
        ts_dict = data_list[i]
        ts = ts_dict['ts']
        id = ts_dict['id']
        new_ts = min_max_transform(ts)
        normalized = np.array(new_ts)
        fig, ax = plt.subplots()
        # plt.imshow(recurrence_plot.rec_plot(normalized), cmap=plt.cm.gray)
        plt.imshow(recurrence_plot.rec_plot(normalized))
        ax.set_xticks([])
        ax.set_yticks([])
        # print(id)
        path = "inception-v1/" + id + ".jpg"
        plt.savefig(path)
        plt.close(fig)

        # begin to compute features
        image = misc.imread(path)
        # from matplotlib.pyplot import imread
        # image = imread(path)
        image = misc.imresize(image, (height, width))
        image = np.expand_dims(image, 0)
        feature = np.squeeze(sess.run(features, feed_dict={x: image}))
        feature_list.append(feature)
        # print(feature)
        os.remove(path)
        if count_temp % 100 == 0:
            print(count_temp)

    # begin to process results and write to csv
    feature_array = np.array(feature_list)
    feature_df = pd.DataFrame(feature_array)
    # add id
    feature_df.insert(loc=0, column='id', value=id_list)
    feature_df.to_csv(file_path_of_feature, index=False)
    gc.collect()
processed_images, img_ids, labels = dataset.make_one_shot_iterator().get_next()
#%%
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
g = tf.get_default_graph()
#%%
with g.as_default():
    if is_save:
        feature_writer = tf.python_io.TFRecordWriter(feature_tfrecord_filename)
    img_input_ph = tf.placeholder(dtype=tf.float32, shape=[None, height, width, 3])
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, _ = resnet_v1.resnet_v1_101(img_input_ph, num_classes=5000, is_training=False)
    init_fn = slim.assign_from_checkpoint_fn(checkpoints_dir, slim.get_model_variables())
    features = g.get_tensor_by_name('resnet_v1_101/pool5:0')
    idx = 0
    init_fn(sess)
    while True:  # idx < 3125:
        try:
            processed_images_v, img_ids_v, labels_v = sess.run(
                [processed_images, img_ids, labels])
            features_v = sess.run(features, {img_input_ph: processed_images_v})
            print('batch no. {}'.format(idx))
            for idx_s in range(features_v.shape[0]):
                feature = features_v[idx_s, :, :, :]
def resnet_v1_101_8s(image_batch_tensor, number_of_classes, is_training):
    """Returns the resnet_v1_101_8s model definition.

    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al. The network
    subsamples the input by a factor of 8 and uses a bilinear upsampling
    kernel to upsample the prediction by a factor of 8. This means that if
    the image size is not a multiple of 8, a prediction of a different size
    will be delivered. To adapt the network to any input size, use
    adapt_network_for_any_size_input(resnet_v1_101_8s, 8). Note: the
    upsampling kernel is fixed in this model definition, because it didn't
    give significant improvements according to the aforementioned paper.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.

    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to the
        input; use adapt_network_for_any_size_input to adapt the network for
        any input size. Otherwise, the input image sizes should be multiples
        of 8.
    resnet_v1_101_8s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_101_8s model's variables to
        resnet_v1_101 checkpoint variable names. We need this to initialize
        the weights of the resnet_v1_101_8s model with resnet_v1_101 from a
        checkpoint file. Look at the ipython notebook for examples.
    """
    with tf.variable_scope("resnet_v1_101_8s") as resnet_v1_101_8s:

        upsample_factor = 8

        # Convert image to float32 before subtracting the mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)
        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(mean_centered_image_batch,
                                                         number_of_classes,
                                                         is_training=is_training,
                                                         global_pool=False,
                                                         output_stride=8)

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        # (tf.pack was renamed to tf.stack in TF 1.0)
        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(
            logits,
            upsample_filter_tensor,
            output_shape=upsampled_logits_shape,
            strides=[1, upsample_factor, upsample_factor, 1])

    # Map the original resnet_v1_101 variable names to the variables in our
    # model. This is done to make it possible to use
    # assign_from_checkpoint_fn() while providing this mapping.
    # TODO: make it cleaner
    resnet_v1_101_8s_variables_mapping = {}
    resnet_v1_101_8s_variables = slim.get_variables(resnet_v1_101_8s)

    for variable in resnet_v1_101_8s_variables:
        # Here we remove the part of the variable name that corresponds to
        # the current variable scope
        original_resnet_v1_101_checkpoint_string = variable.name[
            len(resnet_v1_101_8s.original_name_scope):-2]
        resnet_v1_101_8s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_101_8s_variables_mapping
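# A restore sketch using the returned mapping (a minimal sketch; the
# checkpoint path and input size are assumptions). Because the variables
# live under the extra "resnet_v1_101_8s" scope, the mapping is what lets
# slim.assign_from_checkpoint_fn find them under their original names.
import tensorflow as tf

image_batch = tf.placeholder(tf.uint8, [1, 384, 384, 3])  # multiple of 8
upsampled_logits, mapping = resnet_v1_101_8s(image_batch,
                                             number_of_classes=21,
                                             is_training=False)
init_fn = slim.assign_from_checkpoint_fn('./resnet_v1_101.ckpt', mapping)
with tf.Session() as sess:
    init_fn(sess)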
def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    # max_epsilon check: flag adversarial images whose L-inf distance from
    # the original exceeds the bound
    # get original images
    origin_img_list = np.sort(glob.glob(FLAGS.origin_img_dir + "*.png"))
    origin_imgs = np.zeros((len(origin_img_list), FLAGS.image_height,
                            FLAGS.image_width, 3), dtype=float)
    for i in range(len(origin_img_list)):
        origin_imgs[i] = imread(origin_img_list[i], mode='RGB').astype(np.float)
    # get adv images
    adv_img_list = np.sort(glob.glob(FLAGS.input_dir + "*.png"))
    adv_imgs = np.zeros((len(adv_img_list), FLAGS.image_height,
                         FLAGS.image_width, 3), dtype=float)
    for i in range(len(adv_img_list)):
        adv_imgs[i] = imread(adv_img_list[i], mode='RGB').astype(np.float)

    epsilon_list = np.linalg.norm(
        np.reshape(abs(origin_imgs - adv_imgs),
                   [-1, FLAGS.image_height * FLAGS.image_width * 3]),
        ord=np.inf, axis=1)
    # print(epsilon_list); exit(1)
    over_epsilon_list = np.zeros((len(origin_img_list), 2), dtype=object)
    cnt = 0
    for i in range(len(origin_img_list)):
        file_name = origin_img_list[i].split("/")[-1]
        file_name = file_name.split(".")[0]
        over_epsilon_list[i, 0] = file_name
        if epsilon_list[i] > FLAGS.max_epsilon:
            over_epsilon_list[i, 1] = "1"
            cnt += 1

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        if FLAGS.checkpoint_file_name == "inception_v3.ckpt":
            with slim.arg_scope(inception.inception_v3_arg_scope()):
                _, end_points = inception.inception_v3(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v4.ckpt":
            with slim.arg_scope(inception.inception_v4_arg_scope()):
                _, end_points = inception.inception_v4(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_resnet_v2_2016_08_30.ckpt":
            with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                _, end_points = inception.inception_resnet_v2(
                    x_input, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_101.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224], align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_101(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_50.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224], align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_50(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "resnet_v2_152.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224], align_corners=False)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v2.resnet_v2_152(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v1.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224], align_corners=False)
            with slim.arg_scope(inception.inception_v1_arg_scope()):
                _, end_points = inception.inception_v1(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        elif FLAGS.checkpoint_file_name == "inception_v2.ckpt":
            x_input2 = tf.image.resize_bilinear(x_input, [224, 224], align_corners=False)
            with slim.arg_scope(inception.inception_v2_arg_scope()):
                _, end_points = inception.inception_v2(
                    x_input2, num_classes=num_classes, is_training=False)
            predicted_labels = tf.argmax(end_points['Predictions'], 1)
        # Resnet v1 and vgg are not working now
        elif FLAGS.checkpoint_file_name == "vgg_16.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_16(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "vgg_19.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(vgg.vgg_arg_scope()):
                _, end_points = vgg.vgg_19(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['vgg_19/fc8'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_50.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                # note: the original passed x_input here, bypassing the
                # preprocessing above; x_input2 matches the other branches
                _, end_points = resnet_v1.resnet_v1_50(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_101.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_101(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1
        elif FLAGS.checkpoint_file_name == "resnet_v1_152.ckpt":
            x_input_list = tf.unstack(x_input, FLAGS.batch_size, 0)
            for i in range(FLAGS.batch_size):
                x_input_list[i] = vgg_preprocessing.preprocess_image(
                    x_input_list[i], 224, 224)
            x_input2 = tf.stack(x_input_list, 0)
            with slim.arg_scope(resnet_utils.resnet_arg_scope()):
                _, end_points = resnet_v1.resnet_v1_152(
                    x_input2, num_classes=num_classes - 1, is_training=False)
            predicted_labels = tf.argmax(end_points['predictions'], 1) + 1

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path + FLAGS.checkpoint_file_name,
            master=FLAGS.master)

        f = open(FLAGS.true_label, "r")
        t_label_list = np.array([i[:-1].split(",") for i in f.readlines()])
        score = 0
        with tf.train.MonitoredSession(session_creator=session_creator) as sess:
            with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
                for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                    labels = sess.run(predicted_labels, feed_dict={x_input: images})
                    for filename, label in zip(filenames, labels):
                        f_name = filename.split(".")[0]
                        t_label = int(t_label_list[t_label_list[:, 0] == f_name, 1][0])
                        if t_label != label:
                            if over_epsilon_list[over_epsilon_list[:, 0] == f_name, 1] != "1":
                                score += 1
                        # out_file.write('{0},{1}\n'.format(filename, label))

    print("Over max epsilon#: " + str(cnt))
    print(str(FLAGS.max_epsilon) + " max epsilon Score: " + str(score))
num_steps_per_epoch = num_batches_per_epoch
checkpoints_dir = '/tmp/checkpoints'

tf.reset_default_graph()
images = tf.placeholder(tf.float32, shape=[None, height_image, width_image, 3])
labels = tf.placeholder(tf.float32, shape=[None, 3])
learning_rate = tf.placeholder(tf.float32, shape=[])
keep_prob = tf.placeholder(tf.float32, shape=[])

with slim.arg_scope(resnet_arg_scope()):
    # restore resnet101 model
    # imgs = tf.map_fn(vgg_preprocessing.preprocess_image(fname, height_image, width_image, data_type)
    # imgs = [vgg_preprocessing.preprocess_image(fname, height_image, width_image, data_type) for fname in imgs]
    resnet_logits, end_points = resnet_v1_101(images, num_classes=3,
                                              global_pool=True, is_training=True)


def feed_dict(batch_size, data_type, epoch):
    keep_prob_per = keep_prob_val
    lr = initial_learning_rate
    if data_type == 1:
        data = get_images(data_dir, data_type, batch_size)
        keep_prob_per = keep_prob_val
    elif data_type == 2:
        data = get_images(data_dir, data_type, batch_size)
        keep_prob_per = 1
    elif data_type == 3:
def main():
    data_path = '<train-CARLA-VP.tfrecords>'
    model_type = 'vgg-16'
    train_dir = '<saved_model_path>'
    est_label = 'horvpz'
    num_bins = 500

    sphere_params = np.load('<carlavp_label_to_horvpz_fov_pitch.npz>')
    all_bins = sphere_params['all_bins']
    all_sphere_centres = sphere_params['all_sphere_centres']
    all_sphere_radii = sphere_params['all_sphere_radii']

    if est_label == 'horfov':
        fov_bins = np.arange(15, 115, 100 / num_bins)
        half_fov_bin_size = (fov_bins[1] - fov_bins[0]) / 2

    if model_type == 'inceptionv4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224

    if model_type == 'vgg-m':
        model = pickle.load(open("<vggm-tf.p>", "rb"))
        average_image = np.load('<vgg_average_image.npy>')
    elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
        _R_MEAN = 123.68
        _G_MEAN = 116.78
        _B_MEAN = 103.94
        resnet_average_channels = np.array(np.concatenate(
            (np.tile(_R_MEAN, (net_height, net_width, 1)),
             np.tile(_G_MEAN, (net_height, net_width, 1)),
             np.tile(_B_MEAN, (net_height, net_width, 1))), axis=2), dtype=np.float32)
    elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
        print("Nothing needs to be initialized for this cnn model")
    else:
        print("ERROR: No such CNN exists")

    if est_label == 'horfov':
        no_params_model = 3
    elif est_label == 'horvpz':
        no_params_model = 4
    else:
        print("ERROR: No such 'est_label'")

    max_batch_size = 60
    total_examples = sum(1 for _ in tf.python_io.tf_record_iterator(data_path))
    print("Total examples: ", total_examples)
    divs = np.array(list(factors(total_examples)))
    sorted_divs = divs[divs.argsort()]
    batch_size = sorted_divs[sorted_divs < max_batch_size][-1]
    print("Batch Size:", batch_size)

    ct = np.arange(11, 12, 4)
    best_avg_man_loss = np.inf
    for en, consider_top in enumerate(ct):
        total_manhattan_loss = np.zeros(5)
        with tf.Graph().as_default():
            tf.logging.set_verbosity(tf.logging.INFO)
            filename_queue = tf.train.string_input_producer([data_path])
            image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
                filename_queue, num_classes=8, dtype=tf.float64)
            image = tf.image.resize_images(
                image, [net_width, net_height], method=tf.image.ResizeMethod.BILINEAR)
            if model_type == 'vgg-m':
                image = image - average_image
            elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
                image = image - resnet_average_channels
            elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
                image = tf.cast(image, tf.float32) * (1. / 255)
                image = (image - 0.5) * 2
            else:
                print("ERROR: No such CNN exists")

            images, labels, carla_widths, carla_heights = tf.train.batch(
                [image, label, carla_width, carla_height],
                batch_size=batch_size, num_threads=1, capacity=5 * batch_size)
            print(images)

            if model_type == 'vgg-m':
                logits = vgg_m.cnn_vggm(images, num_classes=num_bins * no_params_model,
                                        model=model)
            elif model_type == 'resnet50':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_50(
                        images, num_classes=num_bins * no_params_model,
                        is_training=False, global_pool=True)  # , reuse=True
            elif model_type == 'resnet101':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_101(
                        images, num_classes=num_bins * no_params_model,
                        is_training=False, global_pool=True)  # , reuse=True
            elif model_type == 'vgg-16':
                with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
                    logits, _ = vgg.vgg_16(
                        images, num_classes=num_bins * no_params_model,
                        is_training=False)  # , global_pool=False, reuse=True
            elif model_type == 'inceptionv1':
                with slim.arg_scope(inception_v1.inception_v1_arg_scope()) as scope:
                    logits, _ = inception_v1.inception_v1(
                        images, num_classes=num_bins * no_params_model,
                        is_training=False)  # , global_pool=False, reuse=True
            elif model_type == 'inceptionv4':
                with slim.arg_scope(inception_v4.inception_v4_arg_scope()) as scope:
                    logits, _ = inception_v4.inception_v4(
                        images, num_classes=num_bins * no_params_model,
                        is_training=False)  # , global_pool=False, reuse=True
            else:
                print("ERROR: No such CNN exists")

            checkpoint_path = train_dir
            init_fn = slim.assign_from_checkpoint_fn(
                checkpoint_path, slim.get_variables_to_restore())

            print("--------------------------------------------------------")
            print("No. of examples not evaluated because of batch size:",
                  np.mod(total_examples, batch_size))
            print("--------------------------------------------------------")

            with tf.Session() as sess:
                with slim.queues.QueueRunners(sess):
                    sess.run(tf.initialize_local_variables())
                    init_fn(sess)
                    for loop_no in range(int(np.floor(total_examples / batch_size))):
                        np_rawpreds, np_images_raw, np_labels, np_width, np_height = sess.run(
                            [logits, images, labels, carla_widths, carla_heights])
                        for i in range(batch_size):
                            pred_indices = np.zeros(no_params_model, dtype=np.int32)
                            output_vals = np_rawpreds[i, :].squeeze().reshape(
                                no_params_model, -1)
                            for ln in range(no_params_model):
                                predsoft = my_softmax(output_vals[ln, :][np.newaxis]).squeeze()
                                topindices = predsoft.argsort()[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(predsoft[topindices])
                                pred_indices[ln] = np.abs(int(np.round(
                                    np.sum(probsindices * topindices))))

                            if est_label == 'horfov':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    np.hstack((pred_indices[:2], 0, 0)),
                                    all_bins, all_sphere_centres, all_sphere_radii)
                                my_fov = fov_bins[pred_indices[2]] + half_fov_bin_size
                                fx, fy, roll_from_horizon, my_tilt = \
                                    get_intrinisic_extrinsic_params_from_horfov(
                                        img_dims=(np_width[i], np_height[i]),
                                        horizonvector=estimated_input_points,
                                        fov=my_fov,
                                        net_dims=(net_width, net_height))
                            elif est_label == 'horvpz':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    pred_indices[:4], all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                fx, fy, roll_from_horizon, my_tilt = \
                                    get_intrinisic_extrinsic_params_from_horizonvector_vpz(
                                        img_dims=(np_width[i], np_height[i]),
                                        horizonvector_vpz=estimated_input_points,
                                        net_dims=(net_width, net_height))

                            my_fov_fx = degrees(np.arctan(np_width[i] / (2 * fx)) * 2)
                            my_fov_fy = degrees(np.arctan(np_width[i] / (2 * fy)) * 2)
                            my_tilt = -degrees(my_tilt)
                            roll_from_horizon = roll_from_horizon

                            gt_label = np_labels[i, :].reshape(4, -1)
                            gt_fov = gt_label[3, 0]
                            gt_pitch = gt_label[3, 1]
                            gt_roll = degrees(atan((gt_label[1, 1] - gt_label[0, 1]) /
                                                   (gt_label[1, 0] - gt_label[0, 0])))

                            manhattan_loss = [
                                np.abs(my_fov_fx - gt_fov),
                                np.abs(my_fov_fy - gt_fov),
                                np.abs(((my_fov_fx + my_fov_fy) / 2) - gt_fov),
                                np.abs(my_tilt - gt_pitch),
                                np.abs(roll_from_horizon - gt_roll)
                            ]
                            total_manhattan_loss += manhattan_loss

        avg_manhattan_loss = total_manhattan_loss / total_examples
        print("ct:", consider_top, "Average manhattan loss per scalar: ",
              avg_manhattan_loss)
        print("-------------------------------------------------------------------")
        this_loss = np.mean(np.hstack((avg_manhattan_loss[1], avg_manhattan_loss[3:])))
        if this_loss < best_avg_man_loss:
            best_avg_man_loss = this_loss
            display_loss = [consider_top, -1, avg_manhattan_loss[1],
                            avg_manhattan_loss[3], avg_manhattan_loss[4]]

    print("Best loss:", display_loss)
def batch_prediction(frame_id_to_path, frame_id_to_image_ids, image_id_to_coordinates,
                     model, image_size, sess, debug=_prediction_debug):
    print "batch processing: " + str(len(image_id_to_coordinates))
    if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4' or \
            model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
            model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
        preprocessing_type = 'inception'
    elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
        preprocessing_type = 'vgg'
    image_id_to_predictions = {}
    image_ids = []
    count = 0
    start_time_1 = time.time()
    for frame_id, path in frame_id_to_path.iteritems():
        frame_string = open(path, 'rb').read()
        frame = tf.image.decode_jpeg(frame_string, channels=3)
        # plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(tf.image.encode_jpeg(frame)))))
        # plt.show()
        frame_np = cv2.imread(path, cv2.IMREAD_COLOR)
        frame_height, frame_width = frame_np.shape[:2]
        # print frame_np.shape
        if preprocessing_type == 'inception':
            processed_frame = preprocess_for_inception(
                frame, frame_height, frame_width, sess,
                central_fraction=1.0, debug=_prediction_debug)
        elif preprocessing_type == 'vgg':
            processed_frame = preprocess_for_vgg(
                frame, frame_height, frame_width, frame_height, sess,
                debug=_prediction_debug)
        start_time = time.time()
        height, width = processed_frame.shape[:2].as_list()
        # print "Size: " + str(width) + ", " + str(height)
        for image_id in frame_id_to_image_ids[frame_id]:
            fields = image_id_to_coordinates[image_id].split('\t')
            x = int(width * float(fields[0]))
            y = int(height * float(fields[1]))
            w = int(width * float(fields[2]))
            h = int(height * float(fields[3]))
            processed_image = tf.image.crop_to_bounding_box(processed_frame, y, x, h, w)
            if debug:
                print "object at " + str(fields)
                print str(x) + ", " + str(y) + ", " + str(w) + ", " + str(h) + \
                    ", " + str(frame_height - y - h)
                if preprocessing_type == 'vgg':
                    plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(
                        tf.image.encode_jpeg(tf.cast(processed_image, tf.uint8))))))
                elif preprocessing_type == 'inception':
                    plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(
                        tf.image.encode_jpeg(tf.cast(
                            tf.multiply(processed_image, 255), tf.uint8))))))
                plt.show()
            processed_image = tf.image.resize_images(processed_image,
                                                     (image_size, image_size))
            if debug:
                print "resized"
                if preprocessing_type == 'vgg':
                    plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(
                        tf.image.encode_jpeg(tf.cast(processed_image, tf.uint8))))))
                elif preprocessing_type == 'inception':
                    plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(
                        tf.image.encode_jpeg(tf.cast(
                            tf.multiply(processed_image, 255), tf.uint8))))))
                plt.show()
            if count == 0:
                processed_images = tf.expand_dims(processed_image, 0)
            else:
                local_matrix = tf.expand_dims(processed_image, 0)
                processed_images = tf.concat([processed_images, local_matrix], 0)
            image_ids.append(image_id)
            count = count + 1
    print "Preparation: " + str(time.time() - start_time_1) + " seconds"

    start_time = time.time()
    if model == 'inception_v1':
        logits, _ = inception.inception_v1(processed_images, num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
            slim.get_model_variables('InceptionV1'))
    elif model == 'inception_v2':
        logits, _ = inception.inception_v2(processed_images, num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v2.ckpt'),
            slim.get_model_variables('InceptionV2'))
    elif model == 'inception_v3':
        logits, _ = inception.inception_v3(processed_images, num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables('InceptionV3'))
    elif model == 'inception_v4':
        logits, _ = inception.inception_v4(processed_images, num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
    elif model == 'resnet_v1_50':
        logits, _ = resnet_v1.resnet_v1_50(processed_images, num_classes=1000,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'),
            slim.get_model_variables('resnet_v1_50'))
    elif model == 'resnet_v1_101':
        logits, _ = resnet_v1.resnet_v1_101(processed_images, num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'),
            slim.get_model_variables('resnet_v1_101'))
    elif model == 'resnet_v1_152':
        logits, _ = resnet_v1.resnet_v1_152(processed_images, num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'),
            slim.get_model_variables('resnet_v1_152'))
    elif model == 'mobilenet_v1_0.25_128':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001,
                                              is_training=False, depth_multiplier=0.25)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.25_128.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_0.50_160':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001,
                                              is_training=False, depth_multiplier=0.50)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.50_160.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_1.0_224':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001,
                                              is_training=False, depth_multiplier=1.0)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_1.0_224.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'inception_resnet_v2':
        logits, _ = inception_resnet_v2.inception_resnet_v2(processed_images,
                                                            num_classes=1001,
                                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_resnet_v2_2016_08_30.ckpt'),
            slim.get_model_variables('InceptionResnetV2'))
    elif model == 'nasnet_mobile':
        logits, _ = nasnet.build_nasnet_mobile(processed_images, num_classes=1001,
                                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'nasnet_large':
        logits, _ = nasnet.build_nasnet_large(processed_images, num_classes=1001,
                                              is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'vgg_16':
        logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))
    print "Prediction2.1: " + str(time.time() - start_time) + " seconds"

    start_time = time.time()
    init_fn(sess)
    print "Prediction2.2: " + str(time.time() - start_time) + " seconds"

    probabilities = tf.nn.softmax(logits)
    start_time = time.time()
    np_image, probabilities = sess.run([frame, probabilities])
    runtime = time.time() - start_time
    print "Prediction: " + str(runtime) + " seconds"

    for k in range(len(image_ids)):
        image_id = image_ids[k]
        predictions = []
        prob = probabilities[k, 0:]
        sorted_inds = [i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1])]
        for i in range(5):
            index = sorted_inds[i]
            if model == 'inception_v1' or model == 'inception_v2' or \
                    model == 'inception_v3' or model == 'inception_v4' or \
                    model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
                    model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
                name = names[index]
            elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
                name = names[index + 1]
            pr = prob[index]
            pair = (name, pr)
            predictions.append(pair)
        image_id_to_predictions[image_id] = predictions
    return image_id_to_predictions, runtime, sess
def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
    """Create the graph of the resnetv1_101 model."""
    # Parse input arguments into class variables
    if weights_path == 'DEFAULT':
        self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_101.ckpt"
    else:
        self.WEIGHTS_PATH = weights_path
    self.train_layers = train_layers

    with tf.variable_scope("input"):
        self.image_size = resnet_v1.resnet_v1_101.default_image_size
        self.x_input = tf.placeholder(tf.float32,
                                      [None, self.image_size, self.image_size, 3],
                                      name="x_input")
        self.y_input = tf.placeholder(tf.float32, [None, num_classes], name="y_input")
        self.learning_rate = tf.placeholder(tf.float32, name="learning_rate")

    # train
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits, _ = resnet_v1.resnet_v1_101(self.x_input,
                                                 num_classes=num_classes,
                                                 is_training=True,
                                                 reuse=tf.AUTO_REUSE)
    # validation
    with arg_scope(resnet_v1.resnet_arg_scope()):
        self.logits_val, _ = resnet_v1.resnet_v1_101(self.x_input,
                                                     num_classes=num_classes,
                                                     is_training=False,
                                                     reuse=tf.AUTO_REUSE)

    with tf.name_scope("loss"):
        self.loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.logits, labels=self.y_input))
        self.loss_val = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=self.logits_val, labels=self.y_input))

    with tf.name_scope("train"):
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        var_list = [v for v in tf.trainable_variables()
                    if v.name.split('/')[-2] in train_layers
                    or v.name.split('/')[-3] in train_layers]
        gradients = tf.gradients(self.loss, var_list)
        self.grads_and_vars = list(zip(gradients, var_list))
        optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)
        with tf.control_dependencies(update_ops):
            self.train_op = optimizer.apply_gradients(
                grads_and_vars=self.grads_and_vars, global_step=self.global_step)

    with tf.name_scope("probability"):
        self.probability = tf.nn.softmax(self.logits_val, name="probability")

    with tf.name_scope("prediction"):
        self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(self.prediction, tf.argmax(self.y_input, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"),
                                       name="accuracy")
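# A minimal fine-tuning step for the class above (a sketch: only __init__
# appears in the source, so the class name `ResNetV1_101` and the dummy data
# are assumptions). Only the 'logits' layer is trained here.
import numpy as np
import tensorflow as tf

net = ResNetV1_101(num_classes=10, train_layers=['logits'])  # hypothetical name
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x_batch = np.zeros([8, net.image_size, net.image_size, 3], np.float32)
    y_batch = np.eye(10)[np.random.randint(0, 10, 8)].astype(np.float32)
    _, loss = sess.run([net.train_op, net.loss],
                       feed_dict={net.x_input: x_batch,
                                  net.y_input: y_batch,
                                  net.learning_rate: 1e-3})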
def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implementation of resnet
    resnet_v1_50/block1 (?, ?, ?, 256)
    resnet_v1_50/block2 (?, ?, ?, 512)
    resnet_v1_50/block3 (?, ?, ?, 1024)
    resnet_v1_50/block4 (?, ?, ?, 2048)
    Shape of f_0 (?, ?, ?, 2048)
    Shape of f_1 (?, ?, ?, 512)
    Shape of f_2 (?, ?, ?, 256)
    Shape of f_3 (?, ?, ?, 64)
    Shape of h_0 (?, ?, ?, 2048), g_0 (?, ?, ?, 2048)
    Shape of h_1 (?, ?, ?, 128), g_1 (?, ?, ?, 128)
    Shape of h_2 (?, ?, ?, 64), g_2 (?, ?, ?, 64)
    Shape of h_3 (?, ?, ?, 32), g_3 (?, ?, ?, 32)
    '''
    F_score = []
    F_geometry = []
    images = mean_image_subtraction(images)
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')
        logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training, scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'],
                end_points['pool3'], end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:  # bottom-most level h1
                    f[i] = slim.conv2d(f[i], 2048, 1)  # 1x1 conv at the bottom-most level
                    h[i] = f[i]
                else:
                    f[i] = slim.conv2d(f[i], num_outputs[i], 1)  # 1x1 conv for f1, f2, f3
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:  # intermediate levels h2, h3
                    g[i] = unpool(h[i])
                else:  # top-most level h4: run a conv on h4 for prediction
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            f_score_32 = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            # geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            f_geometry_32 = tf.concat([geo_map, angle_map], axis=-1)
            F_score.append(f_score_32)
            F_geometry.append(f_geometry_32)

            g[2] = slim.conv2d(h[2], 64, 3)
            f_score_64 = slim.conv2d(g[2], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(
                g[2], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            # geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            angle_map = (slim.conv2d(
                g[2], 1, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            f_geometry_64 = tf.concat([geo_map, angle_map], axis=-1)
            F_score.append(f_score_64)
            F_geometry.append(f_geometry_64)

    return F_score, F_geometry
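# `unpool` is referenced above but defined elsewhere; in the EAST reference code it
# is a plain 2x bilinear upsample, so a sketch under that assumption is:
def unpool(inputs):
    shape = tf.shape(inputs)
    return tf.image.resize_bilinear(inputs, [shape[1] * 2, shape[2] * 2])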
(img_test_ids,test_img,test_labels,test_attributes) = dataset_in_3.make_one_shot_iterator().get_next() (img_test_ids,test_img_v,test_labels,test_attributes)=sess.run([img_test_ids,test_img,test_labels,test_attributes]) test_attributes = test_attributes[:,:,0] #%% #sparse_dict_img_id = tf.constant(sparse_dict_img_id) #sparse_dict_img = tf.constant(sparse_dict_img) #sparse_dict_label = tf.constant(sparse_dict_label) #sparse_dict_Attributes = tf.constant(sparse_dict_Attributes) #%% image_size = resnet_v1.resnet_v1_101.default_image_size height = image_size width = image_size img_input_ph = tf.placeholder(dtype=tf.float32,shape=[None,height,width,3])#tf.concat([img,sparse_dict_img],axis = 0,name='img_input_point') #%% with slim.arg_scope(resnet_v1.resnet_arg_scope()): logit, end_points = resnet_v1.resnet_v1_101(img_input_ph, num_classes=1000, is_training=is_use_batch_norm,reuse=tf.AUTO_REUSE) # init_fn = slim.assign_from_checkpoint_fn(checkpoints_dir,slim.get_model_variables()) features_concat = g.get_tensor_by_name('resnet_v1_101/pool5:0') #%% features_concat = tf.squeeze(features_concat) features_concat = tf.concat([features_concat,tf.ones([tf.shape(features_concat)[0],1])],axis = 1,name='feature_input_point') index_point = tf.placeholder(dtype=tf.int32,shape=()) F = features_concat[:index_point,:] sparse_dict = features_concat[index_point:,:] F_concat_ph = g.get_tensor_by_name('feature_input_point:0') #%% alpha_colaborative_var = tf.get_variable('alphha_colaborative',dtype=tf.float32,trainable=False, shape=()) alpha_colaborative_var_fh = tf.placeholder(dtype=tf.float32, shape=()) alpha_feature_var = tf.get_variable('alpha_feature',dtype=tf.float32,trainable=False, shape=())
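# A sketch of evaluating the query/dictionary feature split defined above;
# `query_imgs` and `dict_imgs` (preprocessed [N, 224, 224, 3] batches) and the
# already-initialized `sess` are assumptions for illustration:
batch = np.concatenate([query_imgs, dict_imgs], axis=0)
F_val, dict_val = sess.run(
    [F, sparse_dict],
    feed_dict={img_input_ph: batch, index_point: len(query_imgs)})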
def resnet_v1_101_16s(image_batch_tensor, number_of_classes, is_training):
    """Returns the resnet_v1_101_16s model definition.

    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al. The network
    subsamples the input by a factor of 16 and uses a bilinear upsampling
    kernel to upsample the prediction by a factor of 16. This means that if
    the image size is not a multiple of 16, a prediction of a different size
    will be delivered. To adapt the network for an input of any size, use
    adapt_network_for_any_size_input(resnet_v1_101_16s, 16). Note: the
    upsampling kernel is fixed in this model definition, because it didn't
    give significant improvements according to the aforementioned paper.

    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.

    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to the input;
        use adapt_network_for_any_size_input to adapt the network for any
        input size. Otherwise, the input image sizes should be multiples of 16.
    resnet_v1_101_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_101_16s model's variables to
        resnet_v1_101 checkpoint variable names. We need this to initialize
        the weights of the resnet_v1_101_16s model with resnet_v1_101 from a
        checkpoint file. Look at the ipython notebook for examples.
    """
    with tf.variable_scope("resnet_v1_101_16s") as resnet_v1_101_16s:
        upsample_factor = 16

        # Convert image to float32 before subtracting the mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)
        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(mean_centered_image_batch,
                                                         number_of_classes,
                                                         is_training=is_training,
                                                         global_pool=False,
                                                         output_stride=16)

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the output size of the upsampled tensor
        # (tf.pack was renamed to tf.stack in TF 1.0)
        upsampled_logits_shape = tf.stack([
            downsampled_logits_shape[0],
            downsampled_logits_shape[1] * upsample_factor,
            downsampled_logits_shape[2] * upsample_factor,
            downsampled_logits_shape[3]
        ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original resnet_v1_101 variable names to the variables in
        # our model. This is done to make it possible to use
        # assign_from_checkpoint_fn() while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_101_16s_variables_mapping = {}

        resnet_v1_101_16s_variables = slim.get_variables(resnet_v1_101_16s)

        for variable in resnet_v1_101_16s_variables:
            # Here we remove the part of the variable's name that is
            # responsible for the current variable scope
            original_resnet_v1_101_checkpoint_string = variable.name[len(resnet_v1_101_16s.original_name_scope):-2]
            resnet_v1_101_16s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_101_16s_variables_mapping
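# A sketch of initializing the model above from a stock resnet_v1_101 checkpoint via
# the returned name mapping; the placeholder shape and checkpoint path are
# assumptions for illustration:
image_batch = tf.placeholder(tf.uint8, [1, None, None, 3])
upsampled_logits, mapping = resnet_v1_101_16s(image_batch, number_of_classes=21,
                                              is_training=False)
init_fn = slim.assign_from_checkpoint_fn('resnet_v1_101.ckpt', mapping)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)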
def batch_prediction(image_id_to_path, model, sess): print "batch processing: " + str(len(image_id_to_path)) image_id_to_predictions = {} image_ids = [] count = 0 start_time_1 = time.time() for image_id, path in image_id_to_path.iteritems(): image_string = open(path, 'rb').read() image = tf.image.decode_jpeg(image_string, channels=3) if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4': processed_image = preprocess_for_inception(image, image_size, image_size, central_fraction=1.0) elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152': processed_image = vgg_preprocessing.preprocess_image( image, image_size, image_size, is_training=False) start_time = time.time() #print processed_image.shape #np_val = sess.run(processed_image) #print np_val.shape #processed_image = tf.convert_to_tensor(np_val) #print processed_image.shape #print "conversion: "+str(time.time()-start_time)+" seconds" if count == 0: processed_images = tf.expand_dims(processed_image, 0) else: local_matrix = tf.expand_dims(processed_image, 0) processed_images = tf.concat([processed_images, local_matrix], 0) image_ids.append(image_id) count = count + 1 print "Preparation: " + str(time.time() - start_time_1) + " seconds" start_time = time.time() if model == 'inception_v1': logits, _ = inception.inception_v1(processed_images, num_classes=1001, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'inception_v1.ckpt'), slim.get_model_variables('InceptionV1')) elif model == 'inception_v2': logits, _ = inception.inception_v2(processed_images, num_classes=1001, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'inception_v2.ckpt'), slim.get_model_variables('InceptionV2')) elif model == 'inception_v3': logits, _ = inception.inception_v3(processed_images, num_classes=1001, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'inception_v3.ckpt'), slim.get_model_variables('InceptionV3')) elif model == 'inception_v4': logits, _ = inception.inception_v4(processed_images, num_classes=1001, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'inception_v4.ckpt'), slim.get_model_variables('InceptionV4')) elif model == 'resnet_v1_50': logits, _ = resnet_v1.resnet_v1_50(processed_images, num_classes=1000, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'), slim.get_model_variables('resnet_v1_50')) elif model == 'resnet_v1_101': logits, _ = resnet_v1.resnet_v1_101(processed_images, num_classes=1000, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'), slim.get_model_variables('resnet_v1_101')) elif model == 'resnet_v1_152': logits, _ = resnet_v1.resnet_v1_152(processed_images, num_classes=1000, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'), slim.get_model_variables('resnet_v1_152')) elif model == 'vgg_16': logits, _ = vgg.vgg_16(processed_images, num_classes=1000, is_training=False) init_fn = slim.assign_from_checkpoint_fn( os.path.join(checkpoints_dir, 'vgg_16.ckpt'), slim.get_model_variables('vgg_16')) print "Prediction2.1: " + str(time.time() - start_time) + " seconds" start_time = time.time() init_fn(sess) print "Prediction2.2: " + str(time.time() - start_time) + " seconds" probabilities = 
tf.nn.softmax(logits) print "Prediction1: " + str(time.time() - start_time) + " seconds" start_time = time.time() np_image, probabilities = sess.run([image, probabilities]) runtime = time.time() - start_time print "Prediction: " + str(runtime) + " seconds" for k in range(len(image_ids)): image_id = image_ids[k] predictions = [] prob = probabilities[k, 0:] sorted_inds = [ i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1]) ] for i in range(5): index = sorted_inds[i] if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4': name = names[index] elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152': name = names[index + 1] pr = prob[index] pair = (name, pr) predictions.append(pair) image_id_to_predictions[image_id] = predictions return image_id_to_predictions, runtime, sess
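# The top-5 selection above can be written more directly with NumPy; an equivalent
# sketch, assuming `prob` is the 1-D probability vector for one image:
top5 = np.argsort(-prob)[:5]   # indices of the five largest probabilities
predictions = [(names[i], prob[i]) for i in top5]   # apply the model-specific index offset as above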
def model(images, valid_affines, seq_len, mask, weight_decay=1e-5, is_training=True, model=FLAGS.base_model):
    '''
    define the model, we use slim's implementation of resnet
    '''
    images = mean_image_subtraction(images, [128, 128, 128])

    if model == "resnet_v1_50":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v1_101":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v2_101":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_101(
                images, is_training=is_training, scope='resnet_v2_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "inception_v4":
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, end_points = inception_v4.inception_v4(
                images, num_classes=None, is_training=is_training,
                scope='inception_v4')
        features = ['Mixed_7b', 'Mixed_6b', 'Mixed_5a', 'Mixed_3a']
    elif model == "inception_resnet_v2":
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2.inception_resnet_v2(
                images, num_classes=None, is_training=is_training,
                scope='inception_resnet_v2')
        features = ['Mixed_7a', 'Mixed_6a', 'Mixed_5b', 'MaxPool_3a_3x3']

    #pretty(end_points)
    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points[fea] for fea in features]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    c1_2 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    a = slim.conv2d(slim.conv2d(c1_1, num_outputs[i], 3),
                                    num_outputs[i] // 2, 3)
                    b = slim.conv2d(c1_2, num_outputs[i] // 2, 3)
                    h[i] = tf.concat([a, b], axis=-1)
                    #h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    # g[i] = slim.conv2d(slim.conv2d(h[i], num_outputs[i], 3), num_outputs[i], 3)
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            print('Shape before ROI rotate: {}'.format(g[3].shape))
            text_proposals = roi_rotate(g[3], valid_affines, mask)
            rotated_image = roi_rotate_test(images, valid_affines, mask)
            print('Shape after ROI rotate: {}'.format(text_proposals.shape))

            recon_f = slim.conv2d(text_proposals, 64, 3)
            recon_f = slim.conv2d(recon_f, 64, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            logits = lstm_ctc(recon_f, seq_len)

            # here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) - 0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, logits, text_proposals, g[3], rotated_image
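# `mean_image_subtraction` is used by the models above but defined elsewhere; a
# common slim-style implementation (the default per-channel means are an assumption):
def mean_image_subtraction(images, means=(123.68, 116.78, 103.94)):
    # images: float32 [batch, height, width, 3]; means: one value per channel
    channels = tf.split(images, num_or_size_splits=len(means), axis=3)
    for i in range(len(means)):
        channels[i] -= means[i]
    return tf.concat(channels, axis=3)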
def main(_): batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3] num_classes = 1001 ensemble_type = FLAGS.ensemble_type tf.logging.set_verbosity(tf.logging.INFO) checkpoint_path_list = [ FLAGS.checkpoint_path_inception_v1, FLAGS.checkpoint_path_inception_v2, FLAGS.checkpoint_path_inception_v3, FLAGS.checkpoint_path_inception_v4, FLAGS.checkpoint_path_inception_resnet_v2, FLAGS.checkpoint_path_resnet_v1_101, FLAGS.checkpoint_path_resnet_v1_152, FLAGS.checkpoint_path_resnet_v2_101, FLAGS.checkpoint_path_resnet_v2_152, FLAGS.checkpoint_path_vgg_16, FLAGS.checkpoint_path_vgg_19 ] normalization_method = [ 'default', 'default', 'default', 'default', 'global', 'caffe_rgb', 'caffe_rgb', 'default', 'default', 'caffe_rgb', 'caffe_rgb' ] pred_list = [] for idx, checkpoint_path in enumerate(checkpoint_path_list, 1): with tf.Graph().as_default(): if int(FLAGS.test_idx) == 20 and idx in [3]: continue if int(FLAGS.test_idx) in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 ] and int(FLAGS.test_idx) != idx: continue # Prepare graph if idx in [1, 2, 6, 7, 10, 11]: _x_input = tf.placeholder(tf.float32, shape=batch_shape) x_input = tf.image.resize_images(_x_input, [224, 224]) else: _x_input = tf.placeholder(tf.float32, shape=batch_shape) x_input = _x_input x_input = image_normalize(x_input, normalization_method[idx - 1]) if idx == 1: with slim.arg_scope(inception.inception_v1_arg_scope()): _, end_points = inception.inception_v1( x_input, num_classes=num_classes, is_training=False) elif idx == 2: with slim.arg_scope(inception.inception_v2_arg_scope()): _, end_points = inception.inception_v2( x_input, num_classes=num_classes, is_training=False) elif idx == 3: with slim.arg_scope(inception.inception_v3_arg_scope()): _, end_points = inception.inception_v3( x_input, num_classes=num_classes, is_training=False) elif idx == 4: with slim.arg_scope(inception.inception_v4_arg_scope()): _, end_points = inception.inception_v4( x_input, num_classes=num_classes, is_training=False) elif idx == 5: with slim.arg_scope(inception.inception_resnet_v2_arg_scope()): _, end_points = inception.inception_resnet_v2( x_input, num_classes=num_classes, is_training=False) elif idx == 6: with slim.arg_scope(resnet_v1.resnet_arg_scope()): _, end_points = resnet_v1.resnet_v1_101(x_input, num_classes=1000, is_training=False) elif idx == 7: with slim.arg_scope(resnet_v1.resnet_arg_scope()): _, end_points = resnet_v1.resnet_v1_152(x_input, num_classes=1000, is_training=False) elif idx == 8: with slim.arg_scope(resnet_v2.resnet_arg_scope()): _, end_points = resnet_v2.resnet_v2_101( x_input, num_classes=num_classes, is_training=False) elif idx == 9: with slim.arg_scope(resnet_v2.resnet_arg_scope()): _, end_points = resnet_v2.resnet_v2_152( x_input, num_classes=num_classes, is_training=False) elif idx == 10: with slim.arg_scope(vgg.vgg_arg_scope()): _, end_points = vgg.vgg_16(x_input, num_classes=1000, is_training=False) end_points['predictions'] = tf.nn.softmax( end_points['vgg_16/fc8']) elif idx == 11: with slim.arg_scope(vgg.vgg_arg_scope()): _, end_points = vgg.vgg_19(x_input, num_classes=1000, is_training=False) end_points['predictions'] = tf.nn.softmax( end_points['vgg_19/fc8']) #end_points = tf.reduce_mean([end_points1['Predictions'], end_points2['Predictions'], end_points3['Predictions'], end_points4['Predictions']], axis=0) #predicted_labels = tf.argmax(end_points, 1) # Run computation saver = tf.train.Saver(slim.get_model_variables()) session_creator = tf.train.ChiefSessionCreator( scaffold=tf.train.Scaffold(saver=saver), 
checkpoint_filename_with_path=checkpoint_path, master=FLAGS.master) pred_in = [] filenames_list = [] with tf.train.MonitoredSession( session_creator=session_creator) as sess: for filenames, images in load_images(FLAGS.input_dir, batch_shape): #if idx in [1,2,6,7,10,11]: # # 16x299x299x3 # images = zoom(images, (1, 0.7491638795986622, 0.7491638795986622, 1), order=2) filenames_list.extend(filenames) end_points_dict = sess.run(end_points, feed_dict={_x_input: images}) if idx in [6, 7, 10, 11]: end_points_dict['predictions'] = \ np.concatenate([np.zeros([FLAGS.batch_size, 1]), np.array(end_points_dict['predictions'].reshape(-1, 1000))], axis=1) try: pred_in.extend(end_points_dict['Predictions'].reshape( -1, num_classes)) except KeyError: pred_in.extend(end_points_dict['predictions'].reshape( -1, num_classes)) pred_list.append(pred_in) if ensemble_type == 'mean': pred = np.mean(pred_list, axis=0) labels = np.argmax( pred, axis=1 ) # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch elif ensemble_type == 'vote': pred = np.argmax( pred_list, axis=2 ) # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch labels = np.median(pred, axis=0) with tf.gfile.Open(FLAGS.output_file, 'w') as out_file: for filename, label in zip(filenames_list, labels): out_file.write('{0},{1}\n'.format(filename, label))
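# A toy NumPy check of the two ensembling modes above; shapes follow the comment in
# the code (model_num x batch x class_num), with random data as a stand-in:
pred_list = np.random.rand(3, 2, 5)                            # 3 models, 2 images, 5 classes
mean_labels = np.argmax(np.mean(pred_list, axis=0), axis=1)    # 'mean': average probs, then argmax
vote_labels = np.median(np.argmax(pred_list, axis=2), axis=0)  # 'vote': per-model argmax, then median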
def main(_): if not FLAGS.dataset_dir: raise ValueError( 'You must supply the dataset directory with --dataset_dir') dropout_val = 0.8 is_flip = True is_smoothing = True maintain_aspect_ratio = True min_perc = 0.90 is_random_crops = False max_rotation = 0 num_bins = 500 no_output_params = 4 num_classes = no_output_params * num_bins eval_num_classes = 7 * num_bins num_samples = sum( 1 for _ in tf.python_io.tf_record_iterator(FLAGS.dataset_dir)) print("No. of training examples: ", num_samples) assert max_rotation >= 0 print('---------------------------------------------------------') print('Make sure that no. of training samples is actually ' + str(num_samples)) print('---------------------------------------------------------') if FLAGS.model_name == 'inception-v4': net_width = 299 net_height = 299 else: net_width = 224 net_height = 224 tf.logging.set_verbosity(tf.logging.INFO) with tf.Graph().as_default(): global_step = slim.create_global_step() data_path = FLAGS.dataset_dir filename_queue = tf.train.string_input_producer([data_path]) image, label, carla_width, carla_height = util_tfio.general_read_and_decode( filename_queue, num_classes=8, dtype=tf.float64) print(image) print(label) # -------------------------------------------------------------------------------------------------------------------- degree_angle = tf.random_uniform([], minval=-max_rotation, maxval=max_rotation, dtype=tf.float32) radian_angle = util_tfgeometry.tf_deg2rad(degree_angle) label = tf.reshape(label, (4, 2)) # my_fov = label[3, 0] # my_pitch = label[3, 1] label = label[:3, :] if is_flip: image, bool_flip = util_tfimage.random_flip_left_right(image) def flip_gt(): return tf.stack( ([[ tf.cast(carla_width, label.dtype) - label[1, 0], label[1, 1] ], [ tf.cast(carla_width, label.dtype) - label[0, 0], label[0, 1] ], [ tf.cast(carla_width, label.dtype) - label[2, 0], label[2, 1] ]])) def gt(): return label label = tf.cond(bool_flip, flip_gt, gt) if max_rotation > 0: # image rotation is buggy on GPU with tf.device('/cpu:0'): image = tf.contrib.image.rotate(image, radian_angle, interpolation='BILINEAR') max_width, max_height = util_tfgeometry.rotatedRectWithMaxArea_tf( carla_width, carla_height, radian_angle) max_height = tf.cast(tf.floor(max_height), tf.int32) max_width = tf.cast(tf.floor(max_width), tf.int32) print("max_width, height", max_width, max_height) image = tf.image.resize_image_with_crop_or_pad( image, target_height=max_height, target_width=max_width) rot_vps = util_tfgeometry.rotate_vps( (carla_width / 2, carla_height / 2), label, tf.cast(radian_angle, dtype=tf.float64)) crop_rot_vps = util_tfgeometry.center_crop_vps( rot_vps, orig_dims=(carla_width, carla_height), crop_dims=(max_width, max_height)) else: max_width = carla_width max_height = carla_height crop_rot_vps = label if maintain_aspect_ratio: image, max_width, max_height = util_tfimage.square_random_crop( image, max_width, max_height) if not is_random_crops: image = tf.image.resize_images( image, [net_width, net_height], method=tf.image.ResizeMethod.BILINEAR) float_max_height = tf.cast(max_height, tf.float64) float_max_width = tf.cast(max_width, tf.float64) final_vps = util_tfgeometry.resize_vps( crop_rot_vps, orig_dims=(float_max_width, float_max_height), resize_dims=(net_width, net_height)) else: rand_perc = tf.random_uniform([], minval=min_perc, maxval=1.0) crop_height = tf.maximum( net_height, tf.cast(tf.floor(rand_perc * tf.cast(max_height, tf.float32)), dtype=tf.int32)) crop_width = tf.maximum( net_width, tf.cast(tf.floor(rand_perc * 
tf.cast(max_width, tf.float32)), dtype=tf.int32)) image, off_height, off_width = vgg_preprocessing._custom_random_crop( [image], crop_height, crop_width)[0] image = tf.image.resize_images( image, [net_width, net_height], method=tf.image.ResizeMethod.BILINEAR) temp_final_vps = util_tfgeometry.offset_vps( crop_rot_vps, off_height, off_width) float_crop_height = tf.cast(crop_height, tf.float64) float_crop_width = tf.cast(crop_width, tf.float64) final_vps = util_tfgeometry.resize_vps( temp_final_vps, orig_dims=(float_crop_width, float_crop_height), resize_dims=(net_width, net_height)) image = util_tfimage.distort_color(image, color_ordering=tf.random_uniform( [], minval=0, maxval=4, dtype=tf.int32), fast_mode=False) # Value here, before pre-processing below will be 0-255 if FLAGS.model_name == 'vgg-m': model = pickle.load(open("<vggm-tf.p>", "rb")) average_image = np.load('<vgg_average_image.npy>') image = image - average_image elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16': image = vgg_preprocessing.my_preprocess_image(image) elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \ FLAGS.model_name == 'inception-v4': image = tf.cast(image, tf.float32) * (1. / 255) image = (image - 0.5) * 2 else: sys.exit("Invalid value for model name!") label = tf.reshape(final_vps, (3, 2)) all_label = tf.concat([label, [[0], [0], [0]]], axis=1) output_label, output_indices = util_tfprojection.get_all_projected_from_3vps_modified_tf( all_label, no_bins=num_bins, img_dims=(net_width, net_height), verbose=False) if is_smoothing: stddev = 0.5 max_indices = tf.argmax(output_label, axis=1) normalized = tf.distributions.Normal( loc=tf.reshape(tf.cast(max_indices, dtype=tf.float64), (no_output_params, 1)), scale=tf.constant(stddev, dtype=tf.float64)) probs = normalized.prob( tf.tile( tf.reshape( tf.cast(tf.range(output_label.shape[1]), dtype=tf.float64), (1, -1)), (no_output_params, 1))) act_normalized = probs / tf.reduce_sum( probs, axis=1, keepdims=True) label = tf.reshape(act_normalized, [-1]) else: label = tf.reshape(output_label, [-1]) print("SHAPE AT END:", image, label) # -------------------------------------------------------------------------------------------------------------------- # shuffle requires 'min_after_dequeue' parameter (min to keep in queue) images, labels = tf.train.shuffle_batch( [image, label], batch_size=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=6 * FLAGS.batch_size, min_after_dequeue=4 * FLAGS.batch_size) labels = tf.stop_gradient(labels) ########################### # Reading evaluation data # ########################### if FLAGS.model_name == 'inception-v4': eval_path = '' else: eval_path = '<eval-CARLA-VP.tfrecords' eval_max_batch_size = min(50, FLAGS.batch_size) no_eval_examples = sum( 1 for _ in tf.python_io.tf_record_iterator(eval_path)) divs = np.array(list(factors(no_eval_examples))) sorted_divs = divs[divs.argsort()] eval_batch_size = sorted_divs[sorted_divs < eval_max_batch_size][-1] print("EVALUATION BATCH SIZE:", eval_batch_size) print("Number of examples in evaluation dataset: ", no_eval_examples) eval_filename_queue = tf.train.string_input_producer( [eval_path]) # , num_epochs=2) e_image, e_label = util_tfio.read_and_decode_evaluation( eval_filename_queue, eval_num_classes, net_height, net_width) print("eval_num_classes:", eval_num_classes) # Value here, before pre-processing below will be 0-255 if FLAGS.model_name == 'vgg-m': e_image = e_image - average_image 
elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16': e_image = vgg_preprocessing.my_preprocess_image(e_image) elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \ FLAGS.model_name == 'inception-v4': e_image = tf.cast(e_image, tf.float32) * (1. / 255) e_image = (e_image - 0.5) * 2 else: sys.exit("Invalid value for model name!") e_images, e_labels = tf.train.batch( [e_image, e_label], batch_size=eval_batch_size, num_threads=FLAGS.num_preprocessing_threads, capacity=5 * eval_batch_size) # -------------------------- print("PREFETCH_QUEUE, CAPACITY:", FLAGS.batch_size, ", NUM_THREADS:", FLAGS.num_preprocessing_threads) batch_queue = slim.prefetch_queue.prefetch_queue( [images, labels], capacity=FLAGS.batch_size, num_threads=FLAGS.num_preprocessing_threads) images, labels = batch_queue.dequeue() if FLAGS.model_name == 'vgg-m': logits = vgg_m.cnn_vggm(images, num_classes=num_classes, model=model) eval_logits = vgg_m.cnn_vggm(e_images, num_classes=num_classes, model=model, reuse=True) elif FLAGS.model_name == 'vgg-16': with slim.arg_scope(vgg.vgg_arg_scope()): logits, end_points = vgg.vgg_16(images, num_classes=num_classes, is_training=True, dropout_keep_prob=dropout_val) eval_logits, _ = vgg.vgg_16(e_images, num_classes=num_classes, is_training=False, reuse=True) elif FLAGS.model_name == 'resnet-50': with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, end_points = resnet_v1.resnet_v1_50( images, num_classes=num_classes, is_training=True) eval_logits, _ = resnet_v1.resnet_v1_50( e_images, num_classes=num_classes, is_training=False, reuse=True) elif FLAGS.model_name == 'resnet-101': with slim.arg_scope(resnet_v1.resnet_arg_scope()): logits, end_points = resnet_v1.resnet_v1_101( images, num_classes=num_classes, is_training=True) eval_logits, _ = resnet_v1.resnet_v1_101( e_images, num_classes=num_classes, is_training=False, reuse=True) elif FLAGS.model_name == 'inception-v1': with slim.arg_scope(inception_v1.inception_v1_arg_scope()): logits, end_points = inception_v1.inception_v1( images, num_classes=num_classes, is_training=True, dropout_keep_prob=dropout_val) eval_logits, _ = inception_v1.inception_v1( e_images, num_classes=num_classes, is_training=False, reuse=True) elif FLAGS.model_name == 'inception-v4': with slim.arg_scope(inception_v4.inception_v4_arg_scope()): logits, end_points = inception_v4.inception_v4( images, num_classes=num_classes, is_training=True, dropout_keep_prob=dropout_val) eval_logits, _ = inception_v4.inception_v4( e_images, num_classes=num_classes, is_training=False, reuse=True) elif FLAGS.model_name == 'mobilenet-v1': with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()): logits, end_points = mobilenet_v1.mobilenet_v1( images, num_classes=num_classes, is_training=True, dropout_keep_prob=dropout_val) eval_logits, _ = mobilenet_v1.mobilenet_v1( e_images, num_classes=num_classes, is_training=False, reuse=True) else: sys.exit("Invalid value for model name!") jumps = int(num_classes / no_output_params) classification_loss_1 = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=labels[:, :jumps], logits=logits[:, :jumps])) classification_loss_2 = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=labels[:, jumps:2 * jumps], logits=logits[:, jumps:2 * jumps])) classification_loss_3 = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( labels=labels[:, 2 * jumps:3 * jumps], logits=logits[:, 2 * jumps:3 * jumps])) classification_loss_4 = tf.reduce_mean( 
tf.nn.softmax_cross_entropy_with_logits( labels=labels[:, 3 * jumps:4 * jumps], logits=logits[:, 3 * jumps:4 * jumps])) ############################################################################################## # try implementing L1 loss among both here to help visualize comparison with validation loss logits_ind = tf.argmax(tf.reshape(logits, (-1, no_output_params, num_bins)), axis=2) labels_ind = tf.argmax(tf.reshape(labels, (-1, no_output_params, num_bins)), axis=2) print("Logits_ind shape:", logits_ind.shape) train_l1_loss = tf.reduce_sum(tf.abs(logits_ind - labels_ind)) regularization_loss = tf.add_n(slim.losses.get_regularization_losses()) total_loss = (classification_loss_1 + classification_loss_2 + classification_loss_3 + classification_loss_4 + regularization_loss) print("After classification loss:") print(logits.shape) print(labels.shape) print("---------------------------------------") # Gather initial summaries. summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES)) # Add summaries for losses. # for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope): for loss in tf.get_collection(tf.GraphKeys.LOSSES): summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss)) # Add summaries for variables. for variable in slim.get_model_variables(): summaries.add(tf.summary.histogram(variable.op.name, variable)) ######################################### # Configure the optimization procedure. # ######################################### learning_rate = tf.placeholder(tf.float32, shape=[], name="learning_rate") optimizer = util_tftraining.configure_optimizer(learning_rate, FLAGS=FLAGS) print("learning rate tensor:", learning_rate) # Variables to train. variables_to_train = util_tftraining.get_variables_to_train( FLAGS=FLAGS) print("-----------------------------------------") print("variables to train: ", variables_to_train) print("-----------------------------------------") train_op = slim.learning.create_train_op( total_loss=total_loss, optimizer=optimizer, variables_to_train=variables_to_train, global_step=global_step) if classification_loss_1 is not None: tf.summary.scalar('Losses/classification_loss_1', classification_loss_1) if classification_loss_2 is not None: tf.summary.scalar('Losses/classification_loss_2', classification_loss_2) if classification_loss_3 is not None: tf.summary.scalar('Losses/classification_loss_3', classification_loss_3) if classification_loss_4 is not None: tf.summary.scalar('Losses/classification_loss_4', classification_loss_4) if regularization_loss is not None: tf.summary.scalar('Losses/regularization_loss', regularization_loss) # Add total_loss to summary. summaries.add(tf.summary.scalar('total_loss', total_loss)) summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES)) # Merge all summaries together. 
tf.summary.merge(list(summaries), name='summary_op') session_config = tf.ConfigProto() session_config.allow_soft_placement = True session_config.gpu_options.allow_growth = True init_fn = util_tftraining.get_init_fn(FLAGS=FLAGS) print("Before learning.train", flush=True) print("---------------------------------------------------") print("---------------------------------------------------") early_stop_epochs = 10 no_steps_in_epoch = int(np.ceil(num_samples / FLAGS.batch_size)) scaffold = tf.train.Scaffold(saver=tf.train.Saver( max_to_keep=early_stop_epochs + 3)) show_eval_loss_every_steps = no_steps_in_epoch / 5 save_checkpoint_every_steps = no_steps_in_epoch / 5 with tf.train.MonitoredTrainingSession( master='', is_chief=True, checkpoint_dir=FLAGS.train_dir, scaffold=scaffold, hooks=None, chief_only_hooks=None, save_checkpoint_steps=save_checkpoint_every_steps, save_summaries_secs=FLAGS.save_summaries_secs, config=session_config, stop_grace_period_secs=120, log_step_count_steps=0, max_wait_secs=10) as mon_sess: print("-----------------------------------------") if init_fn is not None: init_fn(mon_sess) print("Succesfully loaded model") else: print("A model already exists in the 'train_dir' path") print("-----------------------------------------") last_sum_train_loss = 0 last_sum_tl1_loss = 0 best_sum_train_loss = np.inf step_no = 0 current_lr = FLAGS.learning_rate no_params = 7 consider_params = 4 consider_top = 11 best_eval_wa = np.inf best_eval_epoch = 0 while True: _, train_loss, tl1_loss = mon_sess.run( [train_op, total_loss, train_l1_loss], feed_dict={learning_rate: current_lr}) last_sum_train_loss += train_loss last_sum_tl1_loss += tl1_loss epoch_no = int( np.floor((step_no * FLAGS.batch_size) / num_samples)) if np.mod(step_no, FLAGS.log_every_n_steps) == 0: print("Epoch {}, Step {}, lr={:0.5f}, Loss: {}".format( epoch_no, step_no, current_lr, train_loss), flush=True) # calculating evaluation loss alongside as well if np.mod(step_no, show_eval_loss_every_steps) == 0: print("--In eval block--") total_l1_loss = 0 total_wa_loss = 0 for loop_no in range( int(np.floor(no_eval_examples / eval_batch_size))): np_rawpreds, np_labels = mon_sess.run( [eval_logits, e_labels]) for i in range(eval_batch_size): predicted_label = np.argmax( np_rawpreds[i, :].reshape(consider_params, -1), axis=1) gt_label = np.argmax(np_labels[i, :].reshape( no_params, -1)[:consider_params, :], axis=1) l1_loss = np.sum(np.abs(predicted_label - gt_label)) wa = 0 for ln in range(consider_params): predsoft = my_softmax( np_rawpreds[i, :].reshape( consider_params, -1)[ln, :][np.newaxis]) predsoft = predsoft.squeeze() labsoft = np_labels[i, :].reshape( no_params, -1)[ln, :] topindices = predsoft.argsort( )[::-1][:consider_top] probsindices = predsoft[topindices] / np.sum( predsoft[topindices]) wa += np.abs( int( np.round( np.sum(probsindices * topindices))) - labsoft.argmax()) total_l1_loss += l1_loss total_wa_loss += wa avg_manhattan_loss = total_l1_loss / no_eval_examples avg_wa_loss = total_wa_loss / no_eval_examples print( "-------------------------------------------------------------------" ) print("Average manhattan loss per scalar:", avg_manhattan_loss / consider_params) print( "Average manhattan loss(Weighted avg. 
top 10 bins)per scalar:", avg_wa_loss / consider_params) print( "-------------------------------------------------------------------", flush=True) if avg_wa_loss < best_eval_wa: best_eval_wa = avg_wa_loss best_eval_epoch = epoch_no if avg_wa_loss > best_eval_wa and ( epoch_no - best_eval_epoch ) > early_stop_epochs and current_lr < 1e-3 and epoch_no > 10: print("STOPPING TRAINING at epoch: ", epoch_no, ", best epoch was:", best_eval_epoch, "(step: ", best_eval_epoch * num_samples / FLAGS.batch_size, ")") print("Current eval_wa:", avg_wa_loss, ", best eval_wa:", best_eval_wa) break if step_no > 0: last_sum_train_loss /= show_eval_loss_every_steps last_sum_tl1_loss /= (no_steps_in_epoch * FLAGS.batch_size * no_output_params) if last_sum_train_loss > best_sum_train_loss: if current_lr > FLAGS.end_learning_rate: print("Dividing learning rate by 10.0") current_lr /= 10.0 best_sum_train_loss = last_sum_train_loss else: print( "Already reached lowest possible lr i.e. ", current_lr) else: best_sum_train_loss = last_sum_train_loss print("last_sum_train_loss:", last_sum_train_loss) print("L1_train_loss:", last_sum_tl1_loss) last_sum_train_loss = 0 last_sum_tl1_loss = 0 ######################################################################################### step_no += 1 if FLAGS.max_number_of_steps is not None: if step_no >= FLAGS.max_number_of_steps: break print("Final Step {}, Loss: {}".format(step_no, train_loss)) print("---------------------The End-----------------------") print("---------------------------------------------------") print("---------------------------------------------------")
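# `my_softmax` is used in the evaluation loop above but defined elsewhere; a
# numerically stable NumPy version, assuming it normalizes along the last axis:
def my_softmax(x):
    e = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return e / np.sum(e, axis=-1, keepdims=True)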