Python resnet_v1_101の例、nets.resnet_v1.resnet_v1_101 Pythonの例

コード例 #1

0

ファイルを表示

    def _build(self, weight_path, sess, input_type=InputType.BASE64_JPEG):
        self.input_tensor = None
        self.session = sess
        if input_type == InputType.TENSOR:
            self.input = tf.placeholder(tf.float32,
                                        shape=[None, 224, 224, 3],
                                        name="input")
            self.input_tensor = self.input
        elif input_type == InputType.BASE64_JPEG:
            self.input = tf.placeholder(tf.string,
                                        shape=(None, ),
                                        name='input')
            self.input_tensor = load_base64_tensor(self.input)
        else:
            raise ValueError('invalid input type')

        # only load inference model
        with arg_scope(
                resnet_v1.resnet_arg_scope(activation_fn=tf.nn.relu,
                                           weight_decay=0.0001)):
            self.logits_val, end_points = resnet_v1.resnet_v1_101(
                self.input_tensor,
                num_classes=self.num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)
        # self.predictions = tf.nn.softmax(self.logits_val, name='Softmax')
        self.predictions = end_points['predictions']
        self.output = tf.identity(self.predictions, name='outputs')

        if weight_path is not None:
            self.load_trained_weights(weight_path)

コード例 #2

0

ファイルを表示

ファイル: test_single_image.py プロジェクト: Nebula4869/test_ImageNet_models

def test_resnet_v1_101(img_dir):
    """
    Test ResNet-V1-101 with a single image.
    :param img_dir: Path of the image to be classified
    :return: classification result and probability of a single image
    """
    img = cv2.imread(img_dir)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (224, 224))
    img = img.reshape((1, 224, 224, 3))

    tf.reset_default_graph()
    inputs = tf.placeholder(name='input_images',
                            shape=[None, 224, 224, 3],
                            dtype=tf.float32)
    with slim.arg_scope(resnet_arg_scope()):
        _, _ = resnet_v1_101(inputs, 1000, is_training=False)

    with tf.Session() as sess:
        tf.train.Saver().restore(sess, './models/resnet_v1_101.ckpt')
        inputs = sess.graph.get_tensor_by_name('input_images:0')
        outputs = sess.graph.get_tensor_by_name(
            'resnet_v1_101/SpatialSqueeze:0')
        pred = tf.argmax(tf.nn.softmax(outputs), axis=1)[0]
        prob = tf.reduce_max(tf.nn.softmax(outputs), axis=1)[0]

        pred, prob = sess.run([pred, prob], feed_dict={inputs: img})
        name = label_dict[pred + 1]

    print('Result of ResNet-V1-101:', name, prob)
    return name, prob

コード例 #3

0

ファイルを表示

def single_tower(colors,
                 depths,
                 num_classes=3,
                 num_channels=1000,
                 is_training=True,
                 global_pool=False,
                 output_stride=16,
                 spatial_squeeze=False,
                 scope='arcnet'):
    inputs = tf.concat([colors, depths], axis=3)
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(
            inputs=inputs,
            num_classes=num_channels,
            is_training=is_training,
            global_pool=global_pool,
            output_stride=output_stride,
            spatial_squeeze=spatial_squeeze,
            scope=scope + '_tower')
    with tf.variable_scope(scope, 'arcnet', [net]) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        # to do: add batch normalization to the following conv layers.
        with slim.arg_scope([slim.conv2d],
                            outputs_collections=end_points_collection):
            net = slim.conv2d(net, 512, [1, 1], scope='conv1')
            net = slim.conv2d(net, 128, [1, 1], scope='conv2')
            net = slim.conv2d(net, num_classes, [1, 1], scope='conv3')
            height, width = net.get_shape().as_list()[1:3]
            net = tf.image.resize_bilinear(net, [height * 2, width * 2],
                                           name='resize_bilinear')
            end_points = slim.utils.convert_collection_to_dict(
                end_points_collection)
    end_points['logits'] = net
    return net, end_points

コード例 #4

0

ファイルを表示

 def create_network(self):
     with tf.contrib.slim.arg_scope(resnet_arg_scope()):
         logits, end_points = resnet_v1_101(self.img,
                                            num_classes=self.nb_class,
                                            is_training=self.is_training,
                                            global_pool=True,
                                            spatial_squeeze=True)
     self.logits = logits
     self.probabilities = tf.nn.sigmoid(self.logits)
     self.predictions = tf.cast(
         self.probabilities >= self.prediction_threshold, tf.float32)

コード例 #5

0

ファイルを表示

ファイル: model.py プロジェクト: zhDai/3D-Teeth-Reconstruction-from-CT-Scans

def model(model_type, images, weight_decay=1e-5, is_training=True):
	images = mean_image_subtraction(images)

	with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
		logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training, scope='resnet_v1_101')

	with tf.variable_scope('feature_fusion', values=[end_points.values]):
		batch_norm_params = {'decay': 0.997,'epsilon': 1e-5,'scale': True,'is_training': is_training}
		with slim.arg_scope([slim.conv2d],
							activation_fn=tf.nn.relu,
							normalizer_fn=slim.batch_norm,
							normalizer_params=batch_norm_params,
							weights_regularizer=slim.l2_regularizer(weight_decay)):

			f = [end_points['pool5'], end_points['pool4'],
				 end_points['pool3'], end_points['pool2']]
			for i in range(4):
				print('Shape of f_{} {}'.format(i, f[i].shape))

			g = [None, None, None, None]
			h = [None, None, None, None]

			for i in range(4):
				h[i]=slim.conv2d(f[i], 256, 1)
			for i in range(4):
				print('Shape of h_{} {}'.format(i, h[i].shape))

			g[0]=RefineBlock(high_inputs=None,low_inputs=h[0])
			g[1]=RefineBlock(g[0],h[1])
			g[2]=RefineBlock(g[1],h[2])
			g[3]=RefineBlock(g[2],h[3])
			#g[3]=unpool(g[3],scale=4)

			output = g[3]

			if model_type == 'sesnet':
				in_shape = g[3].shape
				output = tf.expand_dims(g[3], axis=0)

				lstm_cell_1 = ConvLSTMCell([in_shape[1], in_shape[2]], in_shape[3] // 2, [3, 3])
				lstm_cell_2 = ConvLSTMCell([in_shape[1], in_shape[2]], in_shape[3] // 4, [3, 3])

				with tf.variable_scope('rnn_scope_0', reuse=tf.AUTO_REUSE):
					output0, _ = tf.nn.dynamic_rnn(lstm_cell_1, output, dtype=output.dtype)
				with tf.variable_scope('rnn_scope_1', reuse=tf.AUTO_REUSE):
					output1, _ = tf.nn.dynamic_rnn(lstm_cell_2, output0, dtype=output0.dtype)

				output = tf.squeeze(output1, axis=0)

			F_score = slim.conv2d(output, 2, 1, activation_fn=tf.nn.relu, normalizer_fn=None)

	return F_score

コード例 #6

0

ファイルを表示

def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_101(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_101/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_101'

コード例 #7

0

ファイルを表示

def resnet_v1_101(inputs, is_training, opts):
    with slim.arg_scope(resnet_v1.resnet_arg_scope(
            weight_decay=opts.weight_decay,
            batch_norm_decay=opts.batch_norm_decay,
            batch_norm_epsilon=opts.batch_norm_epsilon,
            activation_fn=tf.nn.relu)):
        return resnet_v1.resnet_v1_101(
            inputs,
            num_classes=opts.num_classes,
            is_training=is_training,
            global_pool=opts.global_pool,
            output_stride=None,
            spatial_squeeze=opts.spatial_squeeze,
            reuse=None)

コード例 #8

0

ファイルを表示

ファイル: model.py プロジェクト: fendaq/tensorflow_ocr

def model_resnet_v1_101(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implemention of resnet
    '''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_101(images, is_training=is_training, scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
        'decay': 0.997,
        'epsilon': 1e-5,
        'scale': True,
        'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i-1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for regression part,
            # we first use a sigmoid to limit the regression range, and also
            # this is do with the angle map
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi/2 # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry

コード例 #9

0

ファイルを表示

ファイル: model.py プロジェクト: hiyaroy12/multitask_learning

def model(images, weight_decay=1e-5, is_training=True):
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_101(images,
                                                     is_training=is_training,
                                                     scope='resnet_v1_101')

    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):

            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]

            for i in range(4):
                h[i] = slim.conv2d(f[i], 256, 1)
            for i in range(4):
                print('Shape of h_{} {}'.format(i, h[i].shape))

            g[0] = RefineBlock(high_inputs=None, low_inputs=h[0])
            g[1] = RefineBlock(g[0], h[1])
            g[2] = RefineBlock(g[1], h[2])
            g[3] = RefineBlock(g[2], h[3])
            #g[3]=unpool(g[3],scale=4)
            F_score = slim.conv2d(g[3],
                                  21,
                                  1,
                                  activation_fn=tf.nn.relu,
                                  normalizer_fn=None)

    return F_score

コード例 #10

0

ファイルを表示

ファイル: model.py プロジェクト: thepsiwa/machine-learning

def build_FPN(images, config, is_training, backbone='resnet50'):
    # images: [batch, h, w, channels]
    # Return: pyramid_feature Dict{P2, P3, P4, P5} of feature maps from different level of the
    # 		  feature pyramid. Each is [batch, height, width, channels]
    pyramid = {}
    # build backbone network
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        if backbone == "resnet50":
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
            pyramid['C2'] = end_points[
                'resnet_v1_50/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_50/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_50/block3/unit_5/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_50/block4/unit_3/bottleneck_v1']
        elif backbone == "resnet101":
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
            pyramid['C2'] = end_points[
                'resnet_v1_101/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_101/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_101/block3/unit_22/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_101/block4/unit_3/bottleneck_v1']
        else:
            print("Unkown backbone : ", backbone)
    # build FPN
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(pyramid['C5'],
                                                config.TOP_DOWN_PYRAMID_SIZE,
                                                1)
            for i in range(4, 1, -1):
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i+1)], \
                 size = (upshape[1], upshape[2]))
                c = slim.conv2d(pyramid['C%d' % i],
                                config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature

コード例 #11

0

ファイルを表示

ファイル: resnet_v1.py プロジェクト: ru003ar/BigDL

def main():
    """
    You can also run these commands manually to generate the pb file
    1. git clone https://github.com/tensorflow/models.git
    2. export PYTHONPATH=Path_to_your_model_folder
    3. python alexnet.py
    """
    tf.set_random_seed(1)
    height, width = 224, 224
    inputs = tf.Variable(tf.random_uniform((2, height, width, 3)), name='input')
    inputs = tf.identity(inputs, "input_node")
    net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=True)
    print("nodes in the graph")
    for n in end_points:
        print(n + " => " + str(end_points[n]))
    net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x), argv[2].split(','))
    run_model(net_outputs, argv[1], 'resnet_v1_101', argv[3] == 'True')

コード例 #12

0

ファイルを表示

ファイル: resnet_v1.py プロジェクト: vaquarkhan/BigDL

def main():
    """
    You can also run these commands manually to generate the pb file
    1. git clone https://github.com/tensorflow/models.git
    2. export PYTHONPATH=Path_to_your_model_folder
    3. python alexnet.py
    """
    height, width = 224, 224
    inputs = tf.Variable(tf.random_uniform((2, height, width, 3)),
                         name='input')
    net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=True)
    print("nodes in the graph")
    for n in end_points:
        print(n + " => " + str(end_points[n]))
    net_outputs = map(lambda x: tf.get_default_graph().get_tensor_by_name(x),
                      argv[2].split())
    run_model(net_outputs, argv[1])

コード例 #13

0

ファイルを表示

ファイル: model.py プロジェクト: netpcvnn/tf-crnn

def deep_cnn(input_imgs: tf.Tensor,
             is_training: bool,
             cnn_model='original_cnn',
             summaries: bool = True) -> tf.Tensor:
    input_tensor = input_imgs
    if input_tensor.shape[-1] == 1:
        input_channels = 1
    elif input_tensor.shape[-1] == 3:
        input_channels = 3
    else:
        raise NotImplementedError

    # Following source code, not paper

    if cnn_model == "resnet_50":
        with tf.variable_scope('resnet_50'):
            cnn_net, _ = resnet_v1_50(input_tensor,
                                      is_training=is_training,
                                      global_pool=False,
                                      on_text=True)
    elif cnn_model == "resnet_101":
        with tf.variable_scope('resnet_101'):
            cnn_net, _ = resnet_v1_101(input_tensor,
                                       is_training=is_training,
                                       global_pool=False,
                                       on_text=True)
    else:
        with tf.variable_scope('original_cnn'):
            cnn_net = original_cnn(input_tensor,
                                   input_channels,
                                   is_training=is_training,
                                   summaries=summaries)

    with tf.variable_scope('Reshaping_cnn'):
        shape = cnn_net.get_shape().as_list(
        )  # [batch, height, width, features]
        transposed = tf.transpose(
            cnn_net, perm=[0, 2, 1, 3],
            name='transposed')  # [batch, width, height, features]
        conv_reshaped = tf.reshape(
            transposed, [shape[0], -1, shape[1] * shape[3]],
            name='reshaped')  # [batch, width, height x features]

    return conv_reshaped

コード例 #14

0

ファイルを表示

 def __call__(self, x_input):
     """Constructs model and return probabilities for given input."""
     reuse = True if self.built else None
     x_input = image_normalize(x_input, normalization_method[5])
     x_input = tf.image.resize_images(x_input, [224, 224])
     with slim.arg_scope(resnet_v1.resnet_arg_scope()):
         _, end_points = resnet_v1.resnet_v1_101(
             x_input,
             num_classes=self.num_classes - 1,
             is_training=False,
             reuse=reuse)
     self.built = True
     end_points['predictions'] = \
                   tf.concat([tf.zeros([tf.shape(x_input)[0], 1]),
                                   tf.reshape(end_points['predictions'], [-1, 1000])],
                                   axis=1)
     output = end_points['predictions']
     # Strip off the extra reshape op at the output
     return output

コード例 #15

0

ファイルを表示

ファイル: fcn_resincpv2.py プロジェクト: vicchu/segModel

def fcn_res101(images, num_classes, is_training=True):

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = resnet_v1.resnet_v1_101(images,
                                                  2,
                                                  is_training=False,
                                                  global_pool=False,
                                                  spatial_squeeze=False,
                                                  output_stride=16)

        #        nn.Conv2D(num_classes, kernel_size=1),
        #        nn.Conv2DTranspose(num_classes, kernel_size=64, padding=16,strides=32)

        #        pool4=end_points['resnet_v1_101/pool4']
        #
        #        dconv1_out=pool4.get_shape().as_list()
        #
        #
        #        deconv1=slim.conv2d_transpose(net,dconv1_out[3],[4,4], stride=2,scope='deconv1')
        #
        #        fu1=tf.add(deconv1,pool4)
        #
        #
        #        pool3=end_points['resnet_v1_101/pool3']
        #        dconv2_out=pool3.get_shape().as_list()
        #        deconv2=slim.conv2d_transpose(fu1,dconv2_out[3],[4,4], stride=2,scope='deconv2')
        #
        #        fu2=tf.add(deconv2,pool3)
        #
        logit = slim.conv2d_transpose(net,
                                      2, [32, 32],
                                      stride=16,
                                      scope='deconv32')
        prediction = tf.argmax(logit, dimension=3)  #, name="prediction")

        return logit, prediction

コード例 #16

0

ファイルを表示

def processing(im_path,dimx,dimy):
    img = image.load_img(im_path, target_size=(dimx, dimy))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
conv_out_train=[]
with tf.Graph().as_default():
    tf.logging.set_verbosity(tf.logging.INFO)


    images = tf.placeholder(tf.float32, shape=(1, dimx, dimy, 3))
    labels = tf.placeholder(tf.uint8, shape=(1,1))

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        logits, _ = resnet_v1.resnet_v1_101(images, num_classes=num_classes, is_training=False)

    probs = tf.argmax(tf.nn.softmax(logits),axis=1)
    one_hot_labels = slim.one_hot_encoding(labels, num_classes)
    gt = tf.argmax(one_hot_labels,axis=-1)
    accuracy = slim.metrics.accuracy(probs, gt)
    init_fn = get_init_fn(model_path);
    with tf.Session() as sess:

        sess.run(tf.initialize_all_variables())
        init_fn(sess)

        #conv_out_train = []
        for class_id,prod_per_class in enumerate(product_files_per_class):
            conv_out = []
            for prod in prod_per_class:

コード例 #17

0

ファイルを表示

def _construct_model(model_type='resnet_v1_50'):
    """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
    # Placeholder input.
    images = array_ops.placeholder(dtypes.float32,
                                   shape=(1, None, None, 3),
                                   name=_INPUT_NODE)

    # Construct model.
    if model_type == 'inception_resnet_v2':
        _, end_points = inception.inception_resnet_v2_base(images)
    elif model_type == 'inception_resnet_v2-same':
        _, end_points = inception.inception_resnet_v2_base(
            images, align_feature_maps=True)
    elif model_type == 'inception_v2':
        _, end_points = inception.inception_v2_base(images)
    elif model_type == 'inception_v2-no-separable-conv':
        _, end_points = inception.inception_v2_base(images,
                                                    use_separable_conv=False)
    elif model_type == 'inception_v3':
        _, end_points = inception.inception_v3_base(images)
    elif model_type == 'inception_v4':
        _, end_points = inception.inception_v4_base(images)
    elif model_type == 'alexnet_v2':
        _, end_points = alexnet.alexnet_v2(images)
    elif model_type == 'vgg_a':
        _, end_points = vgg.vgg_a(images)
    elif model_type == 'vgg_16':
        _, end_points = vgg.vgg_16(images)
    elif model_type == 'mobilenet_v1':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images)
    elif model_type == 'mobilenet_v1_075':
        _, end_points = mobilenet_v1.mobilenet_v1_base(images,
                                                       depth_multiplier=0.75)
    elif model_type == 'resnet_v1_50':
        _, end_points = resnet_v1.resnet_v1_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v1_101':
        _, end_points = resnet_v1.resnet_v1_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_152':
        _, end_points = resnet_v1.resnet_v1_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v1_200':
        _, end_points = resnet_v1.resnet_v1_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_50':
        _, end_points = resnet_v2.resnet_v2_50(images,
                                               num_classes=None,
                                               is_training=False,
                                               global_pool=False)
    elif model_type == 'resnet_v2_101':
        _, end_points = resnet_v2.resnet_v2_101(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_152':
        _, end_points = resnet_v2.resnet_v2_152(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    elif model_type == 'resnet_v2_200':
        _, end_points = resnet_v2.resnet_v2_200(images,
                                                num_classes=None,
                                                is_training=False,
                                                global_pool=False)
    else:
        raise ValueError('Unsupported model_type %s.' % model_type)

    return end_points

コード例 #18

0

ファイルを表示

def compute_feature_of_batch_ts_with_cnn(file_path_of_ts, file_path_of_feature,
                                         cnn_model_name,
                                         file_path_of_pretrained_model):
    r'''
    compute feature of somme time series with pretrained CNN
    :param file_path_of_ts: file path of time series
    :param file_path_of_feature: file path of saving feature
    :param cnn_model_name: name of CNN model
    :param file_path_of_pretrained_model: file path of pretrained CNN
    :return: ''
    '''
    #tf.reset_default_graph()
    #read data
    data = pd.read_csv(file_path_of_ts)
    #data=data.sample(20)
    #change dataframe to list
    id_list = data.iloc[:, 0].tolist()
    data_list = change_dataframe_to_dict_(data)

    model = cnn_model_name
    checkpoint_file = file_path_of_pretrained_model

    # I only have these because I thought some take in size of (299,299), but maybe not
    if 'inception' in model: height, width, channels = 224, 224, 3
    if 'resnet' in model: height, width, channels = 224, 224, 3
    if 'vgg' in model: height, width, channels = 224, 224, 3

    if model == 'inception_resnet_v2': height, width, channels = 299, 299, 3

    x = tf.placeholder(tf.float32, shape=(1, height, width, channels))

    # load up model specific stuff
    if model == 'inception_v1':
        #from inception_v1 import *
        from nets import inception_v1

        arg_scope = inception_v1.inception_v1_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v1.inception_v1(x,
                                                           is_training=False,
                                                           num_classes=None)
            features = end_points['AvgPool_0a_7x7']
            # print('logits')
            # print(logits.shape)
            # print('features')
            # print(features.shape)
    elif model == 'inception_v2':
        #from inception_v2 import *
        from nets import inception_v2

        arg_scope = inception_v2.inception_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v2(x,
                                              is_training=False,
                                              num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_v3':
        #from inception_v3 import *
        from nets import inception_v3

        arg_scope = inception_v3.inception_v3_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_v3(x,
                                              is_training=False,
                                              num_classes=None)
            features = end_points['AvgPool_1a']
    elif model == 'inception_resnet_v2':
        #from inception_resnet_v2 import *
        from nets import inception_resnet_v2

        arg_scope = inception_resnet_v2.inception_resnet_v2_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = inception_resnet_v2(x,
                                                     is_training=False,
                                                     num_classes=1001)
            features = end_points['PreLogitsFlatten']
    elif model == 'resnet_v1_50':
        #from resnet_v1 import *

        from nets import resnet_v1

        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_50(x,
                                                        is_training=False,
                                                        num_classes=1000)
            features = end_points['global_pool']
    elif model == 'resnet_v1_101':
        #from resnet_v1 import *
        from nets import resnet_v1

        arg_scope = resnet_v1.resnet_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = resnet_v1.resnet_v1_101(x,
                                                         is_training=False,
                                                         num_classes=1000)
            features = end_points['global_pool']
    elif model == 'vgg_16':
        #from vgg import *
        from nets import vgg

        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_16(x, is_training=False)
            features = end_points['vgg_16/fc8']
    elif model == 'vgg_19':
        #from vgg import *
        from nets import vgg

        arg_scope = vgg.vgg_arg_scope()
        with slim.arg_scope(arg_scope):
            logits, end_points = vgg.vgg_19(x, is_training=False)
            features = end_points['vgg_19/fc8']
    #cpu_config = tf.ConfigProto(intra_op_parallelism_threads = 8, inter_op_parallelism_threads = 8, device_count = {'CPU': 3})
    #sess = tf.Session(config = cpu_config)
    sess = tf.Session()
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_file)
    feature_list = []
    count_temp = 0

    for i in range(len(data_list)):
        count_temp = count_temp + 1
        #imaging ts
        ts_dict = data_list[i]
        ts = ts_dict['ts']
        id = ts_dict['id']
        new_ts = min_max_transform(ts)
        normalized = np.array(new_ts)
        fig, ax = plt.subplots()
        #plt.imshow(recurrence_plot.rec_plot(normalized), cmap=plt.cm.gray)
        plt.imshow(recurrence_plot.rec_plot(normalized))
        ax.set_xticks([])
        ax.set_yticks([])
        #print(id)
        path = "inception-v1/" + id + ".jpg"
        plt.savefig(path)
        plt.close(fig)
        #compute feature
        # #begin to compute features
        image = misc.imread(path)
        #from matplotlib.pyplot import imread
        #image=imread(path)
        # print('image')
        # print(image.size)
        image = misc.imresize(image, (height, width))
        image = np.expand_dims(image, 0)
        feature = np.squeeze(sess.run(features, feed_dict={x: image}))
        feature_list.append(feature)
        # print('feature-test')
        # print(feature)
        os.remove(path)
        if count_temp % 100 == 0:
            print(count_temp)
        #begin to process parellel result and write_to_csv
    feature_array = np.array(feature_list)

    feature_df = pd.DataFrame(feature_array)
    # print(feature_df.shape)
    # print(len(id_list))
    #add id
    feature_df.insert(loc=0, column='id', value=id_list)
    # print(feature_final_df.shape)
    # print(feature_final_df.head())
    feature_df.to_csv(file_path_of_feature, index=False)
    gc.collect()

コード例 #19

0

ファイルを表示

ファイル: extract_feature_2_TFRecords_OpenImages.py プロジェクト: sorrowyn/cvpr20_IMCL

processed_images, img_ids, labels = dataset.make_one_shot_iterator().get_next()
#%%
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.InteractiveSession(config=config)
g = tf.get_default_graph()
#%%
with g.as_default():
    if is_save:
        feature_writer = tf.python_io.TFRecordWriter(feature_tfrecord_filename)

    img_input_ph = tf.placeholder(dtype=tf.float32,
                                  shape=[None, height, width, 3])
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        _, _ = resnet_v1.resnet_v1_101(img_input_ph,
                                       num_classes=5000,
                                       is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(checkpoints_dir,
                                                 slim.get_model_variables())
        features = g.get_tensor_by_name('resnet_v1_101/pool5:0')

    idx = 0
    init_fn(sess)
    while True:  #idx < 3125:
        try:
            processed_images_v, img_ids_v, labels_v = sess.run(
                [processed_images, img_ids, labels])
            features_v = sess.run(features, {img_input_ph: processed_images_v})
            print('batch no. {}'.format(idx))
            for idx_s in range(features_v.shape[0]):
                feature = features_v[idx_s, :, :, :]

コード例 #20

0

ファイルを表示

ファイル: resnet_v1_101_8s.py プロジェクト: TrendonixNetwork/ProjectCybonix

def resnet_v1_101_8s(image_batch_tensor,
                     number_of_classes,
                     is_training):
    """Returns the resnet_v1_101_8s model definition.
    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al.
    The network subsamples the input by a factor of 8 and uses the bilinear
    upsampling kernel to upsample prediction by a factor of 8. This means that
    if the image size is not of the factor 8, the prediction of different size
    will be delivered. To adapt the network for an any size input use 
    adapt_network_for_any_size_input(resnet_v1_101_8s, 8). Note: the upsampling kernel
    is fixed in this model definition, because it didn't give significant
    improvements according to aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to input,
        use adapt_network_for_any_size_input to adapt network for any input size.
        Otherwise, the input images sizes should be of multiple 8.
    resnet_v1_101_8s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_101_8s model's variables to resnet_v1_101 checkpoint variables
        names. We need this to initilize the weights of resnet_v1_101_8s model with resnet_v1_101 from
        checkpoint file. Look at ipython notebook for examples.
    """
    
    with tf.variable_scope("resnet_v1_101_8s") as resnet_v1_101_8s:

        upsample_factor = 8

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        
        
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(mean_centered_image_batch,
                                                number_of_classes,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=8)
        

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the ouput size of the upsampled tensor
        upsampled_logits_shape = tf.pack([
                                          downsampled_logits_shape[0],
                                          downsampled_logits_shape[1] * upsample_factor,
                                          downsampled_logits_shape[2] * upsample_factor,
                                          downsampled_logits_shape[3]
                                         ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original vgg-16 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_101_8s_variables_mapping = {}

        resnet_v1_101_8s_variables = slim.get_variables(resnet_v1_101_8s)

        for variable in resnet_v1_101_8s_variables:

            # Here we remove the part of a name of the variable
            # that is responsible for the current variable scope
            original_resnet_v1_101_checkpoint_string = variable.name[len(resnet_v1_101_8s.original_name_scope):-2]
            resnet_v1_101_8s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_101_8s_variables_mapping

コード例 #21

0

ファイルを表示

def main(_):
  batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
  num_classes = 1001

  # max_epsilon over checking
  # get original images
  origin_img_list=np.sort(glob.glob(FLAGS.origin_img_dir+"*.png"));
  origin_imgs=np.zeros((len(origin_img_list),FLAGS.image_height,FLAGS.image_width,3),dtype=float);
  for i in range(len(origin_img_list)):
    origin_imgs[i]=imread(origin_img_list[i],mode='RGB').astype(np.float);
  # get adv images
  adv_img_list=np.sort(glob.glob(FLAGS.input_dir+"*.png"));
  adv_imgs=np.zeros((len(adv_img_list),FLAGS.image_height,FLAGS.image_width,3),dtype=float);
  for i in range(len(adv_img_list)):
    adv_imgs[i]=imread(adv_img_list[i],mode='RGB').astype(np.float);
  epsilon_list=np.linalg.norm(np.reshape(abs(origin_imgs-adv_imgs),[-1,FLAGS.image_height*FLAGS.image_width*3]),ord=np.inf,axis=1);
  #print(epsilon_list);exit(1);
  over_epsilon_list=np.zeros((len(origin_img_list),2),dtype=object);
  cnt=0;
  for i in range(len(origin_img_list)):
    file_name=origin_img_list[i].split("/")[-1];
    file_name=file_name.split(".")[0];
    over_epsilon_list[i,0]=file_name;
    if(epsilon_list[i]>FLAGS.max_epsilon):
      over_epsilon_list[i,1]="1";
      cnt+=1;
  tf.logging.set_verbosity(tf.logging.INFO)

  with tf.Graph().as_default():
    # Prepare graph
    x_input = tf.placeholder(tf.float32, shape=batch_shape)

    if(FLAGS.checkpoint_file_name=="inception_v3.ckpt"):
      with slim.arg_scope(inception.inception_v3_arg_scope()):
        _, end_points = inception.inception_v3(
            x_input, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['Predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="inception_v4.ckpt"):
      with slim.arg_scope(inception.inception_v4_arg_scope()):
        _, end_points = inception.inception_v4(
            x_input, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['Predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="inception_resnet_v2_2016_08_30.ckpt"):
      with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
        _, end_points = inception.inception_resnet_v2(
            x_input, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['Predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="resnet_v2_101.ckpt"):
      x_input2 = tf.image.resize_bilinear(x_input,[224,224],align_corners=False);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_101(
            x_input2, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="resnet_v2_50.ckpt"):
      x_input2 = tf.image.resize_bilinear(x_input,[224,224],align_corners=False);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_50(
            x_input2, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="resnet_v2_152.ckpt"):
      x_input2 = tf.image.resize_bilinear(x_input,[224,224],align_corners=False);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v2.resnet_v2_152(
            x_input2, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="inception_v1.ckpt"):
      x_input2 = tf.image.resize_bilinear(x_input,[224,224],align_corners=False);
      with slim.arg_scope(inception.inception_v1_arg_scope()):
        _, end_points = inception.inception_v1(
            x_input2, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['Predictions'], 1)
    elif(FLAGS.checkpoint_file_name=="inception_v2.ckpt"):
      x_input2 = tf.image.resize_bilinear(x_input,[224,224],align_corners=False);
      with slim.arg_scope(inception.inception_v2_arg_scope()):
        _, end_points = inception.inception_v2(
            x_input2, num_classes=num_classes, is_training=False)
      predicted_labels = tf.argmax(end_points['Predictions'], 1)

    # Resnet v1 and vgg are not working now
    elif(FLAGS.checkpoint_file_name=="vgg_16.ckpt"):
      x_input_list=tf.unstack(x_input,FLAGS.batch_size,0);
      for i in range(FLAGS.batch_size):
        x_input_list[i]=vgg_preprocessing.preprocess_image(x_input_list[i],224,224);
      x_input2=tf.stack(x_input_list,0);
      with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_16(
            x_input2, num_classes=num_classes-1, is_training=False)
      predicted_labels = tf.argmax(end_points['vgg_16/fc8'], 1)+1
    elif(FLAGS.checkpoint_file_name=="vgg_19.ckpt"):
      x_input_list=tf.unstack(x_input,FLAGS.batch_size,0);
      for i in range(FLAGS.batch_size):
        x_input_list[i]=vgg_preprocessing.preprocess_image(x_input_list[i],224,224);
      x_input2=tf.stack(x_input_list,0);
      with slim.arg_scope(vgg.vgg_arg_scope()):
        _, end_points = vgg.vgg_19(
            x_input2, num_classes=num_classes-1, is_training=False)
      predicted_labels = tf.argmax(end_points['vgg_19/fc8'], 1)+1
    elif(FLAGS.checkpoint_file_name=="resnet_v1_50.ckpt"):
      x_input_list=tf.unstack(x_input,FLAGS.batch_size,0);
      for i in range(FLAGS.batch_size):
        x_input_list[i]=vgg_preprocessing.preprocess_image(x_input_list[i],224,224);
      x_input2=tf.stack(x_input_list,0);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_50(
            x_input, num_classes=num_classes-1, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)+1
    elif(FLAGS.checkpoint_file_name=="resnet_v1_101.ckpt"):
      x_input_list=tf.unstack(x_input,FLAGS.batch_size,0);
      for i in range(FLAGS.batch_size):
        x_input_list[i]=vgg_preprocessing.preprocess_image(x_input_list[i],224,224);
      x_input2=tf.stack(x_input_list,0);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_101(
            x_input2, num_classes=num_classes-1, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)+1
    elif(FLAGS.checkpoint_file_name=="resnet_v1_152.ckpt"):
      x_input_list=tf.unstack(x_input,FLAGS.batch_size,0);
      for i in range(FLAGS.batch_size):
        x_input_list[i]=vgg_preprocessing.preprocess_image(x_input_list[i],224,224);
      x_input2=tf.stack(x_input_list,0);
      with slim.arg_scope(resnet_utils.resnet_arg_scope()):
        _, end_points = resnet_v1.resnet_v1_152(
            x_input2, num_classes=num_classes-1, is_training=False)
      predicted_labels = tf.argmax(end_points['predictions'], 1)+1
    
    # Run computation
    saver = tf.train.Saver(slim.get_model_variables())
    session_creator = tf.train.ChiefSessionCreator(
        scaffold=tf.train.Scaffold(saver=saver),
        checkpoint_filename_with_path=FLAGS.checkpoint_path+FLAGS.checkpoint_file_name,
        master=FLAGS.master)

    f=open(FLAGS.true_label,"r");
    t_label_list=np.array([i[:-1].split(",") for i in f.readlines()]);
    
    score=0;
    with tf.train.MonitoredSession(session_creator=session_creator) as sess:
      with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
        for filenames, images in load_images(FLAGS.input_dir, batch_shape):
          labels = sess.run(predicted_labels, feed_dict={x_input: images})
          for filename, label in zip(filenames, labels):
            f_name=filename.split(".")[0];
            t_label=int(t_label_list[t_label_list[:,0]==f_name,1][0]);
            if(t_label!=label):
              if(over_epsilon_list[over_epsilon_list[:,0]==f_name,1]!="1"):
                score+=1;
            #out_file.write('{0},{1}\n'.format(filename, label))
  print("Over max epsilon#: "+str(cnt));
  print(str(FLAGS.max_epsilon)+" max epsilon Score: "+str(score));

コード例 #22

0

ファイルを表示

ファイル: rf_benchmark.py プロジェクト: Albert-Z-Guo/tensorflow

def _construct_model(model_type='resnet_v1_50'):
  """Constructs model for the desired type of CNN.

  Args:
    model_type: Type of model to be used.

  Returns:
    end_points: A dictionary from components of the network to the corresponding
      activations.

  Raises:
    ValueError: If the model_type is not supported.
  """
  # Placeholder input.
  images = array_ops.placeholder(
      dtypes.float32, shape=(1, None, None, 3), name=_INPUT_NODE)

  # Construct model.
  if model_type == 'inception_resnet_v2':
    _, end_points = inception.inception_resnet_v2_base(images)
  elif model_type == 'inception_resnet_v2-same':
    _, end_points = inception.inception_resnet_v2_base(
        images, align_feature_maps=True)
  elif model_type == 'inception_v2':
    _, end_points = inception.inception_v2_base(images)
  elif model_type == 'inception_v2-no-separable-conv':
    _, end_points = inception.inception_v2_base(
        images, use_separable_conv=False)
  elif model_type == 'inception_v3':
    _, end_points = inception.inception_v3_base(images)
  elif model_type == 'inception_v4':
    _, end_points = inception.inception_v4_base(images)
  elif model_type == 'alexnet_v2':
    _, end_points = alexnet.alexnet_v2(images)
  elif model_type == 'vgg_a':
    _, end_points = vgg.vgg_a(images)
  elif model_type == 'vgg_16':
    _, end_points = vgg.vgg_16(images)
  elif model_type == 'mobilenet_v1':
    _, end_points = mobilenet_v1.mobilenet_v1_base(images)
  elif model_type == 'mobilenet_v1_075':
    _, end_points = mobilenet_v1.mobilenet_v1_base(
        images, depth_multiplier=0.75)
  elif model_type == 'resnet_v1_50':
    _, end_points = resnet_v1.resnet_v1_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_101':
    _, end_points = resnet_v1.resnet_v1_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_152':
    _, end_points = resnet_v1.resnet_v1_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v1_200':
    _, end_points = resnet_v1.resnet_v1_200(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_50':
    _, end_points = resnet_v2.resnet_v2_50(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_101':
    _, end_points = resnet_v2.resnet_v2_101(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_152':
    _, end_points = resnet_v2.resnet_v2_152(
        images, num_classes=None, is_training=False, global_pool=False)
  elif model_type == 'resnet_v2_200':
    _, end_points = resnet_v2.resnet_v2_200(
        images, num_classes=None, is_training=False, global_pool=False)
  else:
    raise ValueError('Unsupported model_type %s.' % model_type)

  return end_points

コード例 #23

0

ファイルを表示

ファイル: run.py プロジェクト: fitrialif/L44_Mini_Project

num_steps_per_epoch = num_batches_per_epoch 

checkpoints_dir = '/tmp/checkpoints'

tf.reset_default_graph()

images = tf.placeholder(tf.float32,shape=[None,height_image,width_image,3])
labels = tf.placeholder(tf.float32,shape=[None,3])
learning_rate = tf.placeholder(tf.float32,shape=[])
keep_prob = tf.placeholder(tf.float32,shape=[])

with slim.arg_scope(resnet_arg_scope()):
    #restore resnet101 model
    #imgs = tf.map_fn(vgg_preprocessing.preprocess_image(fname, height_image, width_image, data_type) 
    #imgs = [vgg_preprocessing.preprocess_image(fname, height_image, width_image, data_type) for fname in imgs]
    resnet_logits, end_points = resnet_v1_101(images, num_classes=3, global_pool=True, is_training=True)
    

def feed_dict(batch_size, data_type, epoch):
  keep_prob_per = keep_prob_val
  lr = initial_learning_rate
    
  if data_type == 1:
    data = get_images(data_dir,data_type,batch_size)
    keep_prob_per = keep_prob_val

  elif data_type == 2:
    data = get_images(data_dir,data_type,batch_size)
    keep_prob_per = 1
    
  elif data_type == 3:

コード例 #24

0

ファイルを表示

def main():
    data_path = '<train-CARLA-VP.tfrecords>'

    model_type = 'vgg-16'
    train_dir = '<saved_model_path>'
    est_label = 'horvpz'

    num_bins = 500

    sphere_params = np.load('<carlavp_label_to_horvpz_fov_pitch.npz>')
    all_bins = sphere_params['all_bins']
    all_sphere_centres = sphere_params['all_sphere_centres']
    all_sphere_radii = sphere_params['all_sphere_radii']

    if est_label == 'horfov':
        fov_bins = np.arange(15, 115, 100 / num_bins)
        half_fov_bin_size = (fov_bins[1] - fov_bins[0]) / 2

    if model_type == 'inceptionv4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224
    if model_type == 'vgg-m':
        model = pickle.load(open("<vggm-tf.p>", "rb"))
        average_image = np.load('<vgg_average_image.npy>')
    elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
        _R_MEAN = 123.68
        _G_MEAN = 116.78
        _B_MEAN = 103.94
        resnet_average_channels = np.array(np.concatenate(
            (np.tile(_R_MEAN, (net_height, net_width, 1)),
             np.tile(_G_MEAN, (net_height, net_width, 1)),
             np.tile(_B_MEAN, (net_height, net_width, 1))),
            axis=2),
                                           dtype=np.float32)
    elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
        print("Nothing needs to be initialized for this cnn model")
    else:
        print("ERROR: No such CNN exists")
    if est_label == 'horfov':
        no_params_model = 3
    elif est_label == 'horvpz':
        no_params_model = 4
    else:
        print("ERROR: No such 'est_label'")

    max_batch_size = 60

    total_examples = sum(1 for _ in tf.python_io.tf_record_iterator(data_path))
    print("Total examples: ", total_examples)

    divs = np.array(list(factors(total_examples)))
    sorted_divs = divs[divs.argsort()]
    batch_size = sorted_divs[sorted_divs < max_batch_size][-1]
    print("Batch Size:", batch_size)

    ct = np.arange(11, 12, 4)

    best_avg_man_loss = np.inf

    for en, consider_top in enumerate(ct):

        total_manhattan_loss = np.zeros(5)

        with tf.Graph().as_default():
            tf.logging.set_verbosity(tf.logging.INFO)
            filename_queue = tf.train.string_input_producer([data_path])
            image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
                filename_queue, num_classes=8, dtype=tf.float64)

            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            if model_type == 'vgg-m':
                image = image - average_image
            elif model_type == 'resnet50' or model_type == 'vgg-16' or model_type == 'resnet101':
                image = image - resnet_average_channels
            elif model_type == 'inceptionv1' or model_type == 'inceptionv4':
                image = tf.cast(image, tf.float32) * (1. / 255)
                image = (image - 0.5) * 2
            else:
                print("ERROR: No such CNN exists")

            images, labels, carla_widths, carla_heights = tf.train.batch(
                [image, label, carla_width, carla_height],
                batch_size=batch_size,
                num_threads=1,
                capacity=5 * batch_size)

            print(images)

            if model_type == 'vgg-m':
                logits = vgg_m.cnn_vggm(images,
                                        num_classes=num_bins * no_params_model,
                                        model=model)
            elif model_type == 'resnet50':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_50(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)  # , reuse=True)#
            elif model_type == 'resnet101':
                with slim.arg_scope(resnet_v1.resnet_arg_scope()) as scope:
                    logits, _ = resnet_v1.resnet_v1_101(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False,
                        global_pool=True)  # , reuse=True)#
            elif model_type == 'vgg-16':
                with slim.arg_scope(vgg.vgg_arg_scope()) as scope:
                    logits, _ = vgg.vgg_16(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            elif model_type == 'inceptionv1':
                with slim.arg_scope(
                        inception_v1.inception_v1_arg_scope()) as scope:
                    logits, _ = inception_v1.inception_v1(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            elif model_type == 'inceptionv4':
                with slim.arg_scope(
                        inception_v4.inception_v4_arg_scope()) as scope:
                    logits, _ = inception_v4.inception_v4(
                        images,
                        num_classes=num_bins * no_params_model,
                        is_training=False
                    )  # , global_pool=False)#, reuse=True)#
            else:
                print("ERROR: No such CNN exists")

            checkpoint_path = train_dir
            init_fn = slim.assign_from_checkpoint_fn(
                checkpoint_path, slim.get_variables_to_restore())

            print("--------------------------------------------------------")
            print("No. of examples not evaluated because of batch size:",
                  np.mod(total_examples, batch_size))
            print("--------------------------------------------------------")

            with tf.Session() as sess:
                with slim.queues.QueueRunners(sess):
                    sess.run(tf.initialize_local_variables())
                    init_fn(sess)

                    for loop_no in range(
                            int(np.floor(total_examples / batch_size))):
                        np_rawpreds, np_images_raw, np_labels, np_width, np_height = sess.run(
                            [
                                logits, images, labels, carla_widths,
                                carla_heights
                            ])

                        for i in range(batch_size):
                            pred_indices = np.zeros(no_params_model,
                                                    dtype=np.int32)
                            output_vals = np_rawpreds[i, :].squeeze().reshape(
                                no_params_model, -1)

                            for ln in range(no_params_model):
                                predsoft = my_softmax(
                                    output_vals[ln, :][np.newaxis]).squeeze()

                                topindices = predsoft.argsort(
                                )[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                pred_indices[ln] = np.abs(
                                    int(
                                        np.round(
                                            np.sum(probsindices *
                                                   topindices))))

                            if est_label == 'horfov':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    np.hstack(
                                        (pred_indices[:2], 0, 0)), all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                my_fov = fov_bins[
                                    pred_indices[2]] + half_fov_bin_size
                                fx, fy, roll_from_horizon, my_tilt = get_intrinisic_extrinsic_params_from_horfov(
                                    img_dims=(np_width[i], np_height[i]),
                                    horizonvector=estimated_input_points,
                                    fov=my_fov,
                                    net_dims=(net_width, net_height))

                            elif est_label == 'horvpz':
                                estimated_input_points = get_horvpz_from_projected_4indices_modified(
                                    pred_indices[:4], all_bins,
                                    all_sphere_centres, all_sphere_radii)
                                fx, fy, roll_from_horizon, my_tilt = \
                                    get_intrinisic_extrinsic_params_from_horizonvector_vpz(
                                        img_dims=(np_width[i], np_height[i]),
                                        horizonvector_vpz=estimated_input_points,
                                        net_dims=(net_width, net_height))

                            my_fov_fx = degrees(
                                np.arctan(np_width[i] / (2 * fx)) * 2)
                            my_fov_fy = degrees(
                                np.arctan(np_width[i] / (2 * fy)) * 2)
                            my_tilt = -degrees(my_tilt)
                            roll_from_horizon = roll_from_horizon

                            gt_label = np_labels[i, :].reshape(4, -1)
                            gt_fov = gt_label[3, 0]
                            gt_pitch = gt_label[3, 1]
                            gt_roll = degrees(
                                atan((gt_label[1, 1] - gt_label[0, 1]) /
                                     (gt_label[1, 0] - gt_label[0, 0])))

                            manhattan_loss = [
                                np.abs(my_fov_fx - gt_fov),
                                np.abs(my_fov_fy - gt_fov),
                                np.abs(((my_fov_fx + my_fov_fy) / 2) - gt_fov),
                                np.abs(my_tilt - gt_pitch),
                                np.abs(roll_from_horizon - gt_roll)
                            ]

                            total_manhattan_loss += manhattan_loss

        avg_manhattan_loss = total_manhattan_loss / total_examples

        print("ct:", consider_top, "Average manhattan loss per scalar: ",
              avg_manhattan_loss)
        print(
            "-------------------------------------------------------------------"
        )

        this_loss = np.mean(
            np.hstack((avg_manhattan_loss[1], avg_manhattan_loss[3:])))
        if this_loss < best_avg_man_loss:
            best_avg_man_loss = this_loss
            display_loss = [
                consider_top, -1, avg_manhattan_loss[1], avg_manhattan_loss[3],
                avg_manhattan_loss[4]
            ]

    print("Best loss:", display_loss)

コード例 #25

0

ファイルを表示

def batch_prediction(frame_id_to_path, frame_id_to_image_ids, image_id_to_coordinates, model, image_size, sess, \
                    debug=_prediction_debug):
    print "batch processing: " + str(len(image_id_to_coordinates))
    if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4' or \
            model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
            model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
        preprocessing_type = 'inception'
    elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
        preprocessing_type = 'vgg'
    image_id_to_predictions = {}
    image_ids = []
    count = 0
    start_time_1 = time.time()
    for frame_id, path in frame_id_to_path.iteritems():
        frame_string = open(path, 'rb').read()
        frame = tf.image.decode_jpeg(frame_string, channels=3)
        #plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(tf.image.encode_jpeg(frame)))))
        #plt.show()
        frame_np = cv2.imread(path, cv2.IMREAD_COLOR)
        frame_height, frame_width = frame_np.shape[:2]
        #print frame_np.shape
        if preprocessing_type == 'inception':
            processed_frame = preprocess_for_inception(frame,
                                                       frame_height,
                                                       frame_width,
                                                       sess,
                                                       central_fraction=1.0,
                                                       debug=_prediction_debug)
        elif preprocessing_type == 'vgg':
            processed_frame = preprocess_for_vgg(frame,
                                                 frame_height,
                                                 frame_width,
                                                 frame_height,
                                                 sess,
                                                 debug=_prediction_debug)
        start_time = time.time()
        height, width = processed_frame.shape[:2].as_list()
        #print "Size: "+str(width)+", "+str(height)
        #plt.imshow(PIL.Image.open(StringIO.StringIO(sess.run(tf.image.encode_jpeg(tf.cast(processed_frame, tf.uint8))))))
        #plt.show()
        for image_id in frame_id_to_image_ids[frame_id]:
            fields = image_id_to_coordinates[image_id].split('\t')
            x = int(width * float(fields[0]))
            y = int(height * float(fields[1]))
            w = int(width * float(fields[2]))
            h = int(height * float(fields[3]))
            processed_image = tf.image.crop_to_bounding_box(
                processed_frame, y, x, h, w)
            if debug:
                print "object at " + str(fields)
                print str(x) + ", " + str(y) + ", " + str(w) + ", " + str(
                    h) + ", " + str(frame_height - y - h)
                if preprocessing_type == 'vgg':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(processed_image, tf.uint8))))))
                elif preprocessing_type == 'inception':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(
                                            tf.multiply(processed_image, 255),
                                            tf.uint8))))))
                plt.show()
            processed_image = tf.image.resize_images(processed_image,
                                                     (image_size, image_size))
            if debug:
                print "resized"
                if preprocessing_type == 'vgg':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(processed_image, tf.uint8))))))
                elif preprocessing_type == 'inception':
                    plt.imshow(
                        PIL.Image.open(
                            StringIO.StringIO(
                                sess.run(
                                    tf.image.encode_jpeg(
                                        tf.cast(
                                            tf.multiply(processed_image, 255),
                                            tf.uint8))))))
                plt.show()
            if count == 0:
                processed_images = tf.expand_dims(processed_image, 0)
            else:
                local_matrix = tf.expand_dims(processed_image, 0)
                processed_images = tf.concat([processed_images, local_matrix],
                                             0)
            image_ids.append(image_id)
            count = count + 1
    print "Preparation: " + str(time.time() - start_time_1) + " seconds"
    start_time = time.time()
    if model == 'inception_v1':
        logits, _ = inception.inception_v1(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
            slim.get_model_variables('InceptionV1'))
    elif model == 'inception_v2':
        logits, _ = inception.inception_v2(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v2.ckpt'),
            slim.get_model_variables('InceptionV2'))
    elif model == 'inception_v3':
        logits, _ = inception.inception_v3(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables('InceptionV3'))
    elif model == 'inception_v4':
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
    elif model == 'resnet_v1_50':
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'),
            slim.get_model_variables('resnet_v1_50'))
    elif model == 'resnet_v1_101':
        logits, _ = resnet_v1.resnet_v1_101(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'),
            slim.get_model_variables('resnet_v1_101'))
    elif model == 'resnet_v1_152':
        logits, _ = resnet_v1.resnet_v1_152(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'),
            slim.get_model_variables('resnet_v1_152'))
    elif model == 'mobilenet_v1_0.25_128':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001, is_training=False, \
                                              depth_multiplier=0.25)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.25_128.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_0.50_160':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001, is_training=False, \
                                              depth_multiplier=0.50)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_0.50_160.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'mobilenet_v1_1.0_224':
        logits, _ = mobilenet_v1.mobilenet_v1(processed_images, num_classes=1001, is_training=False, \
                                              depth_multiplier=1.0)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'mobilenet_v1_1.0_224.ckpt'),
            slim.get_model_variables('MobilenetV1'))
    elif model == 'inception_resnet_v2':
        logits, _ = inception_resnet_v2.inception_resnet_v2(processed_images,
                                                            num_classes=1001,
                                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir,
                         'inception_resnet_v2_2016_08_30.ckpt'),
            slim.get_model_variables('InceptionResnetV2'))
    elif model == 'nasnet_mobile':
        logits, _ = nasnet.build_nasnet_mobile(processed_images,
                                               num_classes=1001,
                                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'nasnet_large':
        logits, _ = nasnet.build_nasnet_large(processed_images,
                                              num_classes=1001,
                                              is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'model.ckpt'),
            slim.get_model_variables())
    elif model == 'vgg_16':
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))
    print "Prediction2.1: " + str(time.time() - start_time) + " seconds"
    start_time = time.time()
    init_fn(sess)
    print "Prediction2.2: " + str(time.time() - start_time) + " seconds"
    probabilities = tf.nn.softmax(logits)

    start_time = time.time()
    np_image, probabilities = sess.run([frame, probabilities])
    runtime = time.time() - start_time
    print "Prediction: " + str(runtime) + " seconds"
    for k in range(len(image_ids)):
        image_id = image_ids[k]
        predictions = []
        prob = probabilities[k, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1])
        ]
        for i in range(5):
            index = sorted_inds[i]
            if model == 'inception_v1' or model == 'inception_v2' or \
                    model == 'inception_v3' or model == 'inception_v4' or \
                    model == 'mobilenet_v1_0.25_128' or model == 'mobilenet_v1_0.50_160' or model == 'mobilenet_v1_1.0_224' or \
                    model == 'inception_resnet_v2' or model == 'nasnet_mobile' or model == 'nasnet_large':
                name = names[index]
            elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
                name = names[index + 1]
            pr = prob[index]
            pair = (name, pr)
            predictions.append(pair)
        image_id_to_predictions[image_id] = predictions
    return image_id_to_predictions, runtime, sess

コード例 #26

0

ファイルを表示

    def __init__(self, num_classes, train_layers=None, weights_path='DEFAULT'):
        """Create the graph of the resnetv1_101 model.
        """

        # Parse input arguments into class variables
        if weights_path == 'DEFAULT':
            self.WEIGHTS_PATH = "./pre_trained_models/resnet_v1_101.ckpt"
        else:
            self.WEIGHTS_PATH = weights_path
        self.train_layers = train_layers

        with tf.variable_scope("input"):
            self.image_size = resnet_v1.resnet_v1_101.default_image_size
            self.x_input = tf.placeholder(
                tf.float32, [None, self.image_size, self.image_size, 3],
                name="x_input")
            self.y_input = tf.placeholder(tf.float32, [None, num_classes],
                                          name="y_input")
            self.learning_rate = tf.placeholder(tf.float32,
                                                name="learning_rate")

        # train
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits, _ = resnet_v1.resnet_v1_101(self.x_input,
                                                     num_classes=num_classes,
                                                     is_training=True,
                                                     reuse=tf.AUTO_REUSE)

        # validation
        with arg_scope(resnet_v1.resnet_arg_scope()):
            self.logits_val, _ = resnet_v1.resnet_v1_101(
                self.x_input,
                num_classes=num_classes,
                is_training=False,
                reuse=tf.AUTO_REUSE)

        with tf.name_scope("loss"):
            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits, labels=self.y_input))
            self.loss_val = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=self.logits_val, labels=self.y_input))

        with tf.name_scope("train"):

            self.global_step = tf.Variable(0,
                                           name="global_step",
                                           trainable=False)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

            var_list = [
                v for v in tf.trainable_variables()
                if v.name.split('/')[-2] in train_layers
                or v.name.split('/')[-3] in train_layers
            ]
            gradients = tf.gradients(self.loss, var_list)
            self.grads_and_vars = list(zip(gradients, var_list))
            optimizer = tf.train.GradientDescentOptimizer(self.learning_rate)

            with tf.control_dependencies(update_ops):
                self.train_op = optimizer.apply_gradients(
                    grads_and_vars=self.grads_and_vars,
                    global_step=self.global_step)

        with tf.name_scope("probability"):
            self.probability = tf.nn.softmax(self.logits_val,
                                             name="probability")

        with tf.name_scope("prediction"):
            self.prediction = tf.argmax(self.logits_val, 1, name="prediction")

        with tf.name_scope("accuracy"):
            correct_prediction = tf.equal(self.prediction,
                                          tf.argmax(self.y_input, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction,
                                                   "float"),
                                           name="accuracy")

コード例 #27

0

ファイルを表示

ファイル: model_muti_out.py プロジェクト: yanghedada/huiwei_chinese_OCR

def model(images, weight_decay=1e-5, is_training=True):
    '''
    define the model, we use slim's implemention of resnet
    resnet_v1_50/block1 (?, ?, ?, 256)
    resnet_v1_50/block2 (?, ?, ?, 512)
    resnet_v1_50/block3 (?, ?, ?, 1024)
    resnet_v1_50/block4 (?, ?, ?, 2048)
    Shape of f_0 (?, ?, ?, 2048)
    Shape of f_1 (?, ?, ?, 512)
    Shape of f_2 (?, ?, ?, 256)
    Shape of f_3 (?, ?, ?, 64)
    Shape of h_0 (?, ?, ?, 2048), g_0 (?, ?, ?, 2048)
    Shape of h_1 (?, ?, ?, 128), g_1 (?, ?, ?, 128)
    Shape of h_2 (?, ?, ?, 64), g_2 (?, ?, ?, 64)
    Shape of h_3 (?, ?, ?, 32), g_3 (?, ?, ?, 32)

    '''
    F_score = []
    F_geometry = []

    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        # logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')
        logits, end_points = resnet_v1.resnet_v1_101(images,
                                                     is_training=is_training,
                                                     scope='resnet_v1_101')
    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:  # 最底层h1
                    f[i] = slim.conv2d(f[i], 2048, 1)  # 最底层的1x1conv

                    h[i] = f[i]
                else:
                    f[i] = slim.conv2d(f[i], num_outputs[i],
                                       1)  # f:1,2,3 的 1x1conv

                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:  # 中间层 h2,h3,
                    g[i] = unpool(h[i])

                else:  # 最高层 h4 对h4进行卷积预测
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # here we use a slightly different way for regression part,
            # we first use a sigmoid to limit the regression range, and also
            # this is do with the angle map
            f_score_32 = slim.conv2d(g[3],
                                     1,
                                     1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            # geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            f_geometry_32 = tf.concat([geo_map, angle_map], axis=-1)

            F_score.append(f_score_32)
            F_geometry.append(f_geometry_32)

            g[2] = slim.conv2d(h[2], 64, 3)
            f_score_64 = slim.conv2d(g[2],
                                     1,
                                     1,
                                     activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(
                g[2], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            # geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            angle_map = (slim.conv2d(
                g[2], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            f_geometry_64 = tf.concat([geo_map, angle_map], axis=-1)

            F_score.append(f_score_64)
            F_geometry.append(f_geometry_64)

    return F_score, F_geometry

コード例 #28

0

ファイルを表示

ファイル: e2e_interactive_learning_CUB.py プロジェクト: zhanghaoyue/cvpr20_IMCL

(img_test_ids,test_img,test_labels,test_attributes) = dataset_in_3.make_one_shot_iterator().get_next()
(img_test_ids,test_img_v,test_labels,test_attributes)=sess.run([img_test_ids,test_img,test_labels,test_attributes])
test_attributes = test_attributes[:,:,0]
#%%
#sparse_dict_img_id = tf.constant(sparse_dict_img_id)
#sparse_dict_img = tf.constant(sparse_dict_img)
#sparse_dict_label = tf.constant(sparse_dict_label)
#sparse_dict_Attributes = tf.constant(sparse_dict_Attributes)
#%%
image_size = resnet_v1.resnet_v1_101.default_image_size
height = image_size
width = image_size
img_input_ph = tf.placeholder(dtype=tf.float32,shape=[None,height,width,3])#tf.concat([img,sparse_dict_img],axis = 0,name='img_input_point')
#%%
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logit, end_points = resnet_v1.resnet_v1_101(img_input_ph, num_classes=1000, is_training=is_use_batch_norm,reuse=tf.AUTO_REUSE)
#    init_fn = slim.assign_from_checkpoint_fn(checkpoints_dir,slim.get_model_variables())
    features_concat = g.get_tensor_by_name('resnet_v1_101/pool5:0')

#%%
features_concat = tf.squeeze(features_concat)
features_concat = tf.concat([features_concat,tf.ones([tf.shape(features_concat)[0],1])],axis = 1,name='feature_input_point')
index_point = tf.placeholder(dtype=tf.int32,shape=())
F = features_concat[:index_point,:]
sparse_dict = features_concat[index_point:,:]
F_concat_ph = g.get_tensor_by_name('feature_input_point:0')
#%%
alpha_colaborative_var = tf.get_variable('alphha_colaborative',dtype=tf.float32,trainable=False, shape=())
alpha_colaborative_var_fh = tf.placeholder(dtype=tf.float32, shape=())

alpha_feature_var = tf.get_variable('alpha_feature',dtype=tf.float32,trainable=False, shape=())

コード例 #29

0

ファイルを表示

ファイル: resnet_v1_101_16s.py プロジェクト: dav-sap/school_project

def resnet_v1_101_16s(image_batch_tensor,
                      number_of_classes,
                      is_training):
    """Returns the resnet_v1_101_16s model definition.
    The function returns the model definition of a network that was described
    in 'DeepLab: Semantic Image Segmentation with Deep Convolutional Nets,
    Atrous Convolution, and Fully Connected CRFs' by Chen et al.
    The network subsamples the input by a factor of 16 and uses the bilinear
    upsampling kernel to upsample prediction by a factor of 16. This means that
    if the image size is not of the factor 16, the prediction of different size
    will be delivered. To adapt the network for an any size input use 
    adapt_network_for_any_size_input(resnet_v1_101_16s, 16). Note: the upsampling kernel
    is fixed in this model definition, because it didn't give significant
    improvements according to aforementioned paper.
    
    Parameters
    ----------
    image_batch_tensor : [batch_size, height, width, depth] Tensor
        Tensor specifying input image batch
    number_of_classes : int
        An argument specifying the number of classes to be predicted.
        For example, for PASCAL VOC it is 21.
    is_training : boolean
        An argument specifying if the network is being evaluated or trained.
    
    Returns
    -------
    upsampled_logits : [batch_size, height, width, number_of_classes] Tensor
        Tensor with logits representing predictions for each class.
        Be careful, the output can be of different size compared to input,
        use adapt_network_for_any_size_input to adapt network for any input size.
        Otherwise, the input images sizes should be of multiple 8.
    resnet_v1_101_16s_variables_mapping : dict {string: variable}
        Dict which maps the resnet_v1_101_16s model's variables to resnet_v1_101 checkpoint variables
        names. We need this to initilize the weights of resnet_v1_101_16s model with resnet_v1_101 from
        checkpoint file. Look at ipython notebook for examples.
    """
    
    with tf.variable_scope("resnet_v1_101_16s") as resnet_v1_101_16s:

        upsample_factor = 16

        # Convert image to float32 before subtracting the
        # mean pixel value
        image_batch_float = tf.to_float(image_batch_tensor)

        # Subtract the mean pixel value from each pixel
        mean_centered_image_batch = image_batch_float - [_R_MEAN, _G_MEAN, _B_MEAN]

        upsample_filter_np = bilinear_upsample_weights(upsample_factor,
                                                       number_of_classes)

        upsample_filter_tensor = tf.constant(upsample_filter_np)

        # TODO: make pull request to get this custom vgg feature accepted
        # to avoid using custom slim repo.
        
        
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(mean_centered_image_batch,
                                                number_of_classes,
                                                is_training=is_training,
                                                global_pool=False,
                                                output_stride=16)
        

        downsampled_logits_shape = tf.shape(logits)

        # Calculate the ouput size of the upsampled tensor
        upsampled_logits_shape = tf.pack([
                                          downsampled_logits_shape[0],
                                          downsampled_logits_shape[1] * upsample_factor,
                                          downsampled_logits_shape[2] * upsample_factor,
                                          downsampled_logits_shape[3]
                                         ])

        # Perform the upsampling
        upsampled_logits = tf.nn.conv2d_transpose(logits,
                                                  upsample_filter_tensor,
                                                  output_shape=upsampled_logits_shape,
                                                  strides=[1, upsample_factor, upsample_factor, 1])

        # Map the original vgg-16 variable names
        # to the variables in our model. This is done
        # to make it possible to use assign_from_checkpoint_fn()
        # while providing this mapping.
        # TODO: make it cleaner
        resnet_v1_101_16s_variables_mapping = {}

        resnet_v1_101_16s_variables = slim.get_variables(resnet_v1_101_16s)

        for variable in resnet_v1_101_16s_variables:

            # Here we remove the part of a name of the variable
            # that is responsible for the current variable scope
            original_resnet_v1_101_checkpoint_string = variable.name[len(resnet_v1_101_16s.original_name_scope):-2]
            resnet_v1_101_16s_variables_mapping[original_resnet_v1_101_checkpoint_string] = variable

    return upsampled_logits, resnet_v1_101_16s_variables_mapping

コード例 #30

0

ファイルを表示

def batch_prediction(image_id_to_path, model, sess):
    print "batch processing: " + str(len(image_id_to_path))
    image_id_to_predictions = {}
    image_ids = []
    count = 0
    start_time_1 = time.time()
    for image_id, path in image_id_to_path.iteritems():
        image_string = open(path, 'rb').read()
        image = tf.image.decode_jpeg(image_string, channels=3)
        if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4':
            processed_image = preprocess_for_inception(image,
                                                       image_size,
                                                       image_size,
                                                       central_fraction=1.0)
        elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
            processed_image = vgg_preprocessing.preprocess_image(
                image, image_size, image_size, is_training=False)
        start_time = time.time()
        #print processed_image.shape
        #np_val = sess.run(processed_image)
        #print np_val.shape
        #processed_image = tf.convert_to_tensor(np_val)
        #print processed_image.shape
        #print "conversion: "+str(time.time()-start_time)+" seconds"
        if count == 0:
            processed_images = tf.expand_dims(processed_image, 0)
        else:
            local_matrix = tf.expand_dims(processed_image, 0)
            processed_images = tf.concat([processed_images, local_matrix], 0)
        image_ids.append(image_id)
        count = count + 1
    print "Preparation: " + str(time.time() - start_time_1) + " seconds"
    start_time = time.time()
    if model == 'inception_v1':
        logits, _ = inception.inception_v1(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v1.ckpt'),
            slim.get_model_variables('InceptionV1'))
    elif model == 'inception_v2':
        logits, _ = inception.inception_v2(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v2.ckpt'),
            slim.get_model_variables('InceptionV2'))
    elif model == 'inception_v3':
        logits, _ = inception.inception_v3(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v3.ckpt'),
            slim.get_model_variables('InceptionV3'))
    elif model == 'inception_v4':
        logits, _ = inception.inception_v4(processed_images,
                                           num_classes=1001,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'inception_v4.ckpt'),
            slim.get_model_variables('InceptionV4'))
    elif model == 'resnet_v1_50':
        logits, _ = resnet_v1.resnet_v1_50(processed_images,
                                           num_classes=1000,
                                           is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_50.ckpt'),
            slim.get_model_variables('resnet_v1_50'))
    elif model == 'resnet_v1_101':
        logits, _ = resnet_v1.resnet_v1_101(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_101.ckpt'),
            slim.get_model_variables('resnet_v1_101'))
    elif model == 'resnet_v1_152':
        logits, _ = resnet_v1.resnet_v1_152(processed_images,
                                            num_classes=1000,
                                            is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'resnet_v1_152.ckpt'),
            slim.get_model_variables('resnet_v1_152'))
    elif model == 'vgg_16':
        logits, _ = vgg.vgg_16(processed_images,
                               num_classes=1000,
                               is_training=False)
        init_fn = slim.assign_from_checkpoint_fn(
            os.path.join(checkpoints_dir, 'vgg_16.ckpt'),
            slim.get_model_variables('vgg_16'))
    print "Prediction2.1: " + str(time.time() - start_time) + " seconds"
    start_time = time.time()
    init_fn(sess)
    print "Prediction2.2: " + str(time.time() - start_time) + " seconds"
    probabilities = tf.nn.softmax(logits)
    print "Prediction1: " + str(time.time() - start_time) + " seconds"

    start_time = time.time()
    np_image, probabilities = sess.run([image, probabilities])
    runtime = time.time() - start_time
    print "Prediction: " + str(runtime) + " seconds"
    for k in range(len(image_ids)):
        image_id = image_ids[k]
        predictions = []
        prob = probabilities[k, 0:]
        sorted_inds = [
            i[0] for i in sorted(enumerate(-prob), key=lambda x: x[1])
        ]
        for i in range(5):
            index = sorted_inds[i]
            if model == 'inception_v1' or model == 'inception_v2' or model == 'inception_v3' or model == 'inception_v4':
                name = names[index]
            elif model == 'vgg_16' or model == 'resnet_v1_50' or model == 'resnet_v1_101' or model == 'resnet_v1_152':
                name = names[index + 1]
            pr = prob[index]
            pair = (name, pr)
            predictions.append(pair)
        image_id_to_predictions[image_id] = predictions
    return image_id_to_predictions, runtime, sess

コード例 #31

0

ファイルを表示

ファイル: model.py プロジェクト: iamacewhite/EAST-GP

def model(images,
          valid_affines,
          seq_len,
          mask,
          weight_decay=1e-5,
          is_training=True,
          model=FLAGS.base_model):
    '''
    define the model, we use slim's implemention of resnet
    '''
    images = mean_image_subtraction(images, [128, 128, 128])
    if model == "reset_v1_50":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v1_101":
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "resnet_v2_101":
        with slim.arg_scope(
                resnet_v2.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v2.resnet_v2_101(
                images, is_training=is_training, scope='resnet_v2_101')
        features = ['pool5', 'pool4', 'pool3', 'pool2']
    elif model == "inception_v4":
        with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
            logits, end_points = inception_v4.inception_v4(
                images,
                num_classes=None,
                is_training=is_training,
                scope='inception_v4')
        features = ['Mixed_7b', 'Mixed_6b' 'Mixed5a', 'Mixed_3a']
    elif model == "inception_resnet_v2":
        with slim.arg_scope(
                inception_resnet_v2.inception_resnet_v2_arg_scope()):
            logits, end_points = inception_resnet_v2.inception_resnet_v2(
                images,
                num_classes=None,
                is_training=is_training,
                scope='inception_resnet_v2')
        features = ['Mixed_7a', 'Mixed_6a', 'Mixed_5b', 'MaxPool_3a_3x3']
    #pretty(end_points)
    with tf.variable_scope('feature_fusion', values=[end_points.values]):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):

            f = [end_points[fea] for fea in features]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    c1_2 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    a = slim.conv2d(slim.conv2d(c1_1, num_outputs[i], 3),
                                    num_outputs[i] // 2, 3)
                    b = slim.conv2d(c1_2, num_outputs[i] // 2, 3)
                    h[i] = tf.concat([a, b], axis=-1)
                    #h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    # g[i] = slim.conv2d(slim.conv2d(h[i], num_outputs[i], 3), num_outputs[i], 3)
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))
            print('Shape before ROI rotate: {}'.format(g[3].shape))
            text_proposals = roi_rotate(g[3], valid_affines, mask)
            rotated_image = roi_rotate_test(images, valid_affines, mask)
            print('Shape after ROI rotate: {}'.format(text_proposals.shape))
            recon_f = slim.conv2d(text_proposals, 64, 3)
            recon_f = slim.conv2d(recon_f, 64, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.conv2d(recon_f, 128, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.conv2d(recon_f, 256, 3)
            recon_f = slim.max_pool2d(recon_f, [2, 1], stride=[2, 1])

            logits = lstm_ctc(recon_f, seq_len)

            # here we use a slightly different way for regression part,
            # we first use a sigmoid to limit the regression range, and also
            # this is do with the angle map
            F_score = slim.conv2d(g[3],
                                  1,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channel of axis aligned bbox and 1 channel rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry, logits, text_proposals, g[3], rotated_image

コード例 #32

0

ファイルを表示

def main(_):
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    ensemble_type = FLAGS.ensemble_type

    tf.logging.set_verbosity(tf.logging.INFO)

    checkpoint_path_list = [
        FLAGS.checkpoint_path_inception_v1, FLAGS.checkpoint_path_inception_v2,
        FLAGS.checkpoint_path_inception_v3, FLAGS.checkpoint_path_inception_v4,
        FLAGS.checkpoint_path_inception_resnet_v2,
        FLAGS.checkpoint_path_resnet_v1_101,
        FLAGS.checkpoint_path_resnet_v1_152,
        FLAGS.checkpoint_path_resnet_v2_101,
        FLAGS.checkpoint_path_resnet_v2_152, FLAGS.checkpoint_path_vgg_16,
        FLAGS.checkpoint_path_vgg_19
    ]
    normalization_method = [
        'default', 'default', 'default', 'default', 'global', 'caffe_rgb',
        'caffe_rgb', 'default', 'default', 'caffe_rgb', 'caffe_rgb'
    ]
    pred_list = []
    for idx, checkpoint_path in enumerate(checkpoint_path_list, 1):
        with tf.Graph().as_default():
            if int(FLAGS.test_idx) == 20 and idx in [3]:
                continue
            if int(FLAGS.test_idx) in [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
                                       ] and int(FLAGS.test_idx) != idx:
                continue
            # Prepare graph
            if idx in [1, 2, 6, 7, 10, 11]:
                _x_input = tf.placeholder(tf.float32, shape=batch_shape)
                x_input = tf.image.resize_images(_x_input, [224, 224])
            else:
                _x_input = tf.placeholder(tf.float32, shape=batch_shape)
                x_input = _x_input

            x_input = image_normalize(x_input, normalization_method[idx - 1])

            if idx == 1:
                with slim.arg_scope(inception.inception_v1_arg_scope()):
                    _, end_points = inception.inception_v1(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 2:
                with slim.arg_scope(inception.inception_v2_arg_scope()):
                    _, end_points = inception.inception_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 3:
                with slim.arg_scope(inception.inception_v3_arg_scope()):
                    _, end_points = inception.inception_v3(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 4:
                with slim.arg_scope(inception.inception_v4_arg_scope()):
                    _, end_points = inception.inception_v4(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 5:
                with slim.arg_scope(inception.inception_resnet_v2_arg_scope()):
                    _, end_points = inception.inception_resnet_v2(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 6:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_101(x_input,
                                                            num_classes=1000,
                                                            is_training=False)
            elif idx == 7:
                with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                    _, end_points = resnet_v1.resnet_v1_152(x_input,
                                                            num_classes=1000,
                                                            is_training=False)
            elif idx == 8:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_101(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 9:
                with slim.arg_scope(resnet_v2.resnet_arg_scope()):
                    _, end_points = resnet_v2.resnet_v2_152(
                        x_input, num_classes=num_classes, is_training=False)
            elif idx == 10:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_16(x_input,
                                               num_classes=1000,
                                               is_training=False)
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_16/fc8'])
            elif idx == 11:
                with slim.arg_scope(vgg.vgg_arg_scope()):
                    _, end_points = vgg.vgg_19(x_input,
                                               num_classes=1000,
                                               is_training=False)
                    end_points['predictions'] = tf.nn.softmax(
                        end_points['vgg_19/fc8'])

            #end_points = tf.reduce_mean([end_points1['Predictions'], end_points2['Predictions'], end_points3['Predictions'], end_points4['Predictions']], axis=0)

            #predicted_labels = tf.argmax(end_points, 1)

            # Run computation
            saver = tf.train.Saver(slim.get_model_variables())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(saver=saver),
                checkpoint_filename_with_path=checkpoint_path,
                master=FLAGS.master)

            pred_in = []
            filenames_list = []
            with tf.train.MonitoredSession(
                    session_creator=session_creator) as sess:
                for filenames, images in load_images(FLAGS.input_dir,
                                                     batch_shape):
                    #if idx in [1,2,6,7,10,11]:
                    #  # 16x299x299x3
                    #  images = zoom(images, (1, 0.7491638795986622, 0.7491638795986622, 1), order=2)
                    filenames_list.extend(filenames)
                    end_points_dict = sess.run(end_points,
                                               feed_dict={_x_input: images})
                    if idx in [6, 7, 10, 11]:
                        end_points_dict['predictions'] = \
                                      np.concatenate([np.zeros([FLAGS.batch_size, 1]),
                                                      np.array(end_points_dict['predictions'].reshape(-1, 1000))],
                                                      axis=1)
                    try:
                        pred_in.extend(end_points_dict['Predictions'].reshape(
                            -1, num_classes))
                    except KeyError:
                        pred_in.extend(end_points_dict['predictions'].reshape(
                            -1, num_classes))
            pred_list.append(pred_in)

    if ensemble_type == 'mean':
        pred = np.mean(pred_list, axis=0)
        labels = np.argmax(
            pred, axis=1
        )  # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch
    elif ensemble_type == 'vote':
        pred = np.argmax(
            pred_list, axis=2
        )  # model_num X batch X class_num ==(np.mean)==> batch X class_num ==(np.argmax)==> batch
        labels = np.median(pred, axis=0)
    with tf.gfile.Open(FLAGS.output_file, 'w') as out_file:
        for filename, label in zip(filenames_list, labels):
            out_file.write('{0},{1}\n'.format(filename, label))

コード例 #33

0

ファイルを表示

def main(_):
    if not FLAGS.dataset_dir:
        raise ValueError(
            'You must supply the dataset directory with --dataset_dir')

    dropout_val = 0.8

    is_flip = True

    is_smoothing = True

    maintain_aspect_ratio = True

    min_perc = 0.90
    is_random_crops = False

    max_rotation = 0

    num_bins = 500
    no_output_params = 4
    num_classes = no_output_params * num_bins

    eval_num_classes = 7 * num_bins

    num_samples = sum(
        1 for _ in tf.python_io.tf_record_iterator(FLAGS.dataset_dir))
    print("No. of training examples: ", num_samples)

    assert max_rotation >= 0

    print('---------------------------------------------------------')
    print('Make sure that no. of training samples is actually ' +
          str(num_samples))
    print('---------------------------------------------------------')

    if FLAGS.model_name == 'inception-v4':
        net_width = 299
        net_height = 299
    else:
        net_width = 224
        net_height = 224

    tf.logging.set_verbosity(tf.logging.INFO)
    with tf.Graph().as_default():
        global_step = slim.create_global_step()

        data_path = FLAGS.dataset_dir
        filename_queue = tf.train.string_input_producer([data_path])
        image, label, carla_width, carla_height = util_tfio.general_read_and_decode(
            filename_queue, num_classes=8, dtype=tf.float64)
        print(image)
        print(label)

        # --------------------------------------------------------------------------------------------------------------------
        degree_angle = tf.random_uniform([],
                                         minval=-max_rotation,
                                         maxval=max_rotation,
                                         dtype=tf.float32)
        radian_angle = util_tfgeometry.tf_deg2rad(degree_angle)

        label = tf.reshape(label, (4, 2))
        # my_fov = label[3, 0]
        # my_pitch = label[3, 1]

        label = label[:3, :]

        if is_flip:
            image, bool_flip = util_tfimage.random_flip_left_right(image)

            def flip_gt():
                return tf.stack(
                    ([[
                        tf.cast(carla_width, label.dtype) - label[1, 0],
                        label[1, 1]
                    ],
                      [
                          tf.cast(carla_width, label.dtype) - label[0, 0],
                          label[0, 1]
                      ],
                      [
                          tf.cast(carla_width, label.dtype) - label[2, 0],
                          label[2, 1]
                      ]]))

            def gt():
                return label

            label = tf.cond(bool_flip, flip_gt, gt)

        if max_rotation > 0:
            # image rotation is buggy on GPU
            with tf.device('/cpu:0'):
                image = tf.contrib.image.rotate(image,
                                                radian_angle,
                                                interpolation='BILINEAR')
            max_width, max_height = util_tfgeometry.rotatedRectWithMaxArea_tf(
                carla_width, carla_height, radian_angle)
            max_height = tf.cast(tf.floor(max_height), tf.int32)
            max_width = tf.cast(tf.floor(max_width), tf.int32)
            print("max_width, height", max_width, max_height)
            image = tf.image.resize_image_with_crop_or_pad(
                image, target_height=max_height, target_width=max_width)

            rot_vps = util_tfgeometry.rotate_vps(
                (carla_width / 2, carla_height / 2), label,
                tf.cast(radian_angle, dtype=tf.float64))
            crop_rot_vps = util_tfgeometry.center_crop_vps(
                rot_vps,
                orig_dims=(carla_width, carla_height),
                crop_dims=(max_width, max_height))
        else:
            max_width = carla_width
            max_height = carla_height
            crop_rot_vps = label

        if maintain_aspect_ratio:
            image, max_width, max_height = util_tfimage.square_random_crop(
                image, max_width, max_height)

        if not is_random_crops:
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            float_max_height = tf.cast(max_height, tf.float64)
            float_max_width = tf.cast(max_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                crop_rot_vps,
                orig_dims=(float_max_width, float_max_height),
                resize_dims=(net_width, net_height))
        else:
            rand_perc = tf.random_uniform([], minval=min_perc, maxval=1.0)
            crop_height = tf.maximum(
                net_height,
                tf.cast(tf.floor(rand_perc * tf.cast(max_height, tf.float32)),
                        dtype=tf.int32))
            crop_width = tf.maximum(
                net_width,
                tf.cast(tf.floor(rand_perc * tf.cast(max_width, tf.float32)),
                        dtype=tf.int32))
            image, off_height, off_width = vgg_preprocessing._custom_random_crop(
                [image], crop_height, crop_width)[0]
            image = tf.image.resize_images(
                image, [net_width, net_height],
                method=tf.image.ResizeMethod.BILINEAR)

            temp_final_vps = util_tfgeometry.offset_vps(
                crop_rot_vps, off_height, off_width)
            float_crop_height = tf.cast(crop_height, tf.float64)
            float_crop_width = tf.cast(crop_width, tf.float64)
            final_vps = util_tfgeometry.resize_vps(
                temp_final_vps,
                orig_dims=(float_crop_width, float_crop_height),
                resize_dims=(net_width, net_height))

        image = util_tfimage.distort_color(image,
                                           color_ordering=tf.random_uniform(
                                               [],
                                               minval=0,
                                               maxval=4,
                                               dtype=tf.int32),
                                           fast_mode=False)

        # Value here, before pre-processing below will be 0-255
        if FLAGS.model_name == 'vgg-m':
            model = pickle.load(open("<vggm-tf.p>", "rb"))
            average_image = np.load('<vgg_average_image.npy>')
            image = image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            image = vgg_preprocessing.my_preprocess_image(image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            image = tf.cast(image, tf.float32) * (1. / 255)
            image = (image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        label = tf.reshape(final_vps, (3, 2))
        all_label = tf.concat([label, [[0], [0], [0]]], axis=1)

        output_label, output_indices = util_tfprojection.get_all_projected_from_3vps_modified_tf(
            all_label,
            no_bins=num_bins,
            img_dims=(net_width, net_height),
            verbose=False)

        if is_smoothing:
            stddev = 0.5

            max_indices = tf.argmax(output_label, axis=1)

            normalized = tf.distributions.Normal(
                loc=tf.reshape(tf.cast(max_indices, dtype=tf.float64),
                               (no_output_params, 1)),
                scale=tf.constant(stddev, dtype=tf.float64))

            probs = normalized.prob(
                tf.tile(
                    tf.reshape(
                        tf.cast(tf.range(output_label.shape[1]),
                                dtype=tf.float64), (1, -1)),
                    (no_output_params, 1)))

            act_normalized = probs / tf.reduce_sum(
                probs, axis=1, keepdims=True)
            label = tf.reshape(act_normalized, [-1])
        else:
            label = tf.reshape(output_label, [-1])

        print("SHAPE AT END:", image, label)
        # --------------------------------------------------------------------------------------------------------------------

        # shuffle requires 'min_after_dequeue' parameter (min to keep in queue)
        images, labels = tf.train.shuffle_batch(
            [image, label],
            batch_size=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=6 * FLAGS.batch_size,
            min_after_dequeue=4 * FLAGS.batch_size)

        labels = tf.stop_gradient(labels)

        ###########################
        # Reading evaluation data #
        ###########################
        if FLAGS.model_name == 'inception-v4':
            eval_path = ''
        else:
            eval_path = '<eval-CARLA-VP.tfrecords'

        eval_max_batch_size = min(50, FLAGS.batch_size)
        no_eval_examples = sum(
            1 for _ in tf.python_io.tf_record_iterator(eval_path))
        divs = np.array(list(factors(no_eval_examples)))
        sorted_divs = divs[divs.argsort()]
        eval_batch_size = sorted_divs[sorted_divs < eval_max_batch_size][-1]
        print("EVALUATION BATCH SIZE:", eval_batch_size)
        print("Number of examples in evaluation dataset: ", no_eval_examples)
        eval_filename_queue = tf.train.string_input_producer(
            [eval_path])  # , num_epochs=2)

        e_image, e_label = util_tfio.read_and_decode_evaluation(
            eval_filename_queue, eval_num_classes, net_height, net_width)
        print("eval_num_classes:", eval_num_classes)

        # Value here, before pre-processing below will be 0-255
        if FLAGS.model_name == 'vgg-m':
            e_image = e_image - average_image
        elif FLAGS.model_name == 'resnet-50' or FLAGS.model_name == 'resnet-101' or FLAGS.model_name == 'vgg-16':
            e_image = vgg_preprocessing.my_preprocess_image(e_image)
        elif FLAGS.model_name == 'mobilenet-v1' or FLAGS.model_name == 'inception-v1' or \
                FLAGS.model_name == 'inception-v4':
            e_image = tf.cast(e_image, tf.float32) * (1. / 255)
            e_image = (e_image - 0.5) * 2
        else:
            sys.exit("Invalid value for model name!")

        e_images, e_labels = tf.train.batch(
            [e_image, e_label],
            batch_size=eval_batch_size,
            num_threads=FLAGS.num_preprocessing_threads,
            capacity=5 * eval_batch_size)
        # --------------------------

        print("PREFETCH_QUEUE, CAPACITY:", FLAGS.batch_size, ", NUM_THREADS:",
              FLAGS.num_preprocessing_threads)
        batch_queue = slim.prefetch_queue.prefetch_queue(
            [images, labels],
            capacity=FLAGS.batch_size,
            num_threads=FLAGS.num_preprocessing_threads)

        images, labels = batch_queue.dequeue()

        if FLAGS.model_name == 'vgg-m':
            logits = vgg_m.cnn_vggm(images,
                                    num_classes=num_classes,
                                    model=model)

            eval_logits = vgg_m.cnn_vggm(e_images,
                                         num_classes=num_classes,
                                         model=model,
                                         reuse=True)
        elif FLAGS.model_name == 'vgg-16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                logits, end_points = vgg.vgg_16(images,
                                                num_classes=num_classes,
                                                is_training=True,
                                                dropout_keep_prob=dropout_val)

                eval_logits, _ = vgg.vgg_16(e_images,
                                            num_classes=num_classes,
                                            is_training=False,
                                            reuse=True)
        elif FLAGS.model_name == 'resnet-50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_50(
                    images, num_classes=num_classes, is_training=True)

                eval_logits, _ = resnet_v1.resnet_v1_50(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'resnet-101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                logits, end_points = resnet_v1.resnet_v1_101(
                    images, num_classes=num_classes, is_training=True)

                eval_logits, _ = resnet_v1.resnet_v1_101(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v1':
            with slim.arg_scope(inception_v1.inception_v1_arg_scope()):
                logits, end_points = inception_v1.inception_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = inception_v1.inception_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'inception-v4':
            with slim.arg_scope(inception_v4.inception_v4_arg_scope()):
                logits, end_points = inception_v4.inception_v4(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = inception_v4.inception_v4(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        elif FLAGS.model_name == 'mobilenet-v1':
            with slim.arg_scope(mobilenet_v1.mobilenet_v1_arg_scope()):
                logits, end_points = mobilenet_v1.mobilenet_v1(
                    images,
                    num_classes=num_classes,
                    is_training=True,
                    dropout_keep_prob=dropout_val)

                eval_logits, _ = mobilenet_v1.mobilenet_v1(
                    e_images,
                    num_classes=num_classes,
                    is_training=False,
                    reuse=True)
        else:
            sys.exit("Invalid value for model name!")

        jumps = int(num_classes / no_output_params)
        classification_loss_1 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=labels[:, :jumps],
                                                    logits=logits[:, :jumps]))
        classification_loss_2 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, jumps:2 * jumps],
                logits=logits[:, jumps:2 * jumps]))
        classification_loss_3 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 2 * jumps:3 * jumps],
                logits=logits[:, 2 * jumps:3 * jumps]))
        classification_loss_4 = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                labels=labels[:, 3 * jumps:4 * jumps],
                logits=logits[:, 3 * jumps:4 * jumps]))

        ##############################################################################################
        # try implementing L1 loss among both here to help visualize comparison with validation loss

        logits_ind = tf.argmax(tf.reshape(logits,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        labels_ind = tf.argmax(tf.reshape(labels,
                                          (-1, no_output_params, num_bins)),
                               axis=2)
        print("Logits_ind shape:", logits_ind.shape)

        train_l1_loss = tf.reduce_sum(tf.abs(logits_ind - labels_ind))

        regularization_loss = tf.add_n(slim.losses.get_regularization_losses())
        total_loss = (classification_loss_1 + classification_loss_2 +
                      classification_loss_3 + classification_loss_4 +
                      regularization_loss)

        print("After classification loss:")
        print(logits.shape)
        print(labels.shape)
        print("---------------------------------------")

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for losses.
        # for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Add summaries for variables.
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        #########################################
        # Configure the optimization procedure. #
        #########################################
        learning_rate = tf.placeholder(tf.float32,
                                       shape=[],
                                       name="learning_rate")
        optimizer = util_tftraining.configure_optimizer(learning_rate,
                                                        FLAGS=FLAGS)

        print("learning rate tensor:", learning_rate)

        # Variables to train.
        variables_to_train = util_tftraining.get_variables_to_train(
            FLAGS=FLAGS)

        print("-----------------------------------------")
        print("variables to train: ", variables_to_train)
        print("-----------------------------------------")

        train_op = slim.learning.create_train_op(
            total_loss=total_loss,
            optimizer=optimizer,
            variables_to_train=variables_to_train,
            global_step=global_step)

        if classification_loss_1 is not None:
            tf.summary.scalar('Losses/classification_loss_1',
                              classification_loss_1)
        if classification_loss_2 is not None:
            tf.summary.scalar('Losses/classification_loss_2',
                              classification_loss_2)
        if classification_loss_3 is not None:
            tf.summary.scalar('Losses/classification_loss_3',
                              classification_loss_3)
        if classification_loss_4 is not None:
            tf.summary.scalar('Losses/classification_loss_4',
                              classification_loss_4)

        if regularization_loss is not None:
            tf.summary.scalar('Losses/regularization_loss',
                              regularization_loss)

        # Add total_loss to summary.
        summaries.add(tf.summary.scalar('total_loss', total_loss))

        summaries |= set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Merge all summaries together.
        tf.summary.merge(list(summaries), name='summary_op')

        session_config = tf.ConfigProto()
        session_config.allow_soft_placement = True
        session_config.gpu_options.allow_growth = True

        init_fn = util_tftraining.get_init_fn(FLAGS=FLAGS)

        print("Before learning.train", flush=True)
        print("---------------------------------------------------")
        print("---------------------------------------------------")

        early_stop_epochs = 10
        no_steps_in_epoch = int(np.ceil(num_samples / FLAGS.batch_size))
        scaffold = tf.train.Scaffold(saver=tf.train.Saver(
            max_to_keep=early_stop_epochs + 3))

        show_eval_loss_every_steps = no_steps_in_epoch / 5
        save_checkpoint_every_steps = no_steps_in_epoch / 5

        with tf.train.MonitoredTrainingSession(
                master='',
                is_chief=True,
                checkpoint_dir=FLAGS.train_dir,
                scaffold=scaffold,
                hooks=None,
                chief_only_hooks=None,
                save_checkpoint_steps=save_checkpoint_every_steps,
                save_summaries_secs=FLAGS.save_summaries_secs,
                config=session_config,
                stop_grace_period_secs=120,
                log_step_count_steps=0,
                max_wait_secs=10) as mon_sess:

            print("-----------------------------------------")
            if init_fn is not None:
                init_fn(mon_sess)
                print("Succesfully loaded model")
            else:
                print("A model already exists in the 'train_dir' path")
            print("-----------------------------------------")

            last_sum_train_loss = 0
            last_sum_tl1_loss = 0
            best_sum_train_loss = np.inf
            step_no = 0
            current_lr = FLAGS.learning_rate

            no_params = 7
            consider_params = 4

            consider_top = 11

            best_eval_wa = np.inf
            best_eval_epoch = 0

            while True:
                _, train_loss, tl1_loss = mon_sess.run(
                    [train_op, total_loss, train_l1_loss],
                    feed_dict={learning_rate: current_lr})
                last_sum_train_loss += train_loss
                last_sum_tl1_loss += tl1_loss

                epoch_no = int(
                    np.floor((step_no * FLAGS.batch_size) / num_samples))

                if np.mod(step_no, FLAGS.log_every_n_steps) == 0:
                    print("Epoch {}, Step {}, lr={:0.5f}, Loss: {}".format(
                        epoch_no, step_no, current_lr, train_loss),
                          flush=True)

                # calculating evaluation loss alongside as well
                if np.mod(step_no, show_eval_loss_every_steps) == 0:
                    print("--In eval block--")

                    total_l1_loss = 0
                    total_wa_loss = 0

                    for loop_no in range(
                            int(np.floor(no_eval_examples / eval_batch_size))):
                        np_rawpreds, np_labels = mon_sess.run(
                            [eval_logits, e_labels])

                        for i in range(eval_batch_size):

                            predicted_label = np.argmax(
                                np_rawpreds[i, :].reshape(consider_params, -1),
                                axis=1)
                            gt_label = np.argmax(np_labels[i, :].reshape(
                                no_params, -1)[:consider_params, :],
                                                 axis=1)

                            l1_loss = np.sum(np.abs(predicted_label -
                                                    gt_label))

                            wa = 0
                            for ln in range(consider_params):
                                predsoft = my_softmax(
                                    np_rawpreds[i, :].reshape(
                                        consider_params,
                                        -1)[ln, :][np.newaxis])
                                predsoft = predsoft.squeeze()
                                labsoft = np_labels[i, :].reshape(
                                    no_params, -1)[ln, :]
                                topindices = predsoft.argsort(
                                )[::-1][:consider_top]
                                probsindices = predsoft[topindices] / np.sum(
                                    predsoft[topindices])
                                wa += np.abs(
                                    int(
                                        np.round(
                                            np.sum(probsindices *
                                                   topindices))) -
                                    labsoft.argmax())

                            total_l1_loss += l1_loss
                            total_wa_loss += wa

                    avg_manhattan_loss = total_l1_loss / no_eval_examples
                    avg_wa_loss = total_wa_loss / no_eval_examples

                    print(
                        "-------------------------------------------------------------------"
                    )
                    print("Average manhattan loss per scalar:",
                          avg_manhattan_loss / consider_params)
                    print(
                        "Average manhattan loss(Weighted avg. top 10 bins)per scalar:",
                        avg_wa_loss / consider_params)
                    print(
                        "-------------------------------------------------------------------",
                        flush=True)

                    if avg_wa_loss < best_eval_wa:
                        best_eval_wa = avg_wa_loss
                        best_eval_epoch = epoch_no

                    if avg_wa_loss > best_eval_wa and (
                            epoch_no - best_eval_epoch
                    ) > early_stop_epochs and current_lr < 1e-3 and epoch_no > 10:
                        print("STOPPING TRAINING at epoch: ", epoch_no,
                              ", best epoch was:", best_eval_epoch, "(step: ",
                              best_eval_epoch * num_samples / FLAGS.batch_size,
                              ")")
                        print("Current eval_wa:", avg_wa_loss,
                              ", best eval_wa:", best_eval_wa)
                        break

                    if step_no > 0:
                        last_sum_train_loss /= show_eval_loss_every_steps
                        last_sum_tl1_loss /= (no_steps_in_epoch *
                                              FLAGS.batch_size *
                                              no_output_params)
                        if last_sum_train_loss > best_sum_train_loss:
                            if current_lr > FLAGS.end_learning_rate:
                                print("Dividing learning rate by 10.0")
                                current_lr /= 10.0
                                best_sum_train_loss = last_sum_train_loss
                            else:
                                print(
                                    "Already reached lowest possible lr i.e. ",
                                    current_lr)
                        else:
                            best_sum_train_loss = last_sum_train_loss

                        print("last_sum_train_loss:", last_sum_train_loss)
                        print("L1_train_loss:", last_sum_tl1_loss)
                        last_sum_train_loss = 0
                        last_sum_tl1_loss = 0
                #########################################################################################

                step_no += 1

                if FLAGS.max_number_of_steps is not None:
                    if step_no >= FLAGS.max_number_of_steps:
                        break

            print("Final Step {}, Loss: {}".format(step_no, train_loss))

        print("---------------------The End-----------------------")
        print("---------------------------------------------------")
        print("---------------------------------------------------")