Example #1
def res_fcn_32s(inputs, num_classes, is_training):
    with tf.variable_scope('res_fcn_32s'):
        # Use the structure of res_v1_50 classification network
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(inputs, num_classes, is_training=is_training,
                                                 global_pool=False, output_stride=32)
        # Transposed-convolution (deconvolution) layer to recover the input image size.
        # With output_stride=32, the backbone reduces the spatial size by a factor
        # of 2 five times (2^5 = 32), which is why this model is called fcn_32s.
        # Bilinear interpolation weights are used for the upsampling filter.
        upsample_filter = upsampling.bilinear_upsample_weights(32, num_classes)
        upsample_filter_tensor = tf.constant(upsample_filter)
        shape = tf.shape(net)
        output = tf.nn.conv2d_transpose(net, upsample_filter_tensor,
                                        output_shape = tf.stack([shape[0], shape[1] * 32,
                                                        shape[2] * 32, shape[3]]),
                                        strides=[1, 32, 32, 1])
        variables = slim.get_variables('res_fcn_32s')
        # Extract variables that match the original ResNet-50 so they can be
        # initialized from the pre-trained ResNet-50 checkpoint
        res_variables = {}
        for variable in variables:
            # Strip the 'res_fcn_32s/' prefix and the ':0' suffix from each name
            res_variables[variable.name[12:-2]] = variable
        return output, res_variables
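A minimal usage sketch for this function, assuming TF 1.x with tf.contrib.slim and a local resnet_v1_50.ckpt checkpoint (the 21-class setting is illustrative):

# Hedged usage sketch: build the FCN and restore the pre-trained backbone.
inputs = tf.placeholder(tf.float32, [None, 224, 224, 3], name='inputs')
output, res_variables = res_fcn_32s(inputs, num_classes=21, is_training=True)
# Exclude the logits layer, whose shape differs from the 1000-class checkpoint.
backbone_vars = {name: var for name, var in res_variables.items()
                 if 'logits' not in name}
restorer = tf.train.Saver(var_list=backbone_vars)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    restorer.restore(sess, './resnet_v1_50.ckpt')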
Example #2
 def predict(self, preprocessed_inputs):
     """Predict prediction tensors from inputs tensor.
     
     Outputs of this function can be passed to loss or postprocess functions.
     
     Args:
         preprocessed_inputs: A float32 tensor with shape [batch_size,
             height, width, num_channels] representing a batch of images.
         
     Returns:
         prediction_dict: A dictionary holding prediction tensors to be
             passed to the Loss or Postprocess functions.
     """
     with slim.arg_scope(resnet_v1.resnet_arg_scope()):
         net, endpoints = resnet_v1.resnet_v1_50(preprocessed_inputs,
                                                 num_classes=None,
                                                 is_training=True)
     # Flatten the pooled [batch, 1, 1, 2048] output to [batch, 2048]
     net = tf.squeeze(net, axis=[1, 2])
     logits = slim.fully_connected(net,
                                   num_outputs=self.num_classes,
                                   activation_fn=None,
                                   scope='Predict')
     prediction_dict = {'logits': logits}
     return prediction_dict
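At inference time the returned logits are typically converted to class probabilities; a hedged one-liner, where model stands for an instance of the class above:

probabilities = tf.nn.softmax(model.predict(preprocessed_inputs)['logits'])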
Example #3
def main():
    ckpt_path = './resnet_v1_50.ckpt'
    X = tf.placeholder(tf.float32, shape=[None, 96, 96, 3], name='input')

    with slim.arg_scope(resnet_arg_scope()):
        logits, end_points = resnet_v1_50(X,
                                          num_classes=1000,
                                          is_training=False)

    final_layer_to_load = end_points['resnet_v1_50/block4']

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, ckpt_path)
        frozen_graph_def = convert_variables_to_constants(
            sess,
            sess.graph_def,
            output_node_names=[final_layer_to_load.name.split(':')[0]])

    frozen_graph = tf.Graph()
    with frozen_graph.as_default():
        tf.import_graph_def(frozen_graph_def, name='')

    sess = tf.Session(graph=frozen_graph)

    res = sess.run(final_layer_to_load.name,
                   {'input:0': np.ones(shape=[12, 96, 96, 3])})
    print("out shape: {}".format(res.shape))
Example #4
File: resnet.py  Project: exmee/HSSD
    def create_trunk(self, images):
        # Convert RGB inputs (assumed in [0, 1]) to BGR in [0, 255] and subtract the mean color
        red, green, blue = tf.split(images * 255, 3, axis=3)
        images = tf.concat([blue, green, red], 3) - MEAN_COLOR

        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(is_training=self.training,
                                           weight_decay=self.weight_decay,
                                           batch_norm_decay=args.bn_decay)):
            blocks = [
                resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 3),
                resnet_utils.Block('block2', bottleneck,
                                   [(512, 128, 2)] + [(512, 128, 1)] * 3),
                resnet_utils.Block('block3', bottleneck, [(1024, 256, 2)] +
                                   [(1024, 256, 1)] * self.num_block3),
                resnet_utils.Block('block4', bottleneck,
                                   [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
            ]

            net, endpoints = resnet_v1.resnet_v1(images,
                                                 blocks,
                                                 global_pool=False,
                                                 reuse=self.reuse,
                                                 scope=self.scope)
            self.outputs = endpoints
        self.add_extra_layers(net)
Example #5
    def predict(self, inputs_dict):
        """Predict prediction tensors from inputs tensor.
        
        Outputs of this function can be passed to loss or postprocess functions.
        
        Args:
            inputs_dict: A dictionary with two entries:
                inputs: A float32 placeholder or tensor with shape [batch_size,
                    height, width, num_channels] representing a batch of images,
                    e.g. tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='inputs')
                is_training: A boolean placeholder,
                    e.g. tf.placeholder(tf.bool, name='is_training')
            
        Returns:
            prediction_dict: A dictionary holding prediction tensors to be
                    passed to the Loss or Postprocess functions.
        """
        input_images = inputs_dict['inputs']
        is_training = inputs_dict['is_training'] 
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, endpoints = resnet_v1.resnet_v1_35(
                input_images,
                num_classes=None,
                is_training=is_training)

        with tf.variable_scope('Logits'):
            # The final average-pooling layer makes the ResNet output a tensor of
            # shape [None, 1, 1, 2048]; use tf.squeeze to flatten it to [None, 2048]
            net = tf.squeeze(net, axis=[1, 2])
            net = slim.dropout(net, keep_prob=0.5, is_training=is_training, scope='scope')
            logits = slim.fully_connected(net, num_outputs=self.num_classes,
                                          activation_fn=None, scope='fc')

        prediction_dict = {'logits': logits}
        return prediction_dict
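A hedged usage sketch showing how the two placeholders from the docstring are wired up (model stands for an instance of the surrounding class):

inputs = tf.placeholder(tf.float32, shape=[None, 224, 224, 3], name='inputs')
is_training = tf.placeholder(tf.bool, name='is_training')
prediction_dict = model.predict({'inputs': inputs, 'is_training': is_training})
# At run time: sess.run(prediction_dict['logits'],
#                       feed_dict={inputs: batch, is_training: False})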
Example #6
 def predict(self, preprocessed_inputs):
     with slim.arg_scope(resnet_v1.resnet_arg_scope()):
         net, endpoints = resnet_v1.resnet_v1_50(
             preprocessed_inputs,
             num_classes=None,
             is_training=self._is_training)
     net = tf.squeeze(net, axis=[1, 2])
     logits = slim.fully_connected(net,
                                   num_outputs=self.num_classes,
                                   activation_fn=None,
                                   scope='Predict')
     prediction_dict = {'logits': logits}
     return prediction_dict
Example #7
def endpoints(image, is_training):
    if image.get_shape().ndims != 4:
        raise ValueError('Input must be of size [batch, height, width, 3]')

    image = image - tf.constant(_RGB_MEAN, dtype=tf.float32, shape=(1,1,1,3))

    with tf.contrib.slim.arg_scope(resnet_arg_scope(batch_norm_decay=0.9, weight_decay=0.0)):
        _, endpoints = resnet_v1_50(image, num_classes=None, is_training=is_training, global_pool=True)

    endpoints['model_output'] = endpoints['global_pool'] = tf.reduce_mean(
        endpoints['resnet_v1_50/block4'], [1, 2], name='pool5')

    return endpoints, 'resnet_v1_50'
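A hedged usage sketch: the returned endpoints dict exposes the pooled embedding under 'model_output' (the input size is illustrative; _RGB_MEAN comes from the surrounding module):

image = tf.placeholder(tf.float32, [None, 256, 128, 3])
eps, scope_name = endpoints(image, is_training=False)
embedding = eps['model_output']  # shape [batch, 2048]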
Example #8
File: model.py  Project: edwardliqi/SPCNet
def build_FPN(images, config, is_training, backbone='resnet50'):
    # images: [batch, h, w, channels]
    # Returns: pyramid_feature, a dict {P2, P3, P4, P5} of feature maps from
    #          different levels of the feature pyramid; each is [batch, height, width, channels]
    pyramid = {}
    # build backbone network
    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=1e-5)):
        if backbone == "resnet50":
            logits, end_points = resnet_v1.resnet_v1_50(
                images, is_training=is_training, scope='resnet_v1_50')
            pyramid['C2'] = end_points[
                'resnet_v1_50/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_50/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_50/block3/unit_5/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_50/block4/unit_3/bottleneck_v1']
        elif backbone == "resnet101":
            logits, end_points = resnet_v1.resnet_v1_101(
                images, is_training=is_training, scope='resnet_v1_101')
            pyramid['C2'] = end_points[
                'resnet_v1_101/block1/unit_2/bottleneck_v1']
            pyramid['C3'] = end_points[
                'resnet_v1_101/block2/unit_3/bottleneck_v1']
            pyramid['C4'] = end_points[
                'resnet_v1_101/block3/unit_22/bottleneck_v1']
            pyramid['C5'] = end_points[
                'resnet_v1_101/block4/unit_3/bottleneck_v1']
        else:
            raise ValueError("Unknown backbone: %s" % backbone)
    # build FPN
    pyramid_feature = {}
    arg_scope = _extra_conv_arg_scope_with_bn()
    with tf.variable_scope('FPN'):
        with slim.arg_scope(arg_scope):
            pyramid_feature['P5'] = slim.conv2d(pyramid['C5'],
                                                config.TOP_DOWN_PYRAMID_SIZE,
                                                1)
            for i in range(4, 1, -1):
                upshape = tf.shape(pyramid['C%d' % i])
                u = tf.image.resize_bilinear(pyramid_feature['P%d' % (i+1)], \
                 size = (upshape[1], upshape[2]))
                c = slim.conv2d(pyramid['C%d' % i],
                                config.TOP_DOWN_PYRAMID_SIZE, 1)
                s = tf.add(c, u)
                pyramid_feature['P%d' % i] = slim.conv2d(
                    s, config.TOP_DOWN_PYRAMID_SIZE, 3)
    return pyramid_feature
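_extra_conv_arg_scope_with_bn is a project-local helper that is not shown here; a plausible minimal stand-in (an assumption, not the SPCNet original):

def _extra_conv_arg_scope_with_bn(weight_decay=1e-5, batch_norm_decay=0.997):
    # Conv layers with L2 regularization, ReLU, and batch norm, as is common for FPN heads.
    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'decay': batch_norm_decay}) as sc:
        return sc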
Example #9
def main(args):

    dataset = CifarDataSet(args.batch_size, args.data_dir)
    dataset.make_batch_valid_or_test()
    if 'cifar-100' in args.data_dir:
        num_classes = 100
    else:
        num_classes = 10

    model = resnet_v1.resnet_v1_110
    # Effectively a 112-layer network, since there are 2 additional 1x1 convs for the shortcuts
    print("Data loaded! Building model...")

    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, _ = model(dataset.images_vt, num_classes, is_training=False)
        logits = tf.squeeze(net, [1, 2])

    # tf saver, session
    restorer = tf.train.Saver()

    config = tf.ConfigProto(allow_soft_placement=True,
                            gpu_options=tf.GPUOptions(
                                force_gpu_compatible=True, allow_growth=True))

    sess = tf.Session(config=config)
    sess.run(dataset.iterator_vt.initializer,
             feed_dict={dataset.validation: True})

    restorer.restore(sess, tf.train.latest_checkpoint(args.save_dir))

    print("Model built! Getting logits...")

    logits_nps = []
    num_eval_batches = (dataset.images_np['valid'].shape[0]
                        // dataset.eval_batch_size)
    for step in range(num_eval_batches):
        logits_np = sess.run(logits)
        logits_nps.append(logits_np)

    logits_nps = np.concatenate(logits_nps)

    print("Logits get! Do temperature scaling...")
    print("=" * 80)

    temp_var = temp_scaling(logits_nps, dataset.labels_np['valid'], sess)
    # use temp_var with your logits to get calibrated output

    print("=" * 80)
    print("Done!")
Example #10
def get_network(name,
                image,
                weight_decay=0.000005,
                is_training=False,
                reuse=False):
    if name == 'resnet50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet50(image,
                                          num_classes=None,
                                          is_training=is_training,
                                          reuse=reuse)
    else:
        raise ValueError('Unknown network name: %s' % name)

    end_points['input'] = image
    return logits, end_points
Example #11
 def resnet_forward(self, x, layer, scope):
     # map x from [-1, 1] back to [0, 255]
     x = 255.0 * (0.5 * (x + 1.0))
     # subtract means
     mean = tf.constant([123.68, 116.779, 103.939],
                        dtype=tf.float32,
                        shape=[1, 1, 1, 3],
                        name='img_mean')  # RGB means from VGG paper
     x = x - mean
     # send through resnet
     with slim.arg_scope(resnet_arg_scope()):
         _, layers = resnet_v1_50(x,
                                  num_classes=1000,
                                  is_training=False,
                                  reuse=self.reuse_resnet)
     self.reuse_resnet = True
     return layers['resnet_v1_50/' + layer]
Example #12
def create_trunk(images,
                 rois=None,
                 reuse=False,
                 fc_layers=True,
                 weight_decay=0.0005):
    red, green, blue = tf.split(images * 255, 3, axis=3)
    images = tf.concat([blue, green, red], 3) - RESNET_MEAN

    with slim.arg_scope(
            resnet_v1.resnet_arg_scope(is_training=False,
                                       weight_decay=weight_decay)):
        net, endpoints = resnet_frcnn(images,
                                      rois=rois,
                                      global_pool=True,
                                      fc_layers=fc_layers,
                                      reuse=reuse)
    return net, endpoints
Example #13
    def __init__(self, images):
        self.layer = {}
        self.images = images

        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            self.nets, _ = resnet_v1.resnet_v1_101(self.images,
                                                   1000,
                                                   is_training=False,
                                                   spatial_squeeze=False,
                                                   global_pool=False,
                                                   output_stride=16)
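            # NOTE (assumption): the indexing below treats self.nets as a list of
            # per-block outputs, which implies a locally modified resnet_v1_101;
            # the stock TF-slim version returns a single tensor plus an end_points dict.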
            print(len(self.nets))
            for index in range(len(self.nets)):
                print("resnet_bolck_%d" % (index + 1))
                print(self.nets[index].get_shape())
        self.layer['block1'] = self.nets[0]
        self.layer['block2'] = self.nets[1]
        self.layer['block3'] = self.nets[2]
        self.layer['block4'] = self.nets[3]
Example #14
    def inference(self, x, is_training=True, reuse=False):
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, endpoints = self.resnet_build_func(inputs=x,
                                                    num_classes=None,
                                                    is_training=is_training,
                                                    reuse=reuse,
                                                    scope=self.network_name)

        with tf.variable_scope('Logits'):
            # The final average-pooling layer makes the ResNet output a tensor of
            # shape [None, 1, 1, 2048]; use tf.squeeze to flatten it to [None, 2048]
            net = tf.squeeze(net, axis=[1, 2])
            #net = slim.fully_connected(net, 512, scope='fc_inter')
            #net = slim.dropout(net, keep_prob=0.5, is_training=is_training, scope='fc_dropout')
            logits = slim.fully_connected(net,
                                          num_outputs=self.num_classes,
                                          activation_fn=None,
                                          scope='fc')
        return logits
Example #15
def model(x, H, reuse, is_training=True):
    if H['slim_basename'] == 'resnet_v1_101':
        with slim.arg_scope(resnet.resnet_arg_scope()):
            _, T = resnet.resnet_v1_101(x,
                                        is_training=is_training,
                                        num_classes=1000,
                                        reuse=reuse)
    elif H['slim_basename'] == 'InceptionV1':
        with slim.arg_scope(inception.inception_v1_arg_scope()):
            _, T = inception.inception_v1(x,
                                          is_training=is_training,
                                          num_classes=1001,
                                          spatial_squeeze=False,
                                          reuse=reuse)
    #print '\n'.join(map(str, [(k, v.op.outputs[0].get_shape()) for k, v in T.iteritems()]))

    coarse_feat = T[H['slim_top_lname']][:, :, :, :H['later_feat_channels']]
    assert coarse_feat.op.outputs[0].get_shape()[3] == H['later_feat_channels']

    # The early (fine) features can be used to reinspect the input
    attention_lname = H.get('slim_attention_lname', 'Mixed_3b')
    early_feat = T[attention_lname]

    return coarse_feat, early_feat
Example #16
    def create_trunk(self, images):
        red, green, blue = tf.split(images*255, 3, axis=3)
        images = tf.concat([blue, green, red], 3) - MEAN_COLOR

        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=self.training,
                                                       weight_decay=self.weight_decay,
                                                       batch_norm_decay=args.bn_decay)):
            blocks = [
                resnet_utils.Block(
                    'block1', bottleneck, [(256, 64, 1)] * 3),
                resnet_utils.Block(
                    'block2', bottleneck, [(512, 128, 2)] + [(512, 128, 1)] * 3),
                resnet_utils.Block(
                    'block3', bottleneck, [(1024, 256, 2)] + [(1024, 256, 1)] * self.num_block3),
                resnet_utils.Block(
                    'block4', bottleneck, [(2048, 512, 2)] + [(2048, 512, 1)] * 2)
            ]

            net, endpoints = resnet_v1.resnet_v1(images, blocks,
                                                 global_pool=False,
                                                 reuse=self.reuse,
                                                 scope=self.scope)
            self.outputs = endpoints
        self.add_extra_layers(net)
Example #17
def model(images, text_scale=512, weight_decay=1e-5, is_training=True):
    """
    define the model, we use slim's implemention of resnet
    """
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images,
                                                    is_training=is_training,
                                                    scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=end_points.values()):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope(
            [slim.conv2d],
                activation_fn=tf.nn.relu,
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [
                end_points['pool5'], end_points['pool4'], end_points['pool3'],
                end_points['pool2']
            ]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))
            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1),
                                       num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(
                    i, h[i].shape, i, g[i].shape))

            # Here we use a slightly different approach for the regression part:
            # we first apply a sigmoid to limit the regression range, and the
            # same is done for the angle map
            F_score = slim.conv2d(g[3],
                                  1,
                                  1,
                                  activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(
                g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                normalizer_fn=None) * text_scale
            angle_map = (slim.conv2d(
                g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) -
                         0.5) * np.pi / 2  # angle is between [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
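unpool is imported from the surrounding EAST implementation; in the reference code it is simply a 2x bilinear resize, so a minimal stand-in looks like this (a sketch, not necessarily the project's exact definition):

def unpool(inputs):
    # Upsample by a factor of 2 with bilinear interpolation.
    shape = tf.shape(inputs)
    return tf.image.resize_bilinear(inputs, size=[shape[1] * 2, shape[2] * 2])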
Example #18
File: resnet.py  Project: exmee/HSSD
    def add_extra_layers(self, net):
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(is_training=self.training,
                                           weight_decay=self.weight_decay,
                                           batch_norm_decay=args.bn_decay)):
            block_depth = 2
            num_fm = 2048
            '''
            blocks = [
                resnet_utils.Block(
                    'block5', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block6', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block7', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            ]
            '''
            blocks = [
                resnet_utils.Block(
                    'block5', bottleneck, [(num_fm // 2, num_fm // 2, 2)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
                resnet_utils.Block(
                    'block6', bottleneck, [(num_fm // 2, num_fm // 2, 2)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
                resnet_utils.Block(
                    'block7', bottleneck, [(num_fm // 2, num_fm // 2, 1)] +
                    [(num_fm // 2, num_fm // 2, 1)] * (block_depth - 1)),
            ]
            if args.image_size == 512:
                blocks += [
                    resnet_utils.Block(
                        'block8', bottleneck, [(num_fm, num_fm // 4, 2)] +
                        [(num_fm, num_fm // 4, 1)] * (block_depth - 1)),
                ]

            net, endpoints = resnet_v1.resnet_v1(net,
                                                 blocks,
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 reuse=self.reuse,
                                                 scope=DEFAULT_SSD_SCOPE)
            self.outputs.update(endpoints)
            with tf.variable_scope(DEFAULT_SSD_SCOPE + "_back",
                                   reuse=self.reuse):
                end_points_collection = "reverse_ssd_end_points"
                #with slim.arg_scope([slim.conv2d, attention],
                #with slim.arg_scope([slim.conv2d, sub_pixel_skip],
                #with slim.arg_scope([slim.conv2d, noconcat],
                #with slim.arg_scope([slim.conv2d, bottleneck_skip],
                with slim.arg_scope([slim.conv2d, tail_att],
                                    outputs_collections=end_points_collection):
                    top_fm = args.top_fm
                    int_fm = top_fm // 4
                    if args.image_size == 512:
                        # Because the higher resolution adds one more
                        # downsampling stage, an extra layer is appended
                        #net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                        #                     top_fm, int_fm, scope='block_rev7')
                        #net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        #net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        #net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                        #                      top_fm, int_fm, scope='block_rev7')
                        net = tail_att(net,
                                       self.outputs[DEFAULT_SSD_SCOPE +
                                                    '/block7'],
                                       top_fm,
                                       int_fm,
                                       scope='block_rev7')
                    '''
                    net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                         top_fm, int_fm, scope='block_rev6')
                    net = attention(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                         top_fm, int_fm, scope='block_rev5')
                    net = attention(net, self.outputs[self.scope + '/block4'],
                                         top_fm, int_fm, scope='block_rev4')
                    net = attention(net, self.outputs[self.scope + '/block3'],
                                         top_fm, int_fm, scope='block_rev3')
                    net = attention(net, self.outputs[self.scope + '/block2'],
                                         top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = sub_pixel_skip(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = sub_pixel_skip(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = noconcat(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = noconcat(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = noconcat(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = noconcat(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    '''
                    net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = tail_att(net, self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = tail_att(net, self.outputs[self.scope + '/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = tail_att(net, self.outputs[self.scope + '/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = tail_att(net, self.outputs[self.scope + '/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    '''
                    net = tail_att(net,
                                   self.outputs[DEFAULT_SSD_SCOPE + '/block6'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev6')
                    net = tail_att(net,
                                   self.outputs[DEFAULT_SSD_SCOPE + '/block5'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev5')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block4'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev4')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block3'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev3')
                    net = tail_att(net,
                                   self.outputs[self.scope + '/block2'],
                                   top_fm,
                                   top_fm,
                                   scope='block_rev2')
                    if args.x4:
                        # To provide stride 4 we add one more layer with upsampling
                        #net = sub_pixel_skip(net, self.outputs[self.scope + '/block1'],
                        #                     top_fm, int_fm, scope='block_rev1')
                        #net = sub_pixel_skip(net, self.outputs[self.scope + '/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        #net = noconcat(net, self.outputs[self.scope+'/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        #net = bottleneck_skip(net, self.outputs[self.scope+'/block1'],
                        #                      top_fm, int_fm, scope='block_rev1')
                        net = tail_att(net,
                                       self.outputs[self.scope + '/block1'],
                                       top_fm,
                                       int_fm,
                                       scope='block_rev1')
                endpoints = slim.utils.convert_collection_to_dict(
                    end_points_collection)
            self.outputs.update(endpoints)

            # Creating an output of spatial resolution 1x1 with conventional name 'pool6'
            if args.image_size == 512:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev7/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
            else:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev6/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
Example #19
			logits, end_points = inception_resnet_v2.inception_resnet_v2(scaledInputBatchImages, is_training=False)

	elif options.model == "ResNet":
		if options.useImageMean:
			imageMean = tf.reduce_mean(inputBatchImagesPlaceholder, axis=[1, 2], keep_dims=True)
			print ("Image mean shape: %s" % str(imageMean.shape))
			processedInputBatchImages = inputBatchImagesPlaceholder - imageMean
		else:
			channels = tf.split(axis=3, num_or_size_splits=options.imageChannels, value=inputBatchImagesPlaceholder)
			for i in range(options.imageChannels):
				channels[i] -= IMAGENET_MEAN[i]
			processedInputBatchImages = tf.concat(axis=3, values=channels)
			print (processedInputBatchImages.shape)

		# Create model
		arg_scope = resnet_v1.resnet_arg_scope()
		with slim.arg_scope(arg_scope):
			# logits, end_points = resnet_v1.resnet_v1_152(processedInputBatchImages, is_training=options.trainModel, num_classes=numClasses)
			logits, end_points = resnet_v1.resnet_v1_152(processedInputBatchImages, is_training=False)

	elif options.model == "NAS":
		scaledInputBatchImages = tf.scalar_mul((1.0 / 255.0), inputBatchImagesPlaceholder)
		scaledInputBatchImages = tf.subtract(scaledInputBatchImages, 0.5)
		scaledInputBatchImages = tf.multiply(scaledInputBatchImages, 2.0)

		# Create model
		arg_scope = nasnet.nasnet_large_arg_scope()
		with slim.arg_scope(arg_scope):
			# logits, end_points = nasnet.build_nasnet_large(scaledInputBatchImages, is_training=options.trainModel, num_classes=numClasses)
			logits, end_points = nasnet.build_nasnet_large(scaledInputBatchImages, is_training=False, num_classes=options.numClasses)
Example #20
def main(args):
    subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S')
    log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir)
    if not os.path.isdir(
            log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir)
    if not os.path.isdir(
            model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir,
                                                       'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    seed = random.SystemRandom().randint(0, 10240)
    np.random.seed(seed=seed)
    train_set = facenet.get_dataset(args.data_dir)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    if args.pretrained_model:
        print('Pre-trained model: %s' %
              os.path.expanduser(args.pretrained_model))

    if args.validation_dir:
        print('Validation directory: %s' % args.validation_dir)
        # Read the file containing the pairs used for testing
        pairs = read_pairs(os.path.expanduser(args.validation_pairs))
        # Get the paths for the corresponding images
        validation_paths, actual_issame = get_paths(
            os.path.expanduser(args.validation_dir), pairs)

    with tf.Graph().as_default():
        tf.set_random_seed(seed)

        # Placeholder for the learning rate
        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')

        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')

        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')

        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 3),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int64,
                                            shape=(None, 3),
                                            name='labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.int64],
                                              shapes=[(3, ), (3, )],
                                              shared_name=None,
                                              name=None)
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder])

        nrof_preprocess_threads = 4
        image_size = resnet_v1.resnet_v1_101.default_image_size
        images_and_labels = []
        for _ in range(nrof_preprocess_threads):
            filenames, label = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                processed_image = vgg_preprocessing.preprocess_image(
                    image, image_size, image_size, is_training=False, bgr=True)
                # if args.random_crop:
                #     image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                # else:
                #     image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    processed_image = tf.image.random_flip_left_right(
                        processed_image)

                images.append(processed_image)
            images_and_labels.append([images, label])

        image_batch, labels_batch = tf.train.batch_join(
            images_and_labels,
            batch_size=batch_size_placeholder,
            shapes=[(image_size, image_size, 3), ()],
            enqueue_many=True,
            capacity=4 * nrof_preprocess_threads * args.batch_size,
            allow_smaller_final_batch=True)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        labels_batch = tf.identity(labels_batch, 'label_batch')

        # Build the inference graph
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=args.weight_decay)):
            val_logits, _ = resnet_v1.resnet_v1_101_triplet(
                image_batch,
                embedding_size=DIM_HASHCODE,
                is_training=phase_train_placeholder)

        loader = tf.train.Saver()

        embeddings = tf.squeeze(val_logits['triplet_pre_embeddings'], [1, 2],
                                name='feat_embeddings/squeezed')
        global_step = tf.Variable(0, trainable=False)

        # embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')
        # Split embeddings into anchor, positive and negative and calculate triplet loss
        anchor, positive, negative = tf.unstack(
            tf.reshape(embeddings, [-1, 3, DIM_HASHCODE]), 3, 1)
        triplet_loss = facenet.triplet_loss(anchor, positive, negative,
                                            args.alpha)

        learning_rate = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            args.learning_rate_decay_epochs * args.epoch_size,
            args.learning_rate_decay_factor,
            staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the total losses
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([triplet_loss] + regularization_losses,
                              name='total_loss')

        # Build a Graph that trains the model with one batch of examples and updates the model parameters
        train_op, _ = facenet.train(total_loss, global_step, args.optimizer,
                                    learning_rate, args.moving_average_decay,
                                    tf.trainable_variables())

        # Create a saver
        saver = tf.train.Saver(max_to_keep=3)
        #train_op = facenet.train(total_loss, global_step, args.optimizer,
        #    learning_rate, args.moving_average_decay, tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # Initialize variables
        sess.run(tf.global_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})
        sess.run(tf.local_variables_initializer(),
                 feed_dict={phase_train_placeholder: True})

        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():

            if args.pretrained_model:
                print('Restoring pretrained model: %s' % args.pretrained_model)
                loader.restore(sess, os.path.expanduser(args.pretrained_model))

            # Training and validation loop
            epoch = 0
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch

                train(args, sess, train_set, epoch, image_paths_placeholder,
                      labels_placeholder, labels_batch, batch_size_placeholder,
                      learning_rate_placeholder, phase_train_placeholder,
                      enqueue_op, input_queue, global_step, embeddings,
                      total_loss, train_op, summary_op, summary_writer,
                      args.learning_rate_schedule_file, DIM_HASHCODE, anchor,
                      positive, negative, triplet_loss)

                # Save variables and the metagraph if it doesn't exist already
                save_variables_and_metagraph(sess, saver, summary_writer,
                                             model_dir, subdir, step)

                # Evaluate on validation data set
                if args.validation_dir:
                    evaluate(sess, validation_paths, embeddings, labels_batch,
                             image_paths_placeholder, labels_placeholder,
                             batch_size_placeholder, learning_rate_placeholder,
                             phase_train_placeholder, enqueue_op,
                             actual_issame, args.batch_size,
                             args.validation_nrof_folds, log_dir, step,
                             summary_writer, DIM_HASHCODE)

    return model_dir
Example #21
    PB = sys.argv[2]
    OUT = sys.argv[3]
    print(IN, PB, OUT)
else:
    exit(0)

# Initial setup
tf.reset_default_graph()
tf_config = tf.ConfigProto()
tf_config.gpu_options.allow_growth = True
sess = tf.Session(config=tf_config)

# Build the model
images = tf.placeholder(tf.float32, (None, 224, 224, 3), name='images')
labels = tf.placeholder(tf.int32, (None, 1, 1, 8), name='labels')
with slim.arg_scope(resnet_v1.resnet_arg_scope()):
    logits, end_points = resnet_v1.resnet_v1_50(images,
                                                is_training=False,
                                                num_classes=8)

# Load the checkpoint
saver = tf.train.Saver()
saver.restore(save_path=IN, sess=sess)

# freeze graph
output_nodes = ['resnet_v1_50/SpatialSqueeze']
frozen_graph = tf.graph_util.convert_variables_to_constants(
    sess, sess.graph.as_graph_def(), output_node_names=output_nodes)
from convert_relu6 import convertRelu6
frozen_graph = convertRelu6(frozen_graph)
Example #22
 def __init__(self, **kwargs):
     super().__init__('resnet101.ckpt', 'resnet_v1_101', \
             resnet_v1.resnet_arg_scope(), \
             resnet_v1.resnet_v1_101, 0, **kwargs)
Example #23
def net_arg_scope():
    if net_type == 'resnet':
        return resnet_v1.resnet_arg_scope()
    elif net_type == 'vgg':
        return vgg.vgg_arg_scope(False)
Example #24
    def _feature_extractor(self, input, mode, scope=None, relu_leakiness=0.1):
        image = tf.placeholder_with_default(input, (None, 300, 300, 3),
                                            'input_image')

        pyramid_map = {
            'C1': 'FeatureX1/resnet_v1_50/conv1/Relu:0',
            'C2': 'FeatureX1/resnet_v1_50/block1/unit_2/bottleneck_v1',
            'C3': 'FeatureX1/resnet_v1_50/block2/unit_3/bottleneck_v1',
            'C4': 'FeatureX1/resnet_v1_50/block3/unit_5/bottleneck_v1',
            'C5': 'FeatureX1/resnet_v1_50/block4/unit_3/bottleneck_v1',
        }

        if scope is not None:
            for key, value in pyramid_map.items():
                pyramid_map[key] = scope + "/" + value

        with tf.variable_scope("FeatureX1"):
            with slim.arg_scope(
                    resnet_v1.resnet_arg_scope(weight_decay=0.000005)):
                logits, end_points = resnet_v1.resnet_v1_50(
                    image, 1000, is_training=self.mode == 'train')

            pyramid = pyramid_network.build_pyramid(pyramid_map, end_points)

        extra_train_ops = []
        py_features = [pyramid['P5']]

        with tf.variable_scope("FeatureX2"):
            with tf.variable_scope("pyramid_2"):
                x = pyramid['P2']

                with tf.variable_scope("block_0"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        256,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=True)

                with tf.variable_scope("block_1"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        64,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=False)

                with tf.variable_scope("block_2"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        64,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=False)

                py_features.append(x)

            with tf.variable_scope("pyramid_3"):
                x = pyramid['P3']

                with tf.variable_scope("block_0"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        256,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=True)

                with tf.variable_scope("block_1"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        64,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=False)

                py_features.append(x)

            with tf.variable_scope("pyramid_4"):
                x = pyramid['P4']

                with tf.variable_scope("block_0"):
                    x, extra_train_ops = resnet_utils.residual(
                        x,
                        256,
                        64,
                        resnet_utils.stride_arr(2),
                        mode,
                        extra_train_ops,
                        relu_leakiness,
                        activate_before_residual=True)

                py_features.append(x)

            x = tf.concat(py_features, axis=3, name='concat')

            with tf.variable_scope("block_0"):
                x, extra_train_ops = resnet_utils.residual(
                    x,
                    448,
                    256,
                    resnet_utils.stride_arr(2),
                    mode,
                    extra_train_ops,
                    relu_leakiness,
                    activate_before_residual=True)

            with tf.variable_scope("block_1"):
                x, extra_train_ops = resnet_utils.residual(
                    x,
                    256,
                    256,
                    resnet_utils.stride_arr(2),
                    mode,
                    extra_train_ops,
                    relu_leakiness,
                    activate_before_residual=False)

            global_avg = tf.reduce_mean(x, [1, 2], name='global_avg')

        feature = tf.nn.l2_normalize(global_avg, 0, name='Feature')

        return feature, extra_train_ops
Example #25
def build_pspnet(inputs,
                 label_size,
                 num_classes,
                 preset_model='PSPNet-Res50',
                 pooling_type="MAX",
                 weight_decay=1e-5,
                 upscaling_method="bilinear",
                 is_training=True,
                 pretrained_dir="models"):
    """
    Builds the PSPNet model. 

    Arguments:
      inputs: The input tensor
      label_size: Size of the final label tensor. We need to know this for proper upscaling 
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes
      pooling_type: Max or Average pooling

    Returns:
      PSPNet model
    """

    inputs = mean_image_subtraction(inputs)

    if preset_model == 'PSPNet-Res50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                inputs, is_training=is_training, scope='resnet_v1_50')
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'),
                slim.get_model_variables('resnet_v1_50'))
    elif preset_model == 'PSPNet-Res101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs, is_training=is_training, scope='resnet_v1_101')
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'),
                slim.get_model_variables('resnet_v1_101'))
    elif preset_model == 'PSPNet-Res152':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_152(
                inputs, is_training=is_training, scope='resnet_v1_152')
            # PSPNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'),
                slim.get_model_variables('resnet_v1_152'))
    else:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101, and ResNet 152"
            % (preset_model))

    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    feature_map_shape = [int(x / 8.0) for x in label_size]
    psp = PyramidPoolingModule(f[2],
                               feature_map_shape=feature_map_shape,
                               pooling_type=pooling_type)

    net = slim.conv2d(psp, 512, [3, 3], activation_fn=None)
    net = slim.batch_norm(net)
    net = tf.nn.relu(net)

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        net = Upsampling(net, label_size)

    net = slim.dropout(net, keep_prob=0.9)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
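A hedged usage sketch: the returned init_fn loads the pre-trained backbone after variable initialization (shapes and class count are illustrative):

inputs = tf.placeholder(tf.float32, [None, 512, 512, 3])
net, init_fn = build_pspnet(inputs, label_size=[512, 512], num_classes=21,
                            is_training=True)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # restores resnet_v1_50.ckpt into the backbone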
Example #26
def res_v1_101_lstm(input_imgs, input_seqs, input_masks,
                    batch_size, embedding_size, vocab_size,
                    is_training, lstm_dropout_keep_prob):
    with tf.variable_scope('res_v1_101_lstm'):
        # Sequence embedding layer
        with tf.variable_scope("seq_embedding"):
            embedding_map = tf.get_variable(
                name="map",
                shape=[vocab_size, embedding_size],
                initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08))

        # Image feature extraction layer
        with slim.arg_scope(resnet_v1.resnet_arg_scope(trainable=is_training)):
            # Set is_training=False to freeze the running mean/variance of batch normalization
            image_feature, _ = resnet_v1.resnet_v1_101(input_imgs, None, is_training=False, output_stride=32)

        # Image embedding layer
        image_feature = tf.squeeze(image_feature, axis=[1, 2])
        image_embedding = slim.fully_connected(image_feature, embedding_size, activation_fn=None,
                                               weights_initializer=tf.truncated_normal_initializer(0, 0.01),
                                               biases_initializer=tf.zeros_initializer,
                                               weights_regularizer=slim.l2_regularizer(0.0005),
                                               scope='image_embedding')

        # LSTM layer
        lstm_cell = tf.contrib.rnn.BasicLSTMCell(num_units=embedding_size, state_is_tuple=True)
        # Training process
        if is_training:
            lstm_cell = tf.contrib.rnn.DropoutWrapper(lstm_cell,
                                                      input_keep_prob=lstm_dropout_keep_prob,
                                                      output_keep_prob=lstm_dropout_keep_prob)
            seq_embeddings = tf.nn.embedding_lookup(embedding_map, input_seqs)

            with tf.variable_scope("lstm", initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embeddings to set the initial LSTM state.
                zero_state = lstm_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
                _, initial_state = lstm_cell(image_embedding, zero_state)
                lstm_scope.reuse_variables()
                sequence_length = tf.reduce_sum(input_masks, 1)
                lstm_outputs, _ = tf.nn.dynamic_rnn(cell=lstm_cell,
                                                    inputs=seq_embeddings,
                                                    sequence_length=sequence_length,
                                                    initial_state=initial_state,
                                                    dtype=tf.float32,
                                                    scope=lstm_scope)
                lstm_outputs = tf.reshape(lstm_outputs, [-1, lstm_cell.output_size])

            # Word logits layer
            output_logits = slim.fully_connected(lstm_outputs, vocab_size, activation_fn=None,
                                                 weights_initializer=tf.truncated_normal_initializer(0, 0.01),
                                                 biases_initializer=tf.zeros_initializer(),
                                                 weights_regularizer=slim.l2_regularizer(0.0005),
                                                 scope='logits')

            variables = slim.get_variables('res_v1_101_lstm')
            res_variables = {}
            for variable in variables:
                if 'resnet_v1_101' in variable.name:
                    res_variables[variable.name[16:-2]] = variable

            return output_logits, res_variables
        # Inference process
        else:
            weights = tf.get_variable("logits/weights", [embedding_size, vocab_size])
            biases = tf.get_variable("logits/biases", [vocab_size])
            with tf.variable_scope("lstm", initializer=tf.random_uniform_initializer(minval=-0.08, maxval=0.08)) as lstm_scope:
                # Feed the image embeddings to set the initial LSTM state.
                zero_state = lstm_cell.zero_state(batch_size=batch_size, dtype=tf.float32)
                _, initial_state = lstm_cell(image_embedding, zero_state)
                lstm_scope.reuse_variables()
                memory_state = initial_state
                output_words = [input_seqs[0]]
                # TODO: replace the end condition of the loop with meeting the end word
                for _ in range(30):
                    input_seqs = tf.nn.embedding_lookup(embedding_map, input_seqs)
                    output_seqs, memory_state = lstm_cell(input_seqs, memory_state)
                    output_logits = tf.matmul(output_seqs, weights) + biases
                    output_word = tf.argmax(output_logits, -1)
                    output_words.append(output_word[0])
                    input_seqs = output_word
                output_words = tf.stack(output_words)

            return output_words
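A minimal sketch of wiring res_v1_101_lstm into a training graph (TF 1.x); the placeholder shapes and hyper-parameters below are illustrative assumptions, not part of the original snippet:

import tensorflow as tf

batch_size, seq_len = 32, 20
input_imgs = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
input_seqs = tf.placeholder(tf.int32, [batch_size, seq_len])
input_masks = tf.placeholder(tf.int32, [batch_size, seq_len])

logits, res_variables = res_v1_101_lstm(
    input_imgs, input_seqs, input_masks,
    batch_size=batch_size, embedding_size=512, vocab_size=10000,
    is_training=True, lstm_dropout_keep_prob=0.7)

# res_variables maps checkpoint variable names to graph variables,
# so the backbone can be restored from a pre-trained ResNet checkpoint:
saver = tf.train.Saver(res_variables)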
Example #27
def build_refinenet(inputs,
                    num_classes,
                    preset_model='RefineNet-Res101',
                    weight_decay=1e-5,
                    is_training=True,
                    upscaling_method="bilinear",
                    pretrained_dir="models"):
    """
    Builds the RefineNet model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      RefineNet model
    """

    inputs = mean_image_subtraction(inputs)

    if preset_model == 'RefineNet-Res50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(
                inputs, is_training=is_training, scope='resnet_v1_50')
            # RefineNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'),
                slim.get_model_variables('resnet_v1_50'))
    elif preset_model == 'RefineNet-Res101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(
                inputs, is_training=is_training, scope='resnet_v1_101')
            # RefineNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'),
                slim.get_model_variables('resnet_v1_101'))
    elif preset_model == 'RefineNet-Res152':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_152(
                inputs, is_training=is_training, scope='resnet_v1_152')
            # RefineNet requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(
                os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'),
                slim.get_model_variables('resnet_v1_152'))
    else:
        raise ValueError(
            "Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101 and ResNet 152"
            % (preset_model))

    f = [
        end_points['pool5'], end_points['pool4'], end_points['pool3'],
        end_points['pool2']
    ]

    g = [None, None, None, None]
    h = [None, None, None, None]

    for i in range(4):
        h[i] = slim.conv2d(f[i], 256, 1)

    g[0] = RefineBlock(high_inputs=None, low_inputs=h[0])
    g[1] = RefineBlock(g[0], h[1])
    g[2] = RefineBlock(g[1], h[2])
    g[3] = RefineBlock(g[2], h[3])

    # g[3] = Upsampling(g[3], scale=4)
    net = g[3]

    if upscaling_method.lower() == "conv":
        net = ConvUpscaleBlock(net, 256, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 256)
        net = ConvUpscaleBlock(net, 128, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 128)
        net = ConvUpscaleBlock(net, 64, kernel_size=[3, 3], scale=2)
        net = ConvBlock(net, 64)
    elif upscaling_method.lower() == "bilinear":
        # label_size (target [height, width]) is assumed to be in scope here
        net = Upsampling(net, label_size)

    net = slim.conv2d(net,
                      num_classes, [1, 1],
                      activation_fn=None,
                      scope='logits')

    return net, init_fn
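The returned init_fn restores the pre-trained backbone after the usual variable initialization; a minimal usage sketch, assuming TF 1.x and a resnet_v1_101.ckpt under models/:

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 512, 512, 3])
net, init_fn = build_refinenet(images, num_classes=21,
                               preset_model='RefineNet-Res101')

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    init_fn(sess)  # load resnet_v1_101.ckpt into the backbone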
Example #28
def export():
    # Create index->synset mapping
    synsets = []
    with open(SYNSET_FILE) as f:
        synsets = f.read().splitlines()
    # Create synset->metadata mapping
    texts = {}
    with open(METADATA_FILE) as f:
        for line in f.read().splitlines():
            parts = line.split('\t')
            assert len(parts) == 2
            texts[parts[0]] = parts[1]

    with tf.Graph().as_default():
        # Build inference model.
        # Please refer to Tensorflow inception model for details.

        # Input transformation.
        # serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        # feature_configs = {
        #     'image/encoded': tf.FixedLenFeature(
        #         shape=[], dtype=tf.string),
        # }
        # tf_example = tf.parse_example(serialized_tf_example, feature_configs)
        # jpegs = tf_example['image/encoded']

        serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
        feature_configs = {
            'x': tf.FixedLenFeature(shape=[], dtype=tf.float32),
        }
        tf_example = tf.parse_example(serialized_tf_example, feature_configs)

        # reshape the input image to its original dimension
        tf_example['x'] = tf.reshape(tf_example['x'], (1, 224, 224, 3))
        input_tensor = tf.identity(
            tf_example['x'], name='x')  # use tf.identity() to assign name
        # images = tf.map_fn(preprocess_image, jpegs, dtype=tf.float32)

        # Run inference.
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            net, end_points = resnet_v1.resnet_v1_50(input_tensor,
                                                     1000,
                                                     is_training=False)
        # logits, _ = inception_model.inference(images, NUM_CLASSES + 1)

        # Transform output to topK result.
        values, indices = tf.nn.top_k(net, NUM_TOP_CLASSES)

        # Create a constant string Tensor where the i'th element is
        # the human readable class description for the i'th index.
        # Note that the 0th index is an unused background class
        # (see inception model definition code).
        class_descriptions = ['unused background']
        for s in synsets:
            class_descriptions.append(texts[s])
        class_tensor = tf.constant(class_descriptions)

        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore variables from training checkpoint.
        # variable_averages = tf.train.ExponentialMovingAverage(
        #     inception_model.MOVING_AVERAGE_DECAY)
        # variables_to_restore = variable_averages.variables_to_restore()
        # saver = tf.train.Saver(variables_to_restore)
        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Restore variables from training checkpoints.
            # ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            # if ckpt and ckpt.model_checkpoint_path:
            # saver.restore(sess, ckpt.model_checkpoint_path)

            # Assuming model_checkpoint_path looks something like:
            #   /my-favorite-path/imagenet_train/model.ckpt-0,
            # extract global_step from it.
            #   global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            #   print('Successfully loaded model from %s at step=%s.' %
            #         (ckpt.model_checkpoint_path, global_step))
            # else:
            #   print('No checkpoint file found at %s' % FLAGS.checkpoint_dir)
            #   return

            # Export inference model.

            saver.restore(
                sess, os.path.join(pre_trained_model_dir, "resnet_v1_50.ckpt"))

            print("Model restored from", pre_trained_model_dir)

            output_path = os.path.join(
                tf.compat.as_bytes(FLAGS.output_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', output_path)
            builder = tf.saved_model.builder.SavedModelBuilder(output_path)

            # Build the signature_def_map.
            classify_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                serialized_tf_example)
            classes_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                classes)
            scores_output_tensor_info = tf.saved_model.utils.build_tensor_info(
                values)

            classification_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={
                        tf.saved_model.signature_constants.CLASSIFY_INPUTS:
                        classify_inputs_tensor_info
                    },
                    outputs={
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_CLASSES:
                        classes_output_tensor_info,
                        tf.saved_model.signature_constants.CLASSIFY_OUTPUT_SCORES:
                        scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    CLASSIFY_METHOD_NAME))

            predict_inputs_tensor_info = tf.saved_model.utils.build_tensor_info(
                input_tensor)
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs={'images': predict_inputs_tensor_info},
                    outputs={
                        'classes': classes_output_tensor_info,
                        'scores': scores_output_tensor_info
                    },
                    method_name=tf.saved_model.signature_constants.
                    PREDICT_METHOD_NAME))

            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={
                    'predict_images':
                    prediction_signature,
                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    classification_signature,
                },
                main_op=tf.tables_initializer(),
                strip_default_attrs=True)

            builder.save()
            print('Successfully exported model to %s' % FLAGS.output_dir)
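Once exported, the SavedModel can be served with TensorFlow Serving or loaded back directly; a minimal re-loading sketch (TF 1.x), where export_dir stands in for FLAGS.output_dir/<model_version> and is an assumption:

import tensorflow as tf

export_dir = './exported/1'
with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING], export_dir)
    sig = meta_graph.signature_def['predict_images']
    print(sig.inputs['images'].name, sig.outputs['classes'].name)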
Example #29
def train(args):

    dataset = CifarDataSet(args.batch_size, args.data_dir)
    dataset.make_batch_train()
    dataset.make_batch_valid_or_test()
    if 'cifar-100' in args.data_dir:
        num_classes = 100
    else:
        num_classes = 10

    model = resnet_v1.resnet_v1_110
    # effectively 112 layers: there are 2 additional 1x1 convs for the shortcuts
    print ("Data loaded! Building model...")

    # for training
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net, end_points = model(dataset.images_train, num_classes)
        logits = tf.squeeze(net, [1, 2], name='SqueezedLogits')

    # for evaluating
    with slim.arg_scope(resnet_v1.resnet_arg_scope()):
        net_eval, _ = model(dataset.images_vt, num_classes, is_training=False, reuse=True)
        predictions = tf.argmax(tf.squeeze(net_eval, [1, 2]), axis=-1)

    cross_entropy_loss_op = tf.losses.sparse_softmax_cross_entropy(
        labels=dataset.labels_train, logits=logits)

    l2_loss_op = tf.losses.get_regularization_loss()

    loss_op = cross_entropy_loss_op + l2_loss_op

    num_train_batches = dataset.images_np['train'].shape[0] // args.batch_size
    gstep_op, lr_op, train_op, saver, sess = get_train_ops(args, num_train_batches, loss_op)

    print ("Train ops get! Start training...")

    while True:

        cross_entropy_loss, l2_loss, gstep, lr, _ = sess.run([
            cross_entropy_loss_op, l2_loss_op, gstep_op, lr_op, train_op
        ])

        cur_epoch = gstep // num_train_batches + 1

        if gstep % args.log_every == 0:
            log_string = "({:5d}/{:5d})".format(gstep, num_train_batches * args.epoch)
            log_string += " cross entropy loss: {:.4f}, l2 loss: {:.4f},".format(cross_entropy_loss, l2_loss)
            log_string += " lr: {:.4f}".format(lr)
            log_string += " (ep: {:3d})".format(cur_epoch)
            print(log_string)

        if (gstep + 1) % num_train_batches == 0:

            print ("Saving .ckpt and evaluating with validation set...")

            saver.save(sess, os.path.join(args.save_dir, 'model.ckpt'), global_step=cur_epoch)

            sess.run(dataset.iterator_vt.initializer, feed_dict={dataset.validation: True})

            corrects = 0
            num_eval_batches = dataset.images_np['valid'].shape[0] // dataset.eval_batch_size
            for step in range(num_eval_batches):
                preds, labels = sess.run([predictions, dataset.labels_vt])
                corrects += np.sum(preds == labels)

            print ("validation accuracy: {:.3f}% ({:4d}/{:4d})".format(
                100 * corrects / dataset.images_np['valid'].shape[0],\
                corrects, dataset.images_np['valid'].shape[0]
            ))
            print ("=" * 80)

        if (gstep + 1) % (num_train_batches * args.eval_every) == 0:

            print ("Evaluating with test set...")

            sess.run(dataset.iterator_vt.initializer, feed_dict={dataset.validation: False})

            corrects = 0
            num_eval_batches = dataset.images_np['test'].shape[0] // dataset.eval_batch_size
            for step in range(num_eval_batches):
                preds, labels = sess.run([predictions, dataset.labels_vt])
                corrects += np.sum(preds == labels)

            print ("test accuracy: {:.3f}% ({:5d}/{:5d})".format(
                100 * corrects / dataset.images_np['test'].shape[0],
                corrects, dataset.images_np['test'].shape[0]
            ))
            print ("=" * 80)

        if cur_epoch > args.epoch:
            break

    print ("Done!")
Example #30
def build_gcn(inputs, num_classes, preset_model='GCN-Res101', weight_decay=1e-5, is_training=True, upscaling_method="bilinear", pretrained_dir="models"):
    """
    Builds the GCN model. 

    Arguments:
      inputs: The input tensor
      preset_model: Which model you want to use. Select which ResNet model to use for feature extraction 
      num_classes: Number of classes

    Returns:
      GCN model
    """

    inputs = mean_image_subtraction(inputs)

    if preset_model == 'GCN-Res50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_50(inputs, is_training=is_training, scope='resnet_v1_50')
            # GCN requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_50.ckpt'), slim.get_model_variables('resnet_v1_50'))
    elif preset_model == 'GCN-Res101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_101(inputs, is_training=is_training, scope='resnet_v1_101')
            # GCN requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_101.ckpt'), slim.get_model_variables('resnet_v1_101'))
    elif preset_model == 'GCN-Res152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
            logits, end_points = resnet_v1.resnet_v1_152(inputs, is_training=is_training, scope='resnet_v1_152')
            # GCN requires pre-trained ResNet weights
            init_fn = slim.assign_from_checkpoint_fn(os.path.join(pretrained_dir, 'resnet_v1_152.ckpt'), slim.get_model_variables('resnet_v1_152'))
    else:
        raise ValueError("Unsupported ResNet model '%s'. This function only supports ResNet 50, ResNet 101 and ResNet 152" % (preset_model))

    res = [end_points['pool5'], end_points['pool4'],
           end_points['pool3'], end_points['pool2']]

    down_5 = GlobalConvBlock(res[0], n_filters=21, size=3)
    down_5 = BoundaryRefinementBlock(down_5, n_filters=21, kernel_size=[3, 3])
    down_5 = ConvUpscaleBlock(down_5, n_filters=21, kernel_size=[3, 3], scale=2)

    down_4 = GlobalConvBlock(res[1], n_filters=21, size=3)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3])
    down_4 = tf.add(down_4, down_5)
    down_4 = BoundaryRefinementBlock(down_4, n_filters=21, kernel_size=[3, 3])
    down_4 = ConvUpscaleBlock(down_4, n_filters=21, kernel_size=[3, 3], scale=2)

    down_3 = GlobalConvBlock(res[2], n_filters=21, size=3)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3])
    down_3 = tf.add(down_3, down_4)
    down_3 = BoundaryRefinementBlock(down_3, n_filters=21, kernel_size=[3, 3])
    down_3 = ConvUpscaleBlock(down_3, n_filters=21, kernel_size=[3, 3], scale=2)

    down_2 = GlobalConvBlock(res[3], n_filters=21, size=3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    down_2 = tf.add(down_2, down_3)
    down_2 = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    down_2 = ConvUpscaleBlock(down_2, n_filters=21, kernel_size=[3, 3], scale=2)

    net = BoundaryRefinementBlock(down_2, n_filters=21, kernel_size=[3, 3])
    net = ConvUpscaleBlock(net, n_filters=21, kernel_size=[3, 3], scale=2)
    net = BoundaryRefinementBlock(net, n_filters=21, kernel_size=[3, 3])

    net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, scope='logits')

    return net, init_fn
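The hard-coded n_filters=21 matches the 21 PASCAL VOC classes used in the GCN paper. A minimal sketch of wiring build_gcn into a training loss (TF 1.x; the shapes and 512x512 crop size are illustrative assumptions):

import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 512, 512, 3])
labels = tf.placeholder(tf.int32, [None, 512, 512])
logits, init_fn = build_gcn(images, num_classes=21)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits))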
Example #31
    def add_extra_layers(self, net):
        with slim.arg_scope(resnet_v1.resnet_arg_scope(is_training=self.training,
                                                       weight_decay=self.weight_decay,
                                                       batch_norm_decay=args.bn_decay)):
            block_depth = 2
            num_fm = 2048
            blocks = [
                resnet_utils.Block(
                    'block5', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block6', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                resnet_utils.Block(
                    'block7', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
            ]
            if args.image_size == 512:
                blocks += [
                    resnet_utils.Block(
                        'block8', bottleneck, [(num_fm, num_fm//4, 2)] + [(num_fm, num_fm//4, 1)] * (block_depth-1)),
                ]

            net, endpoints = resnet_v1.resnet_v1(net, blocks,
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 reuse=self.reuse,
                                                 scope=DEFAULT_SSD_SCOPE)
            self.outputs.update(endpoints)
            with tf.variable_scope(DEFAULT_SSD_SCOPE+"_back", reuse=self.reuse):
                end_points_collection = "reverse_ssd_end_points"
                with slim.arg_scope([slim.conv2d, bottleneck_skip],
                                    outputs_collections=end_points_collection):
                    top_fm = args.top_fm
                    int_fm = top_fm//4
                    if args.image_size == 512:
                        # since the higher input resolution adds one more
                        # pooling stage, an extra reverse layer is appended
                        net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block7'],
                                              top_fm, int_fm, scope='block_rev7')

                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block6'],
                                          top_fm, int_fm, scope='block_rev6')
                    net = bottleneck_skip(net, self.outputs[DEFAULT_SSD_SCOPE+'/block5'],
                                          top_fm, int_fm, scope='block_rev5')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block4'],
                                          top_fm, int_fm, scope='block_rev4')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block3'],
                                          top_fm, int_fm, scope='block_rev3')
                    net = bottleneck_skip(net, self.outputs[self.scope+'/block2'],
                                          top_fm, int_fm, scope='block_rev2')
                    if args.x4:
                        # To provide stride 4 we add one more layer with upsampling
                        net = bottleneck_skip(net, self.outputs[self.scope+'/block1'],
                                              top_fm, int_fm, scope='block_rev1')
                endpoints = slim.utils.convert_collection_to_dict(end_points_collection)
            self.outputs.update(endpoints)

            # Creating an output of spatial resolution 1x1 with conventional name 'pool6'
            if args.image_size == 512:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev7/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
            else:
                self.outputs[DEFAULT_SSD_SCOPE+'/pool6'] =\
                        tf.reduce_mean(self.outputs['ssd_back/block_rev6/shortcut'],
                                       [1, 2], name='pool6', keep_dims=True)
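bottleneck_skip itself is defined elsewhere in this repo. Purely to illustrate the pattern its call sites imply (upsample the top-down path, fuse it with a lateral feature map, then apply a residual bottleneck), here is a hypothetical sketch, not the author's implementation:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def bottleneck_skip_sketch(net, lateral, top_fm, int_fm, scope):
    """Fuse the top-down path with a lateral skip connection."""
    with tf.variable_scope(scope):
        size = tf.shape(lateral)[1:3]
        # upsample the top-down feature map to the lateral resolution
        net = tf.image.resize_nearest_neighbor(net, size)
        net = slim.conv2d(net, top_fm, [1, 1], scope='proj')
        lateral = slim.conv2d(lateral, top_fm, [1, 1], scope='lateral')
        # the fused map plays the role of the 'shortcut' endpoint used above
        net = tf.add(net, lateral, name='shortcut')
        # residual bottleneck on top of the fused features
        residual = slim.conv2d(net, int_fm, [1, 1], scope='reduce')
        residual = slim.conv2d(residual, int_fm, [3, 3], scope='conv')
        residual = slim.conv2d(residual, top_fm, [1, 1],
                               activation_fn=None, scope='expand')
        return tf.nn.relu(net + residual)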