Example #1
    def loss(self, emb, sa, sn):
        # emb stacks the anchor, positive and negative embeddings along the batch axis.
        unit_emb = tf.nn.l2_normalize(emb, 1)

        a, p, n = tf.split(emb, 3)
        ua, up, un = tf.split(unit_emb, 3)

        # Margin-0.5 triplet loss on the L2-normalized embeddings.
        triplet_cost, dist_pos, dist_neg = symbf.triplet_loss(ua,
                                                              up,
                                                              un,
                                                              margin=0.5,
                                                              extra=True,
                                                              scope="loss")

        # Hinge penalty on sign(sa - sn) * (a - n), with a margin of 0.2.
        direction_cost = tf.reduce_mean(
            tf.maximum(0.,
                       tf.sign(sa - sn) * tf.transpose(a - n) + 0.2))

        return triplet_cost + direction_cost, dist_pos, dist_neg
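
For reference, symbf here is tensorpack's symbolic_functions module. Below is a minimal sketch of what a margin-based triplet loss such as symbf.triplet_loss(a, p, n, margin, extra=True) is expected to return, assuming the usual squared-Euclidean formulation; the function name and details are an illustration, not the library's actual implementation.

import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, margin, scope="loss"):
    # Assumed formulation: per-sample squared L2 distances, hinge with `margin`,
    # averaged over the batch; the two returned distances are batch means.
    with tf.name_scope(scope):
        d_pos = tf.reduce_sum(tf.square(anchor - positive), axis=1)
        d_neg = tf.reduce_sum(tf.square(anchor - negative), axis=1)
        loss = tf.reduce_mean(tf.maximum(0., d_pos - d_neg + margin), name="loss")
        return (loss,
                tf.reduce_mean(d_pos, name="pos_dist"),
                tf.reduce_mean(d_neg, name="neg_dist"))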
Example #2
    def loss(self, a, p, n):
        return symbf.triplet_loss(a, p, n, 5., extra=True, scope="loss")
    def _build_graph(self, inputs):
        anc, pos, neg = inputs

        # Stack anchor / positive / negative along the batch axis and scale to [0, 1].
        inputs = tf.concat([anc, pos, neg], axis=0)
        inputs = tf.cast(inputs, tf.float32) * (1.0 / 255)
        inputs = tf.placeholder_with_default(
            inputs,
            shape=[None, INPUT_SHAPE, INPUT_SHAPE, 3],
            name='input_frames')

        # Normalize with the standard ImageNet per-channel mean and std.
        image_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
        image_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)

        inputs = (inputs - image_mean) / image_std

        if self.data_format == 'NCHW':
            inputs = tf.transpose(inputs, [0, 3, 1, 2])

        def shortcut(l, n_in, n_out, stride):
            # Identity shortcut, or a 1x1 convolution when the channel count changes.
            if n_in != n_out:
                return Conv2D('convshortcut', l, n_out, 1, stride=stride)
            else:
                return l

        def basicblock(l, ch_out, stride, preact):
            # Pre-activation basic block: optional BNReLU pre-activation (per `preact`),
            # then two 3x3 convolutions, added to a (possibly projected) shortcut.
            ch_in = l.get_shape().as_list()[1]
            if preact == 'both_preact':
                l = BNReLU('preact', l)
                input = l
            elif preact != 'no_preact':
                input = l
                l = BNReLU('preact', l)
            else:
                input = l
            l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
            l = Conv2D('conv2', l, ch_out, 3)
            return l + shortcut(input, ch_in, ch_out, stride)

        def bottleneck(l, ch_out, stride, preact):
            # Pre-activation bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions with a
            # 4x channel expansion, added to a (possibly projected) shortcut.
            ch_in = l.get_shape().as_list()[1]
            if preact == 'both_preact':
                l = BNReLU('preact', l)
                input = l
            elif preact != 'no_preact':
                input = l
                l = BNReLU('preact', l)
            else:
                input = l
            l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
            l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
            l = Conv2D('conv3', l, ch_out * 4, 1)
            return l + shortcut(input, ch_in, ch_out * 4, stride)

        def layer(l,
                  layername,
                  block_func,
                  features,
                  count,
                  stride,
                  first=False):
            with tf.variable_scope(layername):
                with tf.variable_scope('block0'):
                    l = block_func(l, features, stride,
                                   'no_preact' if first else 'both_preact')
                for i in range(1, count):
                    with tf.variable_scope('block{}'.format(i)):
                        l = block_func(l, features, 1, 'default')
                return l

        cfg = {
            18: ([2, 2, 2, 2], basicblock),
            34: ([3, 4, 6, 3], basicblock),
            50: ([3, 4, 6, 3], bottleneck),
            101: ([3, 4, 23, 3], bottleneck)
        }
        defs, block_func = cfg[self.depth]

        with argscope(Conv2D, nl=tf.identity, use_bias=False,
                      W_init=variance_scaling_initializer(mode='FAN_OUT')), \
                argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm], data_format=self.data_format), \
                argscope(BatchNorm, use_local_stat=True):
            logits = (LinearWrap(inputs)
                      .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                      .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                      .apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
                      .apply(layer, 'group1', block_func, 128, defs[1], 2)
                      .apply(layer, 'group2', block_func, 256, defs[2], 2)
                      .apply(layer, 'group3', block_func, 512, defs[3], 2)
                      .BNReLU('bnlast')
                      .GlobalAvgPooling('gap')
                      .FullyConnected('linear', 1000, nl=tf.identity)())

        tf.identity(logits, name='encoding')
        encodings = tf.identity(normalize(logits), name='normalized_encoding')
        # Split the stacked batch back into anchor / positive / negative encodings.
        anc_enc, pos_enc, neg_enc = tf.split(encodings, 3, axis=0)

        # L2 weight decay on all convolution and FC kernels.
        wd_cost = regularize_cost('.*/W',
                                  l2_regularizer(1e-4),
                                  name='l2_regularize_loss')
        add_moving_summary(wd_cost)

        loss, pos_dist, neg_dist = symbf.triplet_loss(anc_enc,
                                                      pos_enc,
                                                      neg_enc,
                                                      0.5,
                                                      extra=True,
                                                      scope="loss")
        self.cost = tf.add_n([loss, wd_cost], name='cost')
        add_moving_summary(pos_dist, neg_dist, self.cost, loss)
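
Note that the normalize helper used for normalized_encoding is not shown in this excerpt; presumably it scales each encoding to unit L2 norm, along the lines of the hypothetical sketch below (not the repository's actual definition).

def normalize(x):
    # Hypothetical helper: L2-normalize each encoding vector along the feature axis.
    return tf.nn.l2_normalize(x, 1)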