# Assumed imports for these snippets (TF 1.x / tensorpack 0.x era APIs; not
# part of the original lines):
import tensorflow as tf
from tensorflow.contrib.layers import l2_regularizer, variance_scaling_initializer
from tensorpack import *
from tensorpack.tfutils.summary import add_moving_summary
import tensorpack.tfutils.symbolic_functions as symbf

INPUT_SHAPE = 224  # assumed constant; not defined in the original snippet


def loss(self, emb, sa, sn):
    # `emb` stacks anchor/positive/negative embeddings along the batch axis;
    # `sa` and `sn` are per-sample scalars (e.g. frame positions) for the
    # anchor and negative.
    unit_emb = tf.nn.l2_normalize(emb, 1)
    a, p, n = tf.split(emb, 3)
    ua, up, un = tf.split(unit_emb, 3)
    # Triplet margin loss on the L2-normalized embeddings.
    triplet_cost, dist_pos, dist_neg = symbf.triplet_loss(
        ua, up, un, margin=0.5, extra=True, scope="loss")
    # Ordering penalty on the *unnormalized* embeddings: the [B]-shaped sign
    # vector broadcasts over the [D, B] transposed difference.
    direction_cost = tf.reduce_mean(
        tf.maximum(0., tf.sign(sa - sn) * tf.transpose(a - n) + 0.2))
    return triplet_cost + direction_cost, dist_pos, dist_neg
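# A minimal, hypothetical sketch (not from the original file) of how the
# direction term broadcasts; it runs standalone in a TF 1.x session if
# uncommented:
#
#   import tensorflow as tf
#   sa = tf.constant([5., 1.])               # anchor "positions", shape [B]
#   sn = tf.constant([1., 5.])               # negative "positions", shape [B]
#   a = tf.constant([[.3, .1], [.2, .4]])    # anchor embeddings, shape [B, D]
#   n = tf.constant([[.1, .2], [.5, .1]])    # negative embeddings, shape [B, D]
#   # (a - n) is [B, D]; transposing to [D, B] lets the [B] sign vector
#   # broadcast across D, hinging each dimension at a margin of 0.2.
#   cost = tf.reduce_mean(
#       tf.maximum(0., tf.sign(sa - sn) * tf.transpose(a - n) + 0.2))
#   with tf.Session() as sess:
#       print(sess.run(cost))  # 0.25 for the toy values above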
def loss(self, a, p, n):
    # Plain triplet margin loss (margin 5.0) on the raw embeddings.
    return symbf.triplet_loss(a, p, n, 5., extra=True, scope="loss")
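# For reference, a hedged sketch of what symbf.triplet_loss is assumed to
# compute here (a squared-distance hinge; verify against your tensorpack
# version before relying on it):
#
#   def triplet_loss_sketch(a, p, n, margin):
#       d_pos = tf.reduce_sum(tf.square(a - p), 1)  # squared a-p distances, [B]
#       d_neg = tf.reduce_sum(tf.square(a - n), 1)  # squared a-n distances, [B]
#       loss = tf.reduce_mean(tf.maximum(0., margin + d_pos - d_neg))
#       return loss, tf.reduce_mean(d_pos), tf.reduce_mean(d_neg)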
def _build_graph(self, inputs):
    anc, pos, neg = inputs
    # Stack anchor/positive/negative along the batch axis and scale to [0, 1].
    inputs = tf.concat([anc, pos, neg], axis=0)
    inputs = tf.cast(inputs, tf.float32) * (1.0 / 255)
    inputs = tf.placeholder_with_default(
        inputs, shape=[None, INPUT_SHAPE, INPUT_SHAPE, 3], name='input_frames')

    # Normalize with ImageNet channel statistics.
    image_mean = tf.constant([0.485, 0.456, 0.406], dtype=tf.float32)
    image_std = tf.constant([0.229, 0.224, 0.225], dtype=tf.float32)
    inputs = (inputs - image_mean) / image_std
    if self.data_format == 'NCHW':
        inputs = tf.transpose(inputs, [0, 3, 1, 2])

    def shortcut(l, n_in, n_out, stride):
        # 1x1 projection when the channel count changes, identity otherwise.
        if n_in != n_out:
            return Conv2D('convshortcut', l, n_out, 1, stride=stride)
        else:
            return l

    def basicblock(l, ch_out, stride, preact):
        # Note: channel index 1 assumes NCHW layout.
        ch_in = l.get_shape().as_list()[1]
        if preact == 'both_preact':
            l = BNReLU('preact', l)
            shortcut_input = l
        elif preact != 'no_preact':
            shortcut_input = l
            l = BNReLU('preact', l)
        else:
            shortcut_input = l
        l = Conv2D('conv1', l, ch_out, 3, stride=stride, nl=BNReLU)
        l = Conv2D('conv2', l, ch_out, 3)
        return l + shortcut(shortcut_input, ch_in, ch_out, stride)

    def bottleneck(l, ch_out, stride, preact):
        # Note: channel index 1 assumes NCHW layout.
        ch_in = l.get_shape().as_list()[1]
        if preact == 'both_preact':
            l = BNReLU('preact', l)
            shortcut_input = l
        elif preact != 'no_preact':
            shortcut_input = l
            l = BNReLU('preact', l)
        else:
            shortcut_input = l
        l = Conv2D('conv1', l, ch_out, 1, nl=BNReLU)
        l = Conv2D('conv2', l, ch_out, 3, stride=stride, nl=BNReLU)
        l = Conv2D('conv3', l, ch_out * 4, 1)
        return l + shortcut(shortcut_input, ch_in, ch_out * 4, stride)

    def layer(l, layername, block_func, features, count, stride, first=False):
        with tf.variable_scope(layername):
            with tf.variable_scope('block0'):
                l = block_func(l, features, stride,
                               'no_preact' if first else 'both_preact')
            for i in range(1, count):
                with tf.variable_scope('block{}'.format(i)):
                    l = block_func(l, features, 1, 'default')
            return l

    # (num_blocks per group, block function) for each ResNet depth.
    cfg = {
        18: ([2, 2, 2, 2], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
    }
    defs, block_func = cfg[self.depth]

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                     data_format=self.data_format), \
            argscope(BatchNorm, use_local_stat=True):
        logits = (LinearWrap(inputs)
                  .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
                  .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
                  .apply(layer, 'group0', block_func, 64, defs[0], 1, first=True)
                  .apply(layer, 'group1', block_func, 128, defs[1], 2)
                  .apply(layer, 'group2', block_func, 256, defs[2], 2)
                  .apply(layer, 'group3', block_func, 512, defs[3], 2)
                  .BNReLU('bnlast')
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000, nl=tf.identity)())

    # Export the raw and L2-normalized encodings under stable tensor names.
    tf.identity(logits, name='encoding')
    # The original called an undefined `normalize` helper; assumed here to be
    # L2 normalization, consistent with the loss above.
    encodings = tf.identity(tf.nn.l2_normalize(logits, 1),
                            name='normalized_encoding')
    anc_enc, pos_enc, neg_enc = tf.split(encodings, 3, axis=0)

    wd_cost = regularize_cost('.*/W', l2_regularizer(1e-4),
                              name='l2_regularize_loss')
    add_moving_summary(wd_cost)

    loss, pos_dist, neg_dist = symbf.triplet_loss(
        anc_enc, pos_enc, neg_enc, 0.5, extra=True, scope="loss")
    self.cost = tf.add_n([loss, wd_cost], name='cost')
    add_moving_summary(pos_dist, neg_dist, self.cost, loss)
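# A hypothetical inference sketch (the checkpoint path is a placeholder;
# assumes the graph above has been built in the current session, and that
# frames are already scaled to [0, 1], since 'input_frames' is created after
# the 1/255 cast):
#
#   import numpy as np
#   with tf.Session() as sess:
#       tf.train.Saver().restore(sess, '/path/to/checkpoint')
#       frames = np.zeros((1, INPUT_SHAPE, INPUT_SHAPE, 3), dtype=np.float32)
#       emb = sess.run('normalized_encoding:0',
#                      feed_dict={'input_frames:0': frames})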