Exemple #1
0
    def build_graph(self, image, label):
        """Build a two-branch spatial-transformer classifier and return its cost.

        Two localisation networks with separate variables (scopes ``STN1`` and
        ``STN2``) each predict a 2x3 affine matrix from ``image``. Each matrix maps
        a ``WARP_TARGET_SIZE`` x ``WARP_TARGET_SIZE`` grid of target pixels to
        source coordinates, at which ``image`` is bilinearly sampled. The two
        sampled patches are concatenated on the channel axis and classified into
        19 classes by three fully-connected layers.

        Args:
            image: input image batch. The inline comments describe it as
                ``b x h x w x 2``; the visualization code reads channels 0 and 1.
            label: integer class ids, as consumed by
                ``sparse_softmax_cross_entropy_with_logits`` and ``in_top_k``.

        Returns:
            The tensor named ``cost``: cross-entropy loss plus the weight-decay term.
        """
        # Homogeneous coordinates (y, x, 1) of every target pixel, y-major order.
        xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                        for x in range(WARP_TARGET_SIZE)], dtype='float32')
        xys = tf.constant(xys, dtype=tf.float32, name='xys')    # p x 3

        # Rescale pixels; maps [0, 255] to [-0.5, 0.5] -- assumes 8-bit inputs, TODO confirm.
        image = image / 255.0 - 0.5  # bhw2

        def get_stn(image):
            """Predict an affine transform from ``image`` and return the warped patch."""
            # Localisation network. The last layer has a zero kernel and a fixed
            # bias, so before training every input yields the matrix
            # [[1, 0, HALF_DIFF], [0, 1, HALF_DIFF]]: identity scale plus a shift.
            stn = (LinearWrap(image)
                   .AvgPooling('downsample', 2)
                   .Conv2D('conv0', 20, 5, padding='VALID')
                   .MaxPooling('pool0', 2)
                   .Conv2D('conv1', 20, 5, padding='VALID')
                   .FullyConnected('fc1', 32)
                   .FullyConnected('fct', 6, activation=tf.identity,
                                   kernel_initializer=tf.constant_initializer(),
                                   bias_initializer=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
            # output 6 parameters for affine transformation
            stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
            # Move the 3-vector axis first so one matmul transforms the grid for the whole batch.
            stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
            # (p x 3) . (3 x 2b) -> p x 2b, unfolded back into a grid of coordinate pairs.
            coor = tf.reshape(tf.matmul(xys, stn),
                              [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            sampled = BilinearSample('warp', [image, coor], borderMode='constant')
            return sampled

        # ReLU is the default activation for every Conv2D/FullyConnected built inside get_stn.
        with argscope([Conv2D, FullyConnected], activation=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            # Pad each patch by HALF_DIFF on every spatial side
            # (presumably back to the input size so the tiles line up -- TODO confirm).
            padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
            # Stack the two channels vertically, then place original / warp1 / warp2 side by side.
            img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h  x w
            transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
            transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1), max_outputs=30)

        # Classifier over both warped patches, joined on the channel axis.
        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', 256, activation=tf.nn.relu)
                  .FullyConnected('fc2', 128, activation=tf.nn.relu)
                  .FullyConnected('fct', 19, activation=tf.identity)())
        # Created only for its name, so the probabilities can be fetched as 'prob'.
        tf.nn.softmax(logits, name='prob')

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')

        # 1.0 for every sample whose top-1 prediction is wrong.
        wrong = tf.to_float(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), name='incorrect_vector')
        summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # Weight decay, scaled by 1e-5, on the variables selected by the 'fc.*/W' regex.
        wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        return tf.add_n([wd_cost, cost], name='cost')
Exemple #2
0
def multilevel_rpn_losses(
        multilevel_anchors, multilevel_label_logits, multilevel_box_logits):
    """
    Compute the RPN losses on every FPN level and add them up.

    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        label_loss, box_loss
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    label_losses = []
    box_losses = []
    with tf.name_scope('rpn_losses'):
        per_level = zip(multilevel_anchors, multilevel_label_logits, multilevel_box_logits)
        # Levels are numbered from 2 in the per-level scope names.
        for level, (anchors, label_logits, box_logits) in enumerate(per_level, start=2):
            level_label_loss, level_box_loss = rpn_losses(
                anchors.gt_labels, anchors.encoded_gt_boxes(),
                label_logits, box_logits,
                name_scope='level{}'.format(level))
            label_losses.append(level_label_loss)
            box_losses.append(level_box_loss)

        total_label_loss = tf.add_n(label_losses, name='label_loss')
        total_box_loss = tf.add_n(box_losses, name='box_loss')
        add_moving_summary(total_label_loss, total_box_loss)
    return total_label_loss, total_box_loss
Exemple #3
0
    def _build_graph(self, inputs, is_training):
        """Build the Q-learning graph and set ``self.cost``.

        Also sets ``self.predict_value`` (Q-values predicted for ``state``) and
        ``self.greedy_choice`` (argmax action of those Q-values).

        Args:
            inputs: sequence unpacked as
                ``(state, action, reward, next_state, isOver)``.
            is_training: forwarded to ``self._get_DQN_prediction`` for the
                prediction on ``state``; the target prediction always passes False.
        """
        state, action, reward, next_state, isOver = inputs
        self.predict_value = self._get_DQN_prediction(state, is_training)
        # Select the Q-value of the action that was actually taken.
        action_onehot = tf.one_hot(action, NUM_ACTIONS)
        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)    #N,
        # Batch mean of the best predicted Q-value, tracked as 'predict_reward'.
        max_pred_reward = tf.reduce_mean(tf.reduce_max(
            self.predict_value, 1), name='predict_reward')
        add_moving_summary(max_pred_reward)
        self.greedy_choice = tf.argmax(self.predict_value, 1)   # N,

        # The target prediction is built under its own variable scope, 'target'.
        with tf.variable_scope('target'):
            targetQ_predict_value = self._get_DQN_prediction(next_state, False)    # NxA

            # DQN
            #best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,

            # Double-DQN
            # NOTE(review): greedy_choice is the argmax over predictions for `state`,
            # but it indexes target values computed on `next_state`. Double-DQN
            # usually picks the action from the online net on next_state -- confirm.
            predict_onehot = tf.one_hot(self.greedy_choice, NUM_ACTIONS, 1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

            # Bootstrapped target; the future term is dropped when isOver is set,
            # and no gradient flows through best_v.
            target = reward + (1.0 - tf.cast(isOver, tf.float32)) * GAMMA * tf.stop_gradient(best_v)

        sqrcost = tf.square(target - pred_action_value)
        abscost = tf.abs(target - pred_action_value)    # robust error func
        # Squared error where |error| < 1, absolute error elsewhere.
        # NOTE(review): tf.select is the pre-1.0 name of the 3-argument tf.where.
        cost = tf.select(abscost < 1, sqrcost, abscost)
        summary.add_param_summary([('conv.*/W', ['histogram', 'rms']),
                                   ('fc.*/W', ['histogram', 'rms']) ])   # monitor all W
        self.cost = tf.reduce_mean(cost, name='cost')
Exemple #4
0
    def _build_graph(self, inputs):
        """Build a stacked-LSTM language model and set ``self.cost``.

        The network embeds the input tokens, runs them through
        ``param.num_rnn_layer`` LSTM layers unrolled over the sequence axis, and
        projects every step's output to ``param.vocab_size`` logits. The final
        state is exposed as the tensor ``last_state`` and the temperature-scaled
        softmax as ``prob``.

        Args:
            inputs: pair ``(input, nextinput)`` of token-id tensors; ``nextinput``
                holds the prediction target for every position of ``input``.
        """
        tokens, next_tokens = inputs

        cell = rnn.MultiRNNCell([rnn.LSTMBlockCell(num_units=param.rnn_size)
                                for _ in range(param.num_rnn_layer)])

        def get_v(n):
            """Return a feedable state tensor named ``n`` that defaults to zeros."""
            ret = tf.get_variable(n + '_unused', [param.batch_size, param.rnn_size],
                                  trainable=False,
                                  initializer=tf.constant_initializer())
            # The placeholder accepts any batch size when fed; otherwise it
            # falls back to the zero-initialised variable above.
            ret = tf.placeholder_with_default(ret, shape=[None, param.rnn_size], name=n)
            return ret

        # One (c, h) pair per layer. The previous hard-coded pair of tuples only
        # matched the cell when num_rnn_layer == 2; for that value the tensors and
        # their names (c0, h0, c1, h1) are unchanged.
        initial = tuple(
            rnn.LSTMStateTuple(get_v('c{}'.format(k)), get_v('h{}'.format(k)))
            for k in range(param.num_rnn_layer))

        embeddingW = tf.get_variable('embedding', [param.vocab_size, param.rnn_size])
        input_feature = tf.nn.embedding_lookup(embeddingW, tokens)  # B x seqlen x rnnsize

        input_list = tf.unstack(input_feature, axis=1)  # seqlen x (Bxrnnsize)

        outputs, last_state = rnn.static_rnn(cell, input_list, initial, scope='rnnlm')
        # Packs the nested state tuples into one named tensor so it can be fetched.
        last_state = tf.identity(last_state, 'last_state')

        # seqlen x (Bxrnnsize)
        output = tf.reshape(tf.concat(outputs, 1), [-1, param.rnn_size])  # (Bxseqlen) x rnnsize
        logits = FullyConnected('fc', output, param.vocab_size, nl=tf.identity)
        # Created only for its name; used for sampling with a temperature.
        tf.nn.softmax(logits / param.softmax_temprature, name='prob')

        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(next_tokens, [-1]))
        self.cost = tf.reduce_mean(xent_loss, name='cost')
        summary.add_param_summary(('.*/W', ['histogram']))   # monitor histogram of all W
        summary.add_moving_summary(self.cost)
Exemple #5
0
    def _build_graph(self, inputs):
        """Build the conditional image-to-image GAN graph.

        Creates the generator and discriminator, the GAN losses plus an L1
        reconstruction term on the generator, and an image summary showing
        input, target and generated output side by side.

        Args:
            inputs: pair ``(input, output)`` of image batches.
        """
        src_img, target_img = inputs
        # Rescale pixel values (maps [0, 256) to [-1, 1) -- assumes 8-bit inputs).
        src_img = src_img / 128.0 - 1
        target_img = target_img / 128.0 - 1

        weight_init = tf.truncated_normal_initializer(stddev=0.02)
        with argscope([Conv2D, Deconv2D], W_init=weight_init):
            with argscope(LeakyReLU, alpha=0.2):
                with tf.variable_scope('gen'):
                    generated = self.generator(src_img)
                with tf.variable_scope('discrim'):
                    real_pred = self.discriminator(src_img, target_img)
                # Second discriminator pass shares the variables created above.
                with tf.variable_scope('discrim', reuse=True):
                    fake_pred = self.discriminator(src_img, generated)

        self.build_losses(real_pred, fake_pred)
        l1_err = tf.reduce_mean(tf.abs(generated - target_img), name='L1_loss')
        # Generator objective = adversarial loss + LAMBDA * L1 distance to the target.
        self.g_loss = tf.add(self.g_loss, LAMBDA * l1_err, name='total_g_loss')
        add_moving_summary(l1_err, self.g_loss)

        # tensorboard visualization
        if IN_CH == 1:
            src_img = tf.image.grayscale_to_rgb(src_img)
        if OUT_CH == 1:
            target_img = tf.image.grayscale_to_rgb(target_img)
            generated = tf.image.grayscale_to_rgb(generated)
        side_by_side = tf.concat([src_img, target_img, generated], 2)
        # Undo the rescaling and convert to uint8 for the image summary.
        viz = (side_by_side + 1.0) * 128.0
        viz = tf.clip_by_value(viz, 0, 255)
        viz = tf.cast(viz, tf.uint8, name='viz')
        tf.summary.image('input,output,fake', viz, max_outputs=max(30, BATCH))

        self.collect_variables()
    def build_graph(self, image_pos):
        """Build a Wasserstein GAN with gradient penalty.

        Sets ``self.d_loss`` (critic loss including the penalty) and
        ``self.g_loss``, then calls ``self.collect_variables()``.

        Args:
            image_pos: batch of real images; indexed as a rank-4 tensor by the
                gradient-norm reduction over axes ``[1, 2, 3]``.
        """
        # Rescale pixel values (maps [0, 256) to [-1, 1) -- assumes 8-bit inputs, TODO confirm).
        image_pos = image_pos / 128.0 - 1

        # Gaussian noise during training; 'z' can be fed with any batch size instead.
        z = tf.random_normal([self.batch, self.zdim], name='z_train')
        z = tf.placeholder_with_default(z, [None, self.zdim], name='z')

        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                image_gen = self.generator(z)
            tf.summary.image('generated-samples', image_gen, max_outputs=30)

            # One random mixing coefficient per sample, broadcast over H, W, C.
            alpha = tf.random_uniform(shape=[self.batch, 1, 1, 1],
                                      minval=0., maxval=1., name='alpha')
            # Random point on the segment between each real and generated image.
            interp = image_pos + alpha * (image_gen - image_pos)

            # NOTE(review): the discriminator is called three times in one scope
            # with no explicit reuse; presumably it handles variable reuse itself -- confirm.
            with tf.variable_scope('discrim'):
                vecpos = self.discriminator(image_pos)
                vecneg = self.discriminator(image_gen)
                vec_interp = self.discriminator(interp)

        # the Wasserstein-GAN losses
        self.d_loss = tf.reduce_mean(vecneg - vecpos, name='d_loss')
        self.g_loss = tf.negative(tf.reduce_mean(vecneg), name='g_loss')

        # the gradient penalty loss
        gradients = tf.gradients(vec_interp, [interp])[0]
        # Per-sample L2 norm of the critic's gradient w.r.t. the interpolated image.
        gradients = tf.sqrt(tf.reduce_sum(tf.square(gradients), [1, 2, 3]))
        gradients_rms = symbolic_functions.rms(gradients, 'gradient_rms')
        # Penalise deviation of that norm from 1.
        gradient_penalty = tf.reduce_mean(tf.square(gradients - 1), name='gradient_penalty')
        # The d_loss summarised here is the value before the penalty is added.
        add_moving_summary(self.d_loss, self.g_loss, gradient_penalty, gradients_rms)

        # Penalty weight is fixed at 10.
        self.d_loss = tf.add(self.d_loss, 10 * gradient_penalty)

        self.collect_variables()
Exemple #7
0
def fpn_map_rois_to_levels(boxes):
    """
    Split boxes among FPN levels 2~5 according to their size.

    The level is floor(4 + log2(sqrt(area) / 224)), clamped to the range [2, 5].

    Args:
        boxes (nx4):

    Returns:
        [tf.Tensor]: 4 tensors for level 2-5. Each tensor is a vector of indices of boxes in its level.
        [tf.Tensor]: 4 tensors, the gathered boxes in each level.

    Be careful that the returned tensor could be empty.
    """
    sqrtarea = tf.sqrt(tf_area(boxes))
    # The 1e-6 keeps the log finite for zero-area boxes.
    log2_scale = tf.log(sqrtarea * (1. / 224) + 1e-6) * (1.0 / np.log(2))
    level = tf.to_int32(tf.floor(4 + log2_scale))

    # RoI levels range from 2~5 (not 6)
    where_per_level = [
        tf.where(level <= 2),
        tf.where(tf.equal(level, 3)),   # == is not supported
        tf.where(tf.equal(level, 4)),
        tf.where(level >= 5)]

    # Flatten each Nx1 index matrix into a vector of box indices.
    level_ids = []
    for lvl, indices in enumerate(where_per_level, start=2):
        level_ids.append(tf.reshape(indices, [-1], name='roi_level{}_id'.format(lvl)))

    num_in_levels = []
    for lvl, ids in enumerate(level_ids, start=2):
        num_in_levels.append(tf.size(ids, name='num_roi_level{}'.format(lvl)))
    add_moving_summary(*num_in_levels)

    level_boxes = []
    for ids in level_ids:
        level_boxes.append(tf.gather(boxes, ids))
    return level_ids, level_boxes
Exemple #8
0
    def build_losses(self, logits_real, logits_fake):
        """Set ``self.d_loss`` and ``self.g_loss`` for the standard GAN game.

          min_G max _D V(D, G) = IE_{x ~ p_data} [log D(x)] + IE_{z ~ p_fake} [log (1 - D(G(z)))]

        The discriminator loss is the average of the cross-entropy on real
        samples (label 1) and on fake samples (label 0). The generator loss is
        the cross-entropy of fake samples against label 1.

        Args:
            logits_real (tf.Tensor): discrim logits from real samples
            logits_fake (tf.Tensor): discrim logits from fake samples produced by generator
        """
        xent = tf.nn.sigmoid_cross_entropy_with_logits
        with tf.name_scope("GAN_loss"):
            score_real = tf.sigmoid(logits_real)
            score_fake = tf.sigmoid(logits_fake)
            tf.summary.histogram('score-real', score_real)
            tf.summary.histogram('score-fake', score_fake)

            with tf.name_scope("discrim"):
                real_xent = xent(logits=logits_real, labels=tf.ones_like(logits_real))
                d_loss_pos = tf.reduce_mean(real_xent, name='loss_real')
                fake_xent = xent(logits=logits_fake, labels=tf.zeros_like(logits_fake))
                d_loss_neg = tf.reduce_mean(fake_xent, name='loss_fake')

                # Fraction of real / fake samples on the correct side of 0.5.
                real_hits = tf.cast(score_real > 0.5, tf.float32)
                d_pos_acc = tf.reduce_mean(real_hits, name='accuracy_real')
                fake_hits = tf.cast(score_fake < 0.5, tf.float32)
                d_neg_acc = tf.reduce_mean(fake_hits, name='accuracy_fake')

                d_accuracy = tf.add(.5 * d_pos_acc, .5 * d_neg_acc, name='accuracy')
                self.d_loss = tf.add(.5 * d_loss_pos, .5 * d_loss_neg, name='loss')

            with tf.name_scope("gen"):
                fooled_xent = xent(logits=logits_fake, labels=tf.ones_like(logits_fake))
                self.g_loss = tf.reduce_mean(fooled_xent, name='loss')
                # Fraction of fake samples the discriminator scores above 0.5.
                fooled = tf.cast(score_fake > 0.5, tf.float32)
                g_accuracy = tf.reduce_mean(fooled, name='accuracy')

            add_moving_summary(self.g_loss, self.d_loss, d_accuracy, g_accuracy)
Exemple #9
0
    def build_graph(self, input, output):
        """Build the conditional image-to-image GAN graph.

        Creates the generator and discriminator, the GAN losses plus an L1
        reconstruction term on the generator, and a visualization of input,
        target and generated output.

        Args:
            input: batch of source images.
            output: batch of target images paired with ``input``.
        """
        # Rescale pixel values (maps [0, 256) to [-1, 1) -- assumes 8-bit inputs).
        src_img = input / 128.0 - 1
        target_img = output / 128.0 - 1

        weight_init = tf.truncated_normal_initializer(stddev=0.02)
        with argscope([Conv2D, Conv2DTranspose], kernel_initializer=weight_init):
            with tf.variable_scope('gen'):
                generated = self.generator(src_img)
            with tf.variable_scope('discrim'):
                real_pred = self.discriminator(src_img, target_img)
                fake_pred = self.discriminator(src_img, generated)

        self.build_losses(real_pred, fake_pred)
        l1_err = tf.reduce_mean(tf.abs(generated - target_img), name='L1_loss')
        # Generator objective = adversarial loss + LAMBDA * L1 distance to the target.
        self.g_loss = tf.add(self.g_loss, LAMBDA * l1_err, name='total_g_loss')
        add_moving_summary(l1_err, self.g_loss)

        # tensorboard visualization
        if IN_CH == 1:
            src_img = tf.image.grayscale_to_rgb(src_img)
        if OUT_CH == 1:
            target_img = tf.image.grayscale_to_rgb(target_img)
            generated = tf.image.grayscale_to_rgb(generated)

        panels = [src_img, target_img, generated]
        visualize_tensors('input,output,fake', panels, max_outputs=max(30, BATCH))

        self.collect_variables()
Exemple #10
0
        def LSGAN_losses(real, fake):
            """Return the least-squares GAN losses as ``(g_loss, d_loss)``.

            The discriminator loss is half the sum of mean (real - 1)^2 and
            mean fake^2; the generator loss is mean (fake - 1)^2.
            """
            real_term = tf.squared_difference(real, 1)
            d_real = tf.reduce_mean(real_term, name='d_real')
            fake_term = tf.square(fake)
            d_fake = tf.reduce_mean(fake_term, name='d_fake')
            d_total = d_real + d_fake
            d_loss = tf.multiply(d_total, 0.5, name='d_loss')

            gen_term = tf.squared_difference(fake, 1)
            g_loss = tf.reduce_mean(gen_term, name='g_loss')
            add_moving_summary(g_loss, d_loss)
            return g_loss, d_loss
Exemple #11
0
def rpn_losses(anchor_labels, anchor_boxes, label_logits, box_logits):
    """
    Compute the RPN objectness (label) loss and box-regression loss, and record
    precision/recall summaries of the objectness score at several thresholds.

    Anchors labelled -1 are excluded from everything; anchors labelled 1 are
    the positives used for box regression.

    Args:
        anchor_labels: fHxfWxNA
        anchor_boxes: fHxfWxNAx4, encoded
        label_logits:  fHxfWxNA
        box_logits: fHxfWxNAx4

    Returns:
        label_loss, box_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask, dtype=tf.int32), name='num_valid_anchor')
        nr_pos = tf.count_nonzero(pos_mask, dtype=tf.int32, name='num_pos_anchor')

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction, name='num_pos_prediction')
                # Anchors predicted positive whose prediction matches the label.
                pos_prediction_corr = tf.count_nonzero(
                    tf.logical_and(
                        valid_label_prob > th,
                        tf.equal(valid_prediction, valid_anchor_labels)),
                    dtype=tf.int32)
                # With no positive anchor the ratio is 0/0 = NaN, which would
                # poison the moving-average summary. Report 0 instead, with the
                # same guard precision already uses.
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0), 0.0, recall, name='recall_th{}'.format(th))
                summaries.append(recall)
                precision = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0), 0.0, precision, name='precision_th{}'.format(th))
                summaries.append(precision)
        add_moving_summary(*summaries)

    # NOTE(review): both losses below are still NaN when there is no valid
    # anchor at all (mean over an empty tensor, division by nr_valid == 0).
    label_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)
    label_loss = tf.reduce_mean(label_loss, name='label_loss')

    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    # Huber loss with delta = 1/9, summed over positive anchors and divided by
    # delta, then normalised by the number of valid (not positive) anchors.
    delta = 1.0 / 9
    box_loss = tf.losses.huber_loss(
        pos_anchor_boxes, pos_box_logits, delta=delta,
        reduction=tf.losses.Reduction.SUM) / delta
    box_loss = tf.div(
        box_loss,
        tf.cast(nr_valid, tf.float32), name='box_loss')

    add_moving_summary(label_loss, box_loss, nr_valid, nr_pos)
    return label_loss, box_loss
Exemple #12
0
    def _build_graph(self, inputs):
        """Build the two-input embedding graph and set ``self.cost``.

        Args:
            inputs: sequence unpacked as ``(x, y, label)``.
        """
        first, second, label = inputs
        emb_first, emb_second = self.embed([first, second])

        # Embed the first input alone, reusing the variables created above,
        # and expose the result under the name "emb".
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(inputs[0])
            tf.identity(single_emb, name="emb")

        loss = symbf.siamese_cosine_loss(emb_first, emb_second, label, scope="loss")
        self.cost = tf.identity(loss, name="cost")
        add_moving_summary(self.cost)
 def get_feature_match_loss(self, feats_real, feats_fake):
     """Return the summed feature-matching loss over paired feature tensors.

     For every (real, fake) pair, both tensors are averaged over axis 0 and the
     mean squared difference of those averages is taken. The per-pair losses
     are added up, recorded as a moving summary, and returned.
     """
     losses = [
         tf.reduce_mean(
             tf.squared_difference(tf.reduce_mean(real, 0), tf.reduce_mean(fake, 0)),
             name='mse_feat_' + real.op.name)
         for real, fake in zip(feats_real, feats_fake)
     ]
     ret = tf.add_n(losses, name='feature_match_loss')
     add_moving_summary(ret)
     return ret
Exemple #14
0
    def _build_graph(self, inputs):
        """Build the three-input embedding graph and set ``self.cost``.

        Args:
            inputs: sequence unpacked as ``(a, p, n)``.
        """
        in_a, in_p, in_n = inputs
        emb_a, emb_p, emb_n = self.embed([in_a, in_p, in_n])

        # Embed the first input alone, reusing the variables created above,
        # and expose the result under the name "emb".
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(inputs[0])
            tf.identity(single_emb, name="emb")

        loss, pos_dist, neg_dist = self.loss(emb_a, emb_p, emb_n)

        self.cost = tf.identity(loss, name="cost")
        add_moving_summary(pos_dist, neg_dist, self.cost)
Exemple #15
0
    def build_graph(self, x, y, label):
        """Build the two-input embedding graph and return its cost.

        Args:
            x: first input of each pair.
            y: second input of each pair.
            label: per-pair label passed on to ``siamese_cosine_loss``.

        Returns:
            The loss tensor, named ``cost``.
        """
        emb_x, emb_y = self.embed([x, y])

        # Embed x alone, reusing the variables created above, and expose the
        # result under the name "emb".
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(x)
            tf.identity(single_emb, name="emb")

        loss = siamese_cosine_loss(emb_x, emb_y, label, scope="loss")
        loss = tf.identity(loss, name="cost")
        add_moving_summary(loss)
        return loss
Exemple #16
0
    def _build_graph(self, inputs):
        """Build a small convolutional classifier and define ``self.cost``.

        Args:
            inputs: pair ``(image, label)``. ``image`` gets a channel axis
                appended at position 3; ``label`` holds integer class ids.
        """

        # inputs is the list of input tensors of the model
        image, label = inputs

        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        # center the pixel values at zero (assumes inputs in [0, 1] -- TODO confirm)
        image = image * 2 - 1

        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here we use 32 channel convolution with shape 3x3
        with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
            logits = (LinearWrap(image)
                      .Conv2D('conv0')
                      .MaxPooling('pool0', 2)
                      .Conv2D('conv1')
                      .Conv2D('conv2')
                      .MaxPooling('pool1', 2)
                      .Conv2D('conv3')
                      .FullyConnected('fc0', 512, nl=tf.nn.relu)
                      .Dropout('dropout', 0.5)
                      .FullyConnected('fc1', out_dim=10, nl=tf.identity)())

        # created only for its name, so the probabilities can be fetched as 'prob'
        tf.nn.softmax(logits, name='prob')   # a Bx10 with probabilities

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        # 1.0 for every sample whose top-1 prediction is correct
        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error (in a moving_average fashion):
        # 1. write the value to tensorboard
        # 2. write the value to stat.json
        # 3. print the value after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/W', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
Exemple #17
0
    def build_graph(self, a, p, n):
        """Build the three-input embedding graph and return its cost.

        Args:
            a, p, n: the three inputs, embedded together by ``self.embed`` and
                passed in this order to ``self.loss``.

        Returns:
            The loss tensor, named ``cost``.
        """
        emb_a, emb_p, emb_n = self.embed([a, p, n])

        # Embed `a` alone, reusing the variables created above, and expose the
        # result under the name "emb".
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(a)
            tf.identity(single_emb, name="emb")

        loss, pos_dist, neg_dist = self.loss(emb_a, emb_p, emb_n)

        loss = tf.identity(loss, name="cost")
        add_moving_summary(pos_dist, neg_dist, loss)
        return loss
    def build_graph(self, image, label):
        """Build a small convolutional classifier with ``tf.layers`` and return the total cost.

        Args:
            image: input batch; a channel axis is appended at position 3.
            label: integer class ids.

        Returns:
            The tensor named ``total_cost``: cross-entropy loss plus weight decay.
        """

        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        # center the pixel values at zero (assumes inputs in [0, 1] -- TODO confirm)
        image = image * 2 - 1

        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here it makes every tf.layers.conv2d use 'same' padding and ReLU;
        # the channel count (32) and kernel size (3) are passed explicitly to each call.
        with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            # dropout is active only when the current tower is a training tower
            l = tf.layers.dropout(l, rate=0.5,
                                  training=get_current_tower_context().is_training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        # 1.0 for every sample whose top-1 prediction is correct
        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
    def _build_graph(self, inputs):
        """Build the classification graph and set ``self.cost``.

        Args:
            inputs: pair ``(image, label)``.
        """
        raw_image, label = inputs
        net_input = ImageNetModel.image_preprocess(raw_image, bgr=self.image_bgr)
        # Move the channel axis to position 1 when the model runs in NCHW.
        if self.data_format == 'NCHW':
            net_input = tf.transpose(net_input, [0, 3, 1, 2])

        logits = self.get_logits(net_input)
        loss = ImageNetModel.compute_loss_and_error(logits, label)

        # L2 weight decay on the variables selected by self.weight_decay_pattern.
        wd_loss = regularize_cost(
            self.weight_decay_pattern,
            tf.contrib.layers.l2_regularizer(self.weight_decay),
            name='l2_regularize_loss')
        add_moving_summary(loss, wd_loss)
        self.cost = tf.add_n([loss, wd_loss], name='cost')
    def _build_graph(self, inputs):
        """Build a small convolutional classifier with ``tf.layers`` and define ``self.cost``.

        Args:
            inputs: pair ``(image, label)``. ``image`` gets a channel axis
                appended at position 3; ``label`` holds integer class ids.
        """

        def conv3x3(x, name):
            # 32-channel 3x3 convolution, 'same' padding, ReLU
            return tf.layers.conv2d(x, 32, 3, padding='same', activation=tf.nn.relu, name=name)

        def pool2x2(x):
            # 2x2 max pooling with stride 2
            return tf.layers.max_pooling2d(x, 2, 2, padding='valid')

        image, label = inputs

        # tf convolutions expect NHWC, so append a single channel axis
        net = tf.expand_dims(image, 3)
        # shift/scale the pixel values so they are centred at zero
        net = net * 2 - 1

        net = conv3x3(net, 'conv0')
        net = pool2x2(net)
        net = conv3x3(net, 'conv1')
        net = conv3x3(net, 'conv2')
        net = pool2x2(net)
        net = conv3x3(net, 'conv3')
        net = tf.layers.flatten(net)
        net = tf.layers.dense(net, 512, activation=tf.nn.relu, name='fc0')
        # dropout is active only when the current tower is a training tower
        net = tf.layers.dropout(net, rate=0.5,
                                training=get_current_tower_context().is_training)
        logits = tf.layers.dense(net, 10, activation=tf.identity, name='fc1')

        # Bx10 class probabilities, created so they can be fetched by name
        tf.nn.softmax(logits, name='prob')

        # per-sample cross-entropy, then its batch average
        per_sample_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_xent, name='cross_entropy_loss')

        top1_hit = tf.nn.in_top_k(logits, label, 1)
        correct = tf.cast(top1_hit, tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # Moving-average monitoring of the training error and accuracy:
        # written to tensorboard and stat.json, and printed after each epoch.
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Weight decay on the kernels of all fc layers, selected by regex.
        fc_l2 = regularize_cost('fc.*/kernel', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, fc_l2, name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # histogram and rms of every kernel (conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
Exemple #21
0
    def build_graph(self, input, nextinput):
        """Build a stacked-LSTM language model whose state persists across steps.

        The recurrent state is kept in non-trainable variables (also stored on
        ``self.state``). Evaluating the returned cost writes the final state of
        the unrolled sequence back into those variables.

        Args:
            input: token ids, B x seqlen (unstacked into SEQ_LEN steps on axis 1).
            nextinput: target token ids; flattened to match the logits.

        Returns:
            The tensor named ``cost``: summed cross-entropy divided by BATCH.
        """
        is_training = get_current_tower_context().is_training
        initializer = tf.random_uniform_initializer(-0.05, 0.05)

        def get_basic_cell():
            """Return one LSTM layer, wrapped with output dropout when training."""
            cell = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE, forget_bias=0.0, reuse=tf.get_variable_scope().reuse)
            if is_training:
                cell = rnn.DropoutWrapper(cell, output_keep_prob=1 - DROPOUT)
            return cell

        cell = rnn.MultiRNNCell([get_basic_cell() for _ in range(NUM_LAYER)])

        def get_v(n):
            """Return a zero-initialised, non-trainable BATCH x HIDDEN_SIZE variable named ``n``."""
            return tf.get_variable(n, [BATCH, HIDDEN_SIZE],
                                   trainable=False,
                                   initializer=tf.constant_initializer())

        # One (c, h) variable pair per layer holds the recurrent state between runs.
        state_var = [rnn.LSTMStateTuple(
            get_v('c{}'.format(k)), get_v('h{}'.format(k))) for k in range(NUM_LAYER)]
        self.state = state_var = tuple(state_var)

        embeddingW = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE], initializer=initializer)
        input_feature = tf.nn.embedding_lookup(embeddingW, input)  # B x seqlen x hiddensize
        input_feature = Dropout(input_feature, keep_prob=1 - DROPOUT)

        with tf.variable_scope('LSTM', initializer=initializer):
            input_list = tf.unstack(input_feature, num=SEQ_LEN, axis=1)  # seqlen x (Bxhidden)
            outputs, last_state = rnn.static_rnn(cell, input_list, state_var, scope='rnn')

        # update the hidden state after a rnn loop completes
        update_state_ops = []
        for k in range(NUM_LAYER):
            update_state_ops.extend([
                tf.assign(state_var[k].c, last_state[k].c),
                tf.assign(state_var[k].h, last_state[k].h)])

        # seqlen x (Bxrnnsize)
        output = tf.reshape(tf.concat(outputs, 1), [-1, HIDDEN_SIZE])  # (Bxseqlen) x hidden
        logits = FullyConnected('fc', output, VOCAB_SIZE,
                                activation=tf.identity, kernel_initializer=initializer,
                                bias_initializer=initializer)
        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=tf.reshape(nextinput, [-1]))

        # Creating the cost op under this dependency means that computing the
        # cost also runs the state-variable assignments above.
        with tf.control_dependencies(update_state_ops):
            cost = tf.truediv(tf.reduce_sum(xent_loss),
                              tf.cast(BATCH, tf.float32), name='cost')  # log-perplexity

        # cost is summed over the sequence, so divide by SEQ_LEN before exponentiating.
        perpl = tf.exp(cost / SEQ_LEN, name='perplexity')
        summary.add_moving_summary(perpl, cost)
        return cost
Exemple #22
0
    def _build_graph(self, inputs):
        """Build an autoencoder-discriminator GAN with a learned balance term ``kt``.

        The discriminator is an encoder/decoder whose reconstruction error is
        the score. Sets ``self.d_loss`` and ``self.g_loss``, then calls
        ``self.collect_variables()``.

        Args:
            inputs: sequence whose first element is the batch of real images.
        """
        image_pos = inputs[0]
        # Rescale pixel values (maps [0, 256) to [-1, 1) -- assumes 8-bit inputs, TODO confirm).
        image_pos = image_pos / 128.0 - 1

        # Uniform noise in [-1, 1) during training; 'z' can be fed with any batch size instead.
        z = tf.random_uniform([args.batch, args.z_dim], minval=-1, maxval=1, name='z_train')
        z = tf.placeholder_with_default(z, [None, args.z_dim], name='z')

        def summary_image(name, x):
            """Undo the input rescaling, clip to [0, 255] and add a uint8 image summary."""
            x = (x + 1.0) * 128.0
            x = tf.clip_by_value(x, 0, 255)
            tf.summary.image(name, tf.cast(x, tf.uint8), max_outputs=30)

        with argscope([Conv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            # The generator uses the same decoder architecture as the discriminator,
            # but under its own variable scope.
            with tf.variable_scope('gen'):
                image_gen = self.decoder(z)

            # NOTE(review): encoder/decoder are each called twice in one scope with
            # no explicit reuse; presumably they handle variable reuse themselves -- confirm.
            with tf.variable_scope('discrim'):
                with tf.variable_scope('enc'):
                    hidden_pos = self.encoder(image_pos)
                    hidden_neg = self.encoder(image_gen)

                with tf.variable_scope('dec'):
                    recon_pos = self.decoder(hidden_pos)
                    recon_neg = self.decoder(hidden_neg)

        with tf.name_scope('viz'):
            summary_image('generated-samples', image_gen)
            summary_image('reconstruct-real', recon_pos)
            summary_image('reconstruct-fake', recon_neg)

        with tf.name_scope('losses'):
            # Mean absolute reconstruction error on real and on generated images.
            L_pos = tf.reduce_mean(tf.abs(recon_pos - image_pos), name='loss_pos')
            L_neg = tf.reduce_mean(tf.abs(recon_neg - image_gen), name='loss_neg')

            # Zero when L_neg equals GAMMA * L_pos.
            eq = tf.subtract(GAMMA * L_pos, L_neg, name='equilibrium')
            measure = tf.add(L_pos, tf.abs(eq), name='measure')

            kt = tf.get_variable('kt', dtype=tf.float32, initializer=0.0)

            # kt is nudged by 1e-3 * eq each time d_loss is evaluated; it is not clipped here.
            update_kt = kt.assign_add(1e-3 * eq)
            with tf.control_dependencies([update_kt]):
                self.d_loss = tf.subtract(L_pos, kt * L_neg, name='loss_D')
                # L_neg was created above, so this alias carries no control dependency.
                self.g_loss = L_neg

        add_moving_summary(L_pos, L_neg, eq, measure, self.d_loss)
        tf.summary.scalar('kt', kt)

        self.collect_variables()
    def compute_loss_and_error(logits, label):
        """
        Return the mean sparse softmax cross-entropy of ``logits`` vs ``label``.

        As a side effect, registers moving summaries of the top-1 and top-5
        training error rates.
        """
        def topk_miss(k, name):
            # 1.0 where the label is NOT among the k highest logits, else 0.0
            with tf.name_scope('prediction_incorrect'):
                miss = tf.logical_not(tf.nn.in_top_k(logits, label, k))
            return tf.cast(miss, tf.float32, name=name)

        per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        loss = tf.reduce_mean(per_sample, name='xentropy-loss')

        error_specs = (
            (1, 'wrong-top1', 'train-error-top1'),
            (5, 'wrong-top5', 'train-error-top5'),
        )
        for k, vector_name, error_name in error_specs:
            wrong = topk_miss(k, vector_name)
            add_moving_summary(tf.reduce_mean(wrong, name=error_name))
        return loss
Exemple #24
0
    def build_graph(self, x, y, label):
        """
        Embed both inputs, compute the contrastive loss and return it.

        Also creates a tensor named "emb" holding the embedding of ``x``
        alone (built with variable reuse) and registers moving summaries for
        the two distance terms and the cost.
        """
        first_input = x
        emb_x, emb_y = self.embed([x, y])

        # re-embed only the first input with shared weights and expose it as
        # 'emb', so inference can fetch an embedding without feeding a pair
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(first_input)
            tf.identity(single_emb, name="emb")

        raw_cost, pos_dist, neg_dist = contrastive_loss(
            emb_x, emb_y, label, 5., extra=True, scope="loss")
        cost = tf.identity(raw_cost, name="cost")

        # values tracked during training
        add_moving_summary(pos_dist, neg_dist, cost)
        return cost
Exemple #25
0
    def _build_graph(self, inputs):
        """
        Embed the input pair, compute the contrastive loss and store it.

        ``inputs`` unpacks into ``(x, y, label)``. The resulting cost is kept
        in ``self.cost``; a tensor named "emb" holding the embedding of the
        first input alone is created for later inference.
        """
        first, second, label = inputs
        emb_x, emb_y = self.embed([first, second])

        # re-embed only the first input with shared weights and expose it as 'emb'
        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
            single_emb = self.embed(inputs[0])
            tf.identity(single_emb, name="emb")

        raw_cost, pos_dist, neg_dist = symbf.contrastive_loss(
            emb_x, emb_y, label, 5., extra=True, scope="loss")
        self.cost = tf.identity(raw_cost, name="cost")

        # values tracked during training
        add_moving_summary(pos_dist, neg_dist, self.cost)
Exemple #26
0
    def sample_fg_bg(iou):
        """
        Randomly pick foreground and background row indices from ``iou``.

        A row is foreground when its maximum along axis 1 reaches
        ``cfg.FRCNN.FG_THRESH``. At most ``BATCH_PER_IM * FG_RATIO``
        foreground rows are kept; background rows fill the remainder of
        ``BATCH_PER_IM``.

        Returns:
            fg_inds, bg_inds: 1-D index tensors into the rows of ``iou``.
        """
        best_overlap = tf.reduce_max(iou, axis=1)
        fg_mask = best_overlap >= cfg.FRCNN.FG_THRESH

        fg_candidates = tf.reshape(tf.where(fg_mask), [-1])
        fg_quota = int(cfg.FRCNN.BATCH_PER_IM * cfg.FRCNN.FG_RATIO)
        num_fg = tf.minimum(fg_quota, tf.size(fg_candidates), name='num_fg')
        # shuffle, then truncate: a uniform sample without replacement
        fg_inds = tf.random_shuffle(fg_candidates)[:num_fg]

        bg_candidates = tf.reshape(tf.where(tf.logical_not(fg_mask)), [-1])
        bg_quota = cfg.FRCNN.BATCH_PER_IM - num_fg
        num_bg = tf.minimum(bg_quota, tf.size(bg_candidates), name='num_bg')
        bg_inds = tf.random_shuffle(bg_candidates)[:num_bg]

        add_moving_summary(num_fg, num_bg)
        return fg_inds, bg_inds
Exemple #27
0
    def build_graph(self, x, label):
        """
        Build the embedding, the center loss and a 10-way classifier on top.

        Returns the tensor named "cost": the embedding cost plus 100 times
        the mean classification cross-entropy. The embedding itself is
        exposed as the tensor named 'emb'.
        """
        emb = tf.identity(self.embed(x), name='emb')

        # embedding loss
        emb_cost = center_loss(emb, label, 10, 0.01)

        # classification loss on a linear layer over the rectified embedding
        logits = slim.layers.fully_connected(
            tf.nn.relu(emb), 10, activation_fn=None, scope='logits')
        per_sample = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cls_cost = tf.reduce_mean(per_sample, name='classification_costs')

        total_cost = tf.add(emb_cost, 100 * cls_cost, name="cost")

        # values tracked during training
        add_moving_summary(total_cost, cls_cost, emb_cost)
        return total_cost
Exemple #28
0
def fastrcnn_losses(labels, label_logits, fg_boxes, fg_box_logits):
    """
    Compute the classification and box-regression losses, and register
    moving summaries of both losses plus a few label metrics.

    Args:
        labels: n,
        label_logits: nxC
        fg_boxes: nfgx4, encoded
        fg_box_logits: nfgxCx4 or nfgx1x4 if class agnostic

    Returns:
        label_loss, box_loss
    """
    per_sample_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=label_logits)
    label_loss = tf.reduce_mean(per_sample_xent, name='label_loss')

    # rows whose label is positive are the foreground samples
    fg_inds = tf.where(labels > 0)[:, 0]
    fg_labels = tf.gather(labels, fg_inds)
    num_fg = tf.size(fg_inds, out_type=tf.int64)
    empty_fg = tf.equal(num_fg, 0)

    class_agnostic = int(fg_box_logits.shape[1]) <= 1
    if class_agnostic:
        fg_box_logits = tf.reshape(fg_box_logits, [-1, 4])
    else:
        # for every foreground row keep only the box of its own class
        indices = tf.stack([tf.range(num_fg), fg_labels], axis=1)  # #fgx2
        fg_box_logits = tf.gather_nd(fg_box_logits, indices)

    with tf.name_scope('label_metrics'), tf.device('/cpu:0'):
        prediction = tf.argmax(label_logits, axis=1, name='label_prediction')
        # kept as float: boolean/integer gather is unavailable on GPU
        correct = tf.to_float(tf.equal(prediction, labels))
        accuracy = tf.reduce_mean(correct, name='accuracy')

        fg_label_pred = tf.argmax(tf.gather(label_logits, fg_inds), axis=1)
        predicted_zero = tf.to_int64(tf.equal(fg_label_pred, 0))
        num_zero = tf.reduce_sum(predicted_zero, name='num_zero')

        # both ratios are reported as 0 when there is no foreground sample
        zero_ratio = tf.to_float(tf.truediv(num_zero, num_fg))
        false_negative = tf.where(
            empty_fg, 0., zero_ratio, name='false_negative')
        fg_correct_mean = tf.reduce_mean(tf.gather(correct, fg_inds))
        fg_accuracy = tf.where(
            empty_fg, 0., fg_correct_mean, name='fg_accuracy')

    # summed Huber loss, normalised by the total number of samples
    box_loss_sum = tf.losses.huber_loss(
        fg_boxes, fg_box_logits, reduction=tf.losses.Reduction.SUM)
    num_samples = tf.to_float(tf.shape(labels)[0])
    box_loss = tf.truediv(box_loss_sum, num_samples, name='box_loss')

    add_moving_summary(
        label_loss, box_loss, accuracy, fg_accuracy, false_negative,
        tf.to_float(num_fg, name='num_fg_label'))
    return label_loss, box_loss
Exemple #29
0
def proposal_metrics(iou):
    """
    Register moving summaries describing how well RPN proposals cover the
    ground truth.

    Args:
        iou: nxm, #proposal x #gt
    """
    # best proposal overlap for every gt box (summary only)
    best_iou = tf.reduce_max(iou, axis=0)
    summaries = [tf.reduce_mean(best_iou, name='best_iou_per_gt')]

    with tf.device('/cpu:0'):
        for thresh in (0.3, 0.5):
            # fraction of gt boxes whose best proposal reaches the threshold
            num_hit = tf.count_nonzero(best_iou >= thresh)
            num_gt = tf.size(best_iou, out_type=tf.int64)
            recall = tf.truediv(
                num_hit, num_gt, name='recall_iou{}'.format(thresh))
            summaries.append(recall)

    add_moving_summary(*summaries)
Exemple #30
0
    def build_graph(self, comb_state, action, reward, isOver):
        """
        Build the Q-value prediction and, when training, the TD cost.

        ``self.predict_value`` is always set. Outside of training the method
        stops there and returns ``None``; otherwise it returns the Huber loss
        between the bootstrapped target and the predicted value of the taken
        action.

        Args:
            comb_state: stacked frames, reshaped below to
                [-1] + shape2d + [history + 1, channel]; the first
                ``history`` frames are the state, the last ``history`` the
                next state.
            action: index of the taken action (one-hot encoded over
                ``self.num_actions``).
            reward: reward, clipped to [-1, 1].
            isOver: termination flag; cast to float to mask the bootstrap term.
        """
        comb_state = tf.cast(comb_state, tf.float32)
        stacked_shape = [-1] + list(self._shape2d) + [self.history + 1, self.channel]
        comb_state = tf.reshape(comb_state, stacked_shape)

        # a window of `history` consecutive frames along axis 3
        window = [-1, -1, -1, self.history, -1]

        state = tf.slice(comb_state, [0, 0, 0, 0, 0], window)
        state = tf.reshape(state, self._shape4d_for_prediction, name='state')
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        # same window shifted by one frame
        next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], window, name='next_state')
        next_state = tf.reshape(next_state, self._shape4d_for_prediction)
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        # Q-value of the action that was actually taken
        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
        greedy_q = tf.reduce_max(self.predict_value, 1)
        max_pred_reward = tf.reduce_mean(greedy_q, name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
            target_q = self.get_DQN_prediction(next_state)    # NxA

        if self.method == 'Double':
            # Double-DQN: the online network chooses the action,
            # the target network evaluates it
            online_next_q = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(online_next_q, 1)   # N,
            greedy_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
            best_v = tf.reduce_sum(target_q * greedy_onehot, 1)
        else:
            # DQN
            best_v = tf.reduce_max(target_q, 1)    # N,

        # no bootstrapping past the end of an episode
        not_over = 1.0 - tf.cast(isOver, tf.float32)
        target = reward + not_over * self.gamma * tf.stop_gradient(best_v)

        cost = tf.losses.huber_loss(
            target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
        # monitor all W
        summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                  ('fc.*/W', ['histogram', 'rms']))
        summary.add_moving_summary(cost)
        return cost
Exemple #31
0
 def build_losses(self, vecpos, vecneg):
     """Set ``self.d_loss`` / ``self.g_loss`` to the Wasserstein-GAN losses.

     ``vecpos`` and ``vecneg`` are the discriminator outputs that enter the
     loss with negative and positive sign respectively (presumably the
     scores on real and generated samples -- confirm at the call site).
     """
     score_gap = vecneg - vecpos
     self.d_loss = tf.reduce_mean(score_gap, name='d_loss')
     mean_neg = tf.reduce_mean(vecneg)
     self.g_loss = tf.negative(mean_neg, name='g_loss')
     add_moving_summary(self.d_loss, self.g_loss)
Exemple #32
0
    def build_graph(self, real_sample):
        """
        Build the InfoGAN graph: generator, discriminator, the mutual
        information term and the final generator/discriminator losses.

        Sets ``self.g_loss`` and ``self.d_loss`` (the losses produced by
        ``self.build_losses`` minus the mutual information) and then calls
        ``self.collect_variables()``.

        Args:
            real_sample: batch of real samples; a trailing axis is appended
                before it is passed to the discriminator.
        """
        real_sample = tf.expand_dims(real_sample, -1)

        # sample the latent code:
        zc = shapeless_placeholder(sample_prior(BATCH), 0, name='z_code')
        z_noise = shapeless_placeholder(tf.random_uniform([BATCH, NOISE_DIM],
                                                          -1, 1),
                                        0,
                                        name='z_noise')
        z = tf.concat([zc, z_noise], 1, name='z')

        with argscope([Conv2D, Conv2DTranspose, FullyConnected],
                      kernel_initializer=tf.truncated_normal_initializer(
                          stddev=0.02)):
            with tf.variable_scope('gen'):
                fake_sample = self.generator(z)
                fake_sample_viz = tf.cast((fake_sample) * 255.0,
                                          tf.uint8,
                                          name='viz')
                tf.summary.image('gen', fake_sample_viz, max_outputs=30)

            # may need to investigate how bn stats should be updated across two discrim
            with tf.variable_scope('discrim'):
                real_pred, _ = self.discriminator(real_sample)
                fake_pred, dist_param = self.discriminator(fake_sample)

        # Mutual information between x (i.e. zc in this case) and some
        # information s (the generated samples in this case):
        #
        #             I(x;s) = H(x) - H(x|s)
        #                    = H(x) + E[\log P(x|s)]
        #
        # The distribution from which zc is sampled, in this case, is set to
        # a fixed prior already. So the first term is a constant.
        # For the second term, we can maximize its variational lower bound:
        #             E_{x \sim P(x|s)}[\log Q(x|s)]
        # where Q(x|s) is a proposal distribution to approximate P(x|s).
        #
        # Here, Q(x|s) is assumed to be a distribution which shares the form
        # of P, and whose parameters are predicted by the discriminator network.
        #
        # (Written as comments rather than a bare string literal: in a
        # non-raw string the backslashes above are invalid escape sequences.)
        with tf.name_scope("mutual_information"):
            with tf.name_scope('prior_entropy'):
                cat, uni = get_distributions(DIST_PRIOR_PARAM[:NUM_CLASS],
                                             DIST_PRIOR_PARAM[NUM_CLASS:])
                ents = [
                    cat.entropy(name='cat_entropy'),
                    tf.reduce_sum(uni.entropy(), name='uni_entropy')
                ]
                entropy = tf.add_n(ents, name='total_entropy')
                # Note that the entropy of prior is a constant. The paper mentioned it but didn't use it.

            with tf.name_scope('conditional_entropy'):
                cond_ents = entropy_from_samples(zc, dist_param)
                cond_entropy = tf.add_n(cond_ents, name="total_entropy")

            MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
            summary.add_moving_summary(entropy, cond_entropy, MI, *cond_ents)

        # default GAN objective
        self.build_losses(real_pred, fake_pred)

        # subtract mutual information for latent factors (we want to maximize them)
        self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
        self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

        summary.add_moving_summary(self.g_loss, self.d_loss)

        # distinguish between variables of generator and discriminator updates
        self.collect_variables()
Exemple #33
0
    def build_graph(self, A, B):
        """
        Build both generators, both discriminators and the CycleGAN losses.

        Sets ``self.g_loss`` and ``self.d_loss`` and collects the variables
        under the 'gen' and 'discrim' scopes.

        Args:
            A, B: image batches of the two domains. Each is rescaled with
                ``x / 128 - 1`` and transposed with [0, 3, 1, 2]
                (presumably NHWC in, channels-first out -- confirm).
        """
        to_channels_first = [0, 3, 1, 2]
        with tf.name_scope('preprocess'):
            A = tf.transpose(A / 128.0 - 1.0, to_channels_first)
            B = tf.transpose(B / 128.0 - 1.0, to_channels_first)

        def viz3(name, a, b, c):
            # concatenate the three images along axis 3, undo the input
            # scaling and log the strip as a uint8 image summary
            with tf.name_scope(name):
                strip = tf.concat([a, b, c], axis=3)
                strip = tf.transpose(strip, [0, 2, 3, 1])
                strip = tf.clip_by_value((strip + 1.0) * 128, 0, 255)
                strip = tf.cast(strip, tf.uint8, name='viz')
            tf.summary.image(name, strip, max_outputs=50)

        # use the initializers from torch
        with argscope([Conv2D, Conv2DTranspose], use_bias=False,
                      kernel_initializer=tf.random_normal_initializer(stddev=0.02)):
            with argscope([Conv2D, Conv2DTranspose, InstanceNorm],
                          data_format='channels_first'):
                with tf.variable_scope('gen'):
                    # scope 'B' holds the A->B generator, scope 'A' the B->A one
                    with tf.variable_scope('B'):
                        AB = self.generator(A)
                    with tf.variable_scope('A'):
                        BA = self.generator(B)
                        ABA = self.generator(AB)
                    with tf.variable_scope('B'):
                        BAB = self.generator(BA)

                viz3('A_recon', A, AB, ABA)
                viz3('B_recon', B, BA, BAB)

                with tf.variable_scope('discrim'):
                    with tf.variable_scope('A'):
                        A_dis_real = self.discriminator(A)
                        A_dis_fake = self.discriminator(BA)

                    with tf.variable_scope('B'):
                        B_dis_real = self.discriminator(B)
                        B_dis_fake = self.discriminator(AB)

        def LSGAN_losses(real, fake):
            # least-squares GAN: D pushes real->1 and fake->0, G pushes fake->1
            real_term = tf.reduce_mean(tf.squared_difference(real, 1),
                                       name='d_real')
            fake_term = tf.reduce_mean(tf.square(fake), name='d_fake')
            dis_loss = tf.multiply(real_term + fake_term, 0.5, name='d_loss')

            gen_loss = tf.reduce_mean(tf.squared_difference(fake, 1),
                                      name='g_loss')
            add_moving_summary(gen_loss, dis_loss)
            return gen_loss, dis_loss

        with tf.name_scope('losses'):
            with tf.name_scope('LossA'):
                # cycle reconstruction loss
                cycle_err_A = tf.abs(A - ABA)
                recon_loss_A = tf.reduce_mean(cycle_err_A, name='recon_loss')
                # gan loss
                G_loss_A, D_loss_A = LSGAN_losses(A_dis_real, A_dis_fake)

            with tf.name_scope('LossB'):
                cycle_err_B = tf.abs(B - BAB)
                recon_loss_B = tf.reduce_mean(cycle_err_B, name='recon_loss')
                G_loss_B, D_loss_B = LSGAN_losses(B_dis_real, B_dis_fake)

            LAMBDA = 10.0  # weight of the reconstruction terms
            adversarial = G_loss_A + G_loss_B
            reconstruction = (recon_loss_A + recon_loss_B) * LAMBDA
            self.g_loss = tf.add(adversarial, reconstruction, name='G_loss_total')
            self.d_loss = tf.add(D_loss_A, D_loss_B, name='D_loss_total')
        self.collect_variables('gen', 'discrim')

        add_moving_summary(recon_loss_A, recon_loss_B, self.g_loss, self.d_loss)
Exemple #34
0
    def build_graph(self, role_id, prob_state, value_state, last_cards,
                    action_target, mode, history_action_prob,
                    discounted_return, lstm_state):
        """
        Build policy/value predictions and, when training, the per-role PPO cost.

        The named tensors ``new_lstm_state``, ``active_prob``,
        ``passive_prob``, ``mode_out`` and ``pred_value`` are always created.
        Outside of training nothing else is built and ``None`` is returned.

        Args:
            role_id: per-sample role index; roles 1..3 are handled below.
            prob_state, last_cards, lstm_state: forwarded to ``self.get_policy``.
            value_state: forwarded to ``self.get_value``.
            action_target: per-sample index of the taken action, one-hot
                encoded over ``len(action_space)``.
            mode: per-sample branch selector; it indexes the stacked
                [active, passive] pair, so 0 selects active and 1 passive.
            history_action_prob: denominator of the importance ratio
                (presumably the probability the action had when it was
                sampled -- confirm against the data pipeline).
            discounted_return: per-sample return target for the value head.

        Returns:
            The sum of the three per-role costs, or ``None`` when not training.
        """
        active_logits, passive_logits, new_lstm_state = self.get_policy(
            role_id, prob_state, last_cards, lstm_state)
        new_lstm_state = tf.identity(new_lstm_state, name='new_lstm_state')
        active_prob = tf.nn.softmax(active_logits, name='active_prob')
        passive_prob = tf.nn.softmax(passive_logits, name='passive_prob')
        # only created so that 'mode_out' can be fetched by name
        tf.identity(mode, name='mode_out')
        value = self.get_value(role_id, value_state)
        # this is the value for each agent, not the global value
        value = tf.identity(value, name='pred_value')

        if not get_current_tower_context().is_training:
            return

        action_target_onehot = tf.one_hot(action_target, len(action_space))

        # log-probability of the taken action under each branch; the
        # probabilities are clipped to keep the log finite
        active_logpa = tf.reduce_sum(
            action_target_onehot *
            tf.log(tf.clip_by_value(active_prob, 1e-7, 1 - 1e-7)), 1)
        passive_logpa = tf.reduce_sum(
            action_target_onehot *
            tf.log(tf.clip_by_value(passive_prob, 1e-7, 1 - 1e-7)), 1)

        # B * 2
        logpa = tf.stack([active_logpa, passive_logpa], axis=1)
        # (row, mode) pairs: select the branch that applies to each sample.
        # Built once and reused for every per-branch gather below.
        idx = tf.stack([tf.range(tf.shape(prob_state)[0]), mode], axis=1)

        # B
        logpa = tf.gather_nd(logpa, idx)

        # importance sampling: probability of the taken action under each branch
        active_pa = tf.reduce_sum(
            action_target_onehot *
            tf.clip_by_value(active_prob, 1e-7, 1 - 1e-7), 1)
        passive_pa = tf.reduce_sum(
            action_target_onehot *
            tf.clip_by_value(passive_prob, 1e-7, 1 - 1e-7), 1)

        # B * 2
        pa = tf.stack([active_pa, passive_pa], axis=1)

        # B
        pa = tf.gather_nd(pa, idx)

        # using PPO
        ppo_epsilon = tf.get_variable('ppo_epsilon',
                                      shape=[],
                                      initializer=tf.constant_initializer(0.2),
                                      trainable=False)
        importance_b = pa / (history_action_prob + 1e-8)

        # advantage (no gradient flows into the value head through it)
        advantage_b = tf.subtract(discounted_return,
                                  tf.stop_gradient(value),
                                  name='advantage')

        # clipped surrogate objective, negated to obtain a loss
        policy_loss_b = -tf.minimum(
            importance_b * advantage_b,
            tf.clip_by_value(importance_b, 1 - ppo_epsilon, 1 + ppo_epsilon) *
            advantage_b)
        entropy_loss_b = pa * logpa
        value_loss_b = tf.square(value - discounted_return)

        entropy_beta = tf.get_variable(
            'entropy_beta',
            shape=[],
            initializer=tf.constant_initializer(0.005),
            trainable=False)

        value_weight = tf.get_variable(
            'value_weight',
            shape=[],
            initializer=tf.constant_initializer(0.2),
            trainable=False)

        # regularization loss
        ctx = get_current_tower_context()
        if ctx.has_own_variables:  # be careful of the first tower (name='')
            reg_losses = ctx.get_collection_in_tower(
                tf.GraphKeys.REGULARIZATION_LOSSES)
        else:
            reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if len(reg_losses) > 0:
            logger.info(
                "regularize_cost_from_collection() found {} regularizers "
                "in REGULARIZATION_LOSSES collection.".format(len(reg_losses)))

        # 3 * 2: for every role the [active, passive] regularization cost
        l2_losses = []
        for role in range(1, 4):
            scope = 'policy_network_%d' % role
            l2_loss_role = [
                reg for reg in reg_losses if reg.op.name.startswith(scope)
            ]
            # the active branch excludes the passive-only regularizers,
            # the passive branch is charged for all of them
            l2_active_loss = [
                reg for reg in l2_loss_role if 'branch_passive' not in reg.name
            ]
            l2_passive_loss = l2_loss_role
            print('l2 active loss: {}'.format(len(l2_active_loss)))
            print('l2 passive loss: {}'.format(len(l2_passive_loss)))

            # 2
            losses = [tf.add_n(l2_active_loss), tf.add_n(l2_passive_loss)]
            losses = tf.stack(losses, axis=0)
            if role == 1 or role == 3:
                losses = tf.stop_gradient(losses)
            l2_losses.append(losses)

        # 3 * 2
        l2_losses = tf.stack(l2_losses, axis=0)

        # B * 2
        l2_losses = tf.gather(l2_losses, role_id)

        # B
        l2_losses = tf.gather_nd(l2_losses, idx)

        print(l2_losses.shape)

        def masked_mean(values, mask, count, name):
            # mean of `values` over the samples selected by `mask`
            return tf.truediv(
                tf.reduce_sum(tf.boolean_mask(values, mask)), count, name=name)

        costs = []
        for i in range(1, 4):
            mask = tf.equal(role_id, i)
            # Number of samples of this role, floored at 1: when a role is
            # absent from the batch every masked sum is 0, and dividing by
            # an unguarded 0 would turn its cost (and the total) into NaN.
            valid_batch = tf.maximum(
                tf.reduce_sum(tf.cast(mask, tf.float32)), 1.0)

            l2_loss_i = masked_mean(
                l2_losses, mask, valid_batch, 'l2_loss_%d' % i)
            pred_reward = masked_mean(
                value, mask, valid_batch, 'predict_reward_%d' % i)
            true_reward = masked_mean(
                discounted_return, mask, valid_batch, 'true_reward_%d' % i)
            advantage = tf.sqrt(tf.truediv(
                tf.reduce_sum(tf.square(tf.boolean_mask(advantage_b, mask))),
                valid_batch),
                                name='rms_advantage_%d' % i)

            policy_loss = masked_mean(
                policy_loss_b, mask, valid_batch, 'policy_loss_%d' % i)
            entropy_loss = masked_mean(
                entropy_loss_b, mask, valid_batch, 'entropy_loss_%d' % i)
            value_loss = masked_mean(
                value_loss_b, mask, valid_batch, 'value_loss_%d' % i)
            cost = tf.add_n([
                policy_loss, entropy_loss * entropy_beta,
                value_weight * value_loss, l2_loss_i
            ],
                            name='cost_%d' % i)
            costs.append(cost)

            importance = masked_mean(
                importance_b, mask, valid_batch, 'importance_%d' % i)
            add_moving_summary(policy_loss,
                               entropy_loss,
                               value_loss,
                               l2_loss_i,
                               pred_reward,
                               true_reward,
                               advantage,
                               cost,
                               importance,
                               decay=0)

        return tf.add_n(costs)
Exemple #35
0
    def build_graph(self, *inputs):
        """
        Build an LSTM regressor over two layer-type sequences and return its
        mean-squared-error cost.

        The first six inputs are ``mseq, mlen, pseq, plen, pve, target``;
        any further inputs are extra per-sample statistics appended to the
        feature vector. The prediction is stored in ``self.pred`` (tensor
        name 'predicted_accuracy') and the cost in ``self.cost``.
        """
        mseq, mlen, pseq, plen, pve, target = inputs[:6]
        extra_stats = list(inputs[6:])

        initializer = tf.random_uniform_initializer(-0.1, 0.1)
        with tf.variable_scope(self.vs_name):
            # Feature embedding, shared by both sequences
            num_tokens = LayerTypes.num_layer_types()
            embedding_table = tf.get_variable(
                'embedding', [num_tokens, self.lstm_size], initializer=initializer)
            # B x seqlen x hiddensize
            mfeat = tf.nn.embedding_lookup(embedding_table, mseq)
            mfeat = Dropout(mfeat, keep_prob=self.dropout_kp)
            pfeat = tf.nn.embedding_lookup(embedding_table, pseq)
            pfeat = Dropout(pfeat, keep_prob=self.dropout_kp)

            # LSTM structures
            def make_cell():
                lstm = rnn.LSTMCell(num_units=self.lstm_size,
                                    initializer=initializer,
                                    reuse=tf.get_variable_scope().reuse)
                return rnn.DropoutWrapper(
                    lstm, output_keep_prob=self.dropout_kp)

            layers = []
            for _ in range(self.num_lstms):
                layers.append(make_cell())
            cells = rnn.MultiRNNCell(layers)
            # (a CudnnLSTM with the same sizes was tried here previously)

            # initial state
            mstate = cells.zero_state(self.batch_size, dtype=tf.float32)
            pstate = cells.zero_state(self.batch_size, dtype=tf.float32)

            # apply the same LSTM stack to both embedded sequences
            with tf.variable_scope('LSTM'):
                mout, mstate = tf.nn.dynamic_rnn(
                    cells, mfeat, initial_state=mstate, sequence_length=mlen)
                pout, pstate = tf.nn.dynamic_rnn(
                    cells, pfeat, initial_state=pstate, sequence_length=plen)

            # NOTE(review): the float versions of mlen/plen built here are
            # not used afterwards -- confirm whether they were meant to be
            # part of the features.
            mlen = tf.cast(tf.reshape(mlen, [self.batch_size, 1]),
                           dtype=tf.float32)
            plen = tf.cast(tf.reshape(plen, [self.batch_size, 1]),
                           dtype=tf.float32)
            pve = tf.reshape(pve, [self.batch_size, 1])
            extra_stats = [tf.reshape(stat, [-1, 1]) for stat in extra_stats]

            # only use the last output for predicting the child model accuracy
            # NOTE(review): with sequence_length given, dynamic_rnn is
            # documented to zero outputs past each sequence's length, so
            # [:, -1] looks like the true last output only for full-length
            # sequences -- confirm this is intended.
            last_outputs = [mout[:, -1], pout[:, -1], pve]
            feat = tf.concat(values=last_outputs + extra_stats, axis=1)

            pred = FullyConnected('fully_connect', feat, 1, activation=tf.sigmoid)
            pred = tf.reshape(pred, [self.batch_size])
            self.pred = tf.identity(pred, name='predicted_accuracy')

            mse = tf.losses.mean_squared_error(target, self.pred)
            self.cost = tf.identity(mse, name='cost')
            add_moving_summary(self.cost)
            return self.cost
Exemple #36
0
    def build_graph(self, image, label):
        """
        Build a small two-conv-layer classifier with ``tf.layers`` and return
        its training cost.

        Args:
            image (tf.Tensor): batch of single-channel images without a
                channel axis; one is appended below. The flatten step
                assumes the two 2x2 poolings leave a 7x7 map, i.e. 28x28
                inputs -- TODO confirm against the dataflow.
            label (tf.Tensor): integer class label of every image.

        Returns:
            tf.Tensor: scalar 'total_cost', the mean cross-entropy plus the
            weight-decay term.

        Besides the cost, a tensor named 'prob' (softmax of the logits) is
        created for inference, and moving summaries of the error, accuracy
        and costs are registered.
        """
        # inputs to conv nets are NHWC := Num_samples x Height x Width x Channels
        image = tf.expand_dims(image, 3)

        # rescale the pixels to be centred at zero
        # (assumes the inputs lie in [0, 1], giving [-1, 1] -- TODO confirm)
        image = image * 2 - 1

        # Dropout must be active during training only. It used to be
        # hard-coded on, which also randomly dropped units at inference time.
        is_training = get_current_tower_context().is_training

        # The network is built with tf.layers; an equivalent tensorpack-layer
        # version (conv0/pool0/conv1/pool1/fc0/dropout/fc1) was used previously.
        conv1 = tf.layers.conv2d(
            inputs=image,
            filters=32,
            kernel_size=3,
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(
                2.0),
            padding="same",
            activation=tf.nn.relu)

        # Pooling Layer #1
        pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                        pool_size=[2, 2],
                                        strides=2)

        # Convolutional Layer #2 and Pooling Layer #2
        conv2 = tf.layers.conv2d(
            inputs=pool1,
            filters=32,
            kernel_size=3,
            kernel_initializer=tf.contrib.layers.variance_scaling_initializer(
                2.0),
            padding="same",
            activation=tf.nn.relu)

        pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                        pool_size=[2, 2],
                                        strides=2)

        # Dense Layer
        pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 32])
        dense = tf.layers.dense(inputs=pool2_flat,
                                units=1024,
                                activation=tf.nn.relu)
        dropout = tf.layers.dropout(inputs=dense,
                                    rate=0.4,
                                    training=is_training)

        # Logits Layer
        logits = tf.layers.dense(inputs=dropout, units=10)

        # Only exposes a named probability tensor for inference; the loss
        # below works on the raw logits and does not depend on this op.
        tf.nn.softmax(logits, name='prob')

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(
            cost, name='cross_entropy_loss')  # the average cross-entropy loss

        # 1.0 where the top-scoring class equals the label, else 0.0
        correct = tf.cast(tf.nn.in_top_k(logits, label, 1),
                          tf.float32,
                          name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error (in a moving_average fashion):
        # 1. write the value to tensorboard
        # 2. write the value to stat.json
        # 3. print the value after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find the parameters to apply weight decay to: the
        # weight matrices of all fully-connected layers. tf.layers names them
        # 'dense*/kernel' while tensorpack layers use 'fc*/W'; the previous
        # pattern 'fc.*/W' matched nothing in this tf.layers graph, so no
        # weight decay was applied at all.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('(fc|dense).*/(W|kernel)',
                                              tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histogram of all weights (of conv and fc layers) in
        # tensorboard, under either naming scheme
        summary.add_param_summary(('.*/(W|kernel)', ['histogram', 'rms']))
        return total_cost
Exemple #37
0
    def _build_graph(self, inputs):
        """
        Build the RPN + Fast R-CNN graph for the current tower.

        Args:
            inputs: sequence unpacked as
                (image, anchor_labels, anchor_boxes, gt_boxes, gt_labels).

        In a training tower this sets ``self.cost`` to the sum of the RPN
        losses, the Fast R-CNN losses and the weight-decay term. Otherwise it
        only creates the named prediction tensors 'fastrcnn_all_probs',
        'fastrcnn_fg_probs' and 'fastrcnn_fg_boxes'.
        """
        training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        # Anchors are computed from the image before it is preprocessed.
        anchors_on_fm = self._get_anchors(image)
        image = self._preprocess(image)

        # --- RPN -----------------------------------------------------------
        encoded_anchor_targets = encode_bbox_target(anchor_boxes, anchors_on_fm)
        featuremap = pretrained_resnet_conv4(
            image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head(
            featuremap, 1024, config.NUM_ANCHOR)
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, encoded_anchor_targets,
            rpn_label_logits, rpn_box_logits)

        # (fHxfWxNA)x4, floatbox
        rpn_boxes = decode_bbox_target(
            rpn_box_logits, anchors_on_fm, config.ANCHOR_STRIDE)
        flat_rpn_scores = tf.reshape(rpn_label_logits, [-1])
        image_hw = tf.shape(image)[2:]
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            rpn_boxes, flat_rpn_scores, image_hw)

        # --- RoI head (shared by both modes) ---------------------------------
        # Training crops the sampled proposals; inference crops all of them.
        if training:
            sampled_boxes, encoded_box_targets, sampled_labels = \
                sample_fast_rcnn_targets(proposal_boxes, gt_boxes, gt_labels)
            roi_boxes = sampled_boxes
        else:
            roi_boxes = proposal_boxes

        # Boxes are scaled by 1/ANCHOR_STRIDE before cropping the feature map.
        rois_on_featuremap = roi_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, rois_on_featuremap, 14)
        roi_features = resnet_conv5_gap(
            roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxc
        label_logits, box_logits = fastrcnn_head(
            roi_features, config.NUM_CLASS)

        if training:
            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                sampled_labels, encoded_box_targets, label_logits, box_logits)

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            loss_terms = [
                rpn_label_loss, rpn_box_loss,
                fastrcnn_label_loss, fastrcnn_box_loss,
                wd_cost,
            ]
            self.cost = tf.add_n(loss_terms, 'total_cost')

            add_moving_summary(self.cost)
            add_moving_summary(wd_cost)
        else:
            label_probs = tf.nn.softmax(
                label_logits, name='fastrcnn_all_probs')  # NP,
            predicted_labels = tf.argmax(label_logits, axis=1)
            fg_ind, fg_box_logits = fastrcnn_predict_boxes(
                predicted_labels, box_logits)
            # Created only for its name; fetched by name at inference time.
            tf.gather(label_probs, fg_ind, name='fastrcnn_fg_probs')
            fg_proposals = tf.gather(proposal_boxes, fg_ind)

            # Divide out the regression weights before decoding the boxes.
            fg_box_logits = fg_box_logits / tf.constant(
                config.FASTRCNN_BBOX_REG_WEIGHTS)
            fg_decoded = decode_bbox_target(
                fg_box_logits, fg_proposals,
                config.ANCHOR_STRIDE)  # Nfx4, floatbox
            tf.identity(fg_decoded, name='fastrcnn_fg_boxes')
Exemple #38
0
def sample_fast_rcnn_targets(boxes, gt_boxes, gt_labels):
    """
    Pick a random foreground/background subset of proposals (plus the gt
    boxes themselves) and build their classification / regression targets.

    Args:
        boxes: nx4 region proposals, floatbox
        gt_boxes: mx4, floatbox
        gt_labels: m, integer class labels

    Returns:
        sampled_boxes: tx4 floatbox, the rois
        target_boxes: tx4 encoded box, the regression target
            (scaled by config.FASTRCNN_BBOX_REG_WEIGHTS, gradient stopped)
        labels: t labels, 0 for every sampled background box
            (gradient stopped)
    """
    @under_name_scope()
    def assign_class_to_roi(iou, gt_boxes, gt_labels):
        """
        Match every roi to the gt box it overlaps most.

        Args:
            iou: nxm (nr_proposal x nr_gt)
        Returns:
            fg_mask: n boolean, whether each roibox is fg
            roi_labels: n, label of the best-matching gt for each roi box
            best_gt_boxes: nx4
        """
        nearest_gt = tf.argmax(iou, axis=1)  # n, index into the m gt boxes
        max_overlap = tf.reduce_max(iou, axis=1)  # n,
        matched_boxes = tf.gather(gt_boxes, nearest_gt)  # nx4
        matched_labels = tf.gather(gt_labels, nearest_gt)  # n,

        is_fg = max_overlap >= config.FASTRCNN_FG_THRESH
        return is_fg, matched_labels, matched_boxes

    iou = pairwise_iou(boxes, gt_boxes)  # nxm

    # Summary-only statistics: how well the proposals cover each gt box.
    with tf.name_scope('proposal_metrics'):
        per_gt_best = tf.reduce_max(iou, axis=0)
        metrics = [tf.reduce_mean(per_gt_best, name='best_iou_per_gt')]
        with tf.device('/cpu:0'):
            for th in (0.3, 0.5):
                covered = tf.count_nonzero(per_gt_best >= th)
                total = tf.size(per_gt_best, out_type=tf.int64)
                metrics.append(
                    tf.truediv(covered, total,
                               name='recall_iou{}'.format(th)))
        add_moving_summary(*metrics)

    # n, n, nx4
    fg_mask, roi_labels, best_gt_boxes = assign_class_to_roi(
        iou, gt_boxes, gt_labels)

    # Foreground candidates: matched proposals, followed by the gt boxes.
    # The gt boxes sit after the n proposals in the concatenated tensors
    # below, hence the offset by tf.shape(boxes)[0].
    proposal_fg = tf.reshape(tf.where(fg_mask), [-1])
    gt_as_fg = tf.cast(
        tf.range(tf.size(gt_labels)) + tf.shape(boxes)[0], tf.int64)
    fg_inds = tf.concat([proposal_fg, gt_as_fg], 0)
    fg_available = tf.size(fg_inds)
    fg_budget = int(config.FASTRCNN_BATCH_PER_IM * config.FASTRCNN_FG_RATIO)
    num_fg = tf.minimum(fg_budget, fg_available, name='num_fg')
    fg_inds = tf.slice(tf.random_shuffle(fg_inds), [0], [num_fg])

    # Background fills whatever is left of the per-image budget.
    bg_inds = tf.where(tf.logical_not(fg_mask))[:, 0]
    bg_available = tf.size(bg_inds)
    num_bg = tf.minimum(config.FASTRCNN_BATCH_PER_IM - num_fg,
                        bg_available,
                        name='num_bg')
    bg_inds = tf.slice(tf.random_shuffle(bg_inds), [0], [num_bg])

    add_moving_summary(num_fg, num_bg)

    # Everything below indexes into the n proposals followed by the m gt boxes.
    all_boxes = tf.concat([boxes, gt_boxes], axis=0)
    all_matched_gt_boxes = tf.concat([best_gt_boxes, gt_boxes], axis=0)
    all_labels = tf.concat([roi_labels, gt_labels], axis=0)

    chosen = tf.concat([fg_inds, bg_inds], axis=0)  # ind in all n+m boxes
    ret_boxes = tf.gather(all_boxes, chosen, name='sampled_boxes')
    ret_matched_gt_boxes = tf.gather(all_matched_gt_boxes, chosen)
    # Targets for bg boxes are produced too, but bg boxes will not be
    # trained on.
    ret_encoded_boxes = encode_bbox_target(
        ret_matched_gt_boxes, ret_boxes) * tf.constant(
            config.FASTRCNN_BBOX_REG_WEIGHTS)

    fg_labels = tf.gather(all_labels, fg_inds)
    bg_labels = tf.zeros_like(bg_inds, dtype=tf.int64)
    ret_labels = tf.concat([fg_labels, bg_labels],
                           axis=0,
                           name='sampled_labels')

    encoded_targets = tf.stop_gradient(ret_encoded_boxes)
    label_targets = tf.stop_gradient(ret_labels)
    return ret_boxes, encoded_targets, label_targets
Exemple #39
0
    def build_graph(self, *inputs):
        """
        Build the detection graph (RPN + Fast R-CNN head, plus a mask head
        when ``config.MODE_MASK`` is set) for the current tower.

        Args:
            *inputs: (image, anchor_labels, anchor_boxes, gt_boxes, gt_labels)
                followed by ``gt_masks`` when ``config.MODE_MASK`` is set.

        Returns:
            The 'total_cost' tensor in a training tower. The inference branch
            has no return statement; it only creates tensors (including the
            named 'final_masks' when ``config.MODE_MASK`` is set).
        """
        is_training = get_current_tower_context().is_training
        if config.MODE_MASK:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
        else:
            image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = self.preprocess(image)  # 1CHW

        featuremap = resnet_c4_backbone(image, config.RESNET_NUM_BLOCK[:3])
        rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, 1024,
                                                    config.NUM_ANCHOR)

        # Restrict the anchors and their labels/boxes to this featuremap.
        fm_anchors, anchor_labels, anchor_boxes = self.narrow_to_featuremap(
            featuremap, get_all_anchors(), anchor_labels, anchor_boxes)
        anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image_shape2d = tf.shape(image)[2:]  # h,w
        pred_boxes_decoded = decode_bbox_target(
            rpn_box_logits, fm_anchors)  # fHxfWxNAx4, floatbox
        # The pre-/post-NMS top-k limits differ between training and testing.
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            tf.reshape(pred_boxes_decoded, [-1, 4]),
            tf.reshape(rpn_label_logits,
                       [-1]), image_shape2d, config.TRAIN_PRE_NMS_TOPK
            if is_training else config.TEST_PRE_NMS_TOPK,
            config.TRAIN_POST_NMS_TOPK
            if is_training else config.TEST_POST_NMS_TOPK)

        if is_training:
            # sample proposal boxes in training
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            # Use all proposal boxes in inference
            rcnn_boxes = proposal_boxes

        # Boxes are scaled by 1/ANCHOR_STRIDE before cropping the featuremap.
        boxes_on_featuremap = rcnn_boxes * (1.0 / config.ANCHOR_STRIDE)
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
        # which was fixed in TF 1.6
        def ff_true():
            # The real head: conv5 features -> global average pool -> outputs.
            feature_fastrcnn = resnet_conv5(
                roi_resized, config.RESNET_NUM_BLOCK[-1])  # nxcx7x7
            feature_gap = GlobalAvgPooling('gap',
                                           feature_fastrcnn,
                                           data_format='channels_first')
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs(
                'fastrcnn', feature_gap, config.NUM_CLASS)
            # Return C5 feature to be shared with mask branch
            return feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits

        def ff_false():
            # Zero-row placeholders with the same trailing dimensions as
            # ff_true's outputs, used by tf.cond when there are no boxes.
            ncls = config.NUM_CLASS
            return tf.zeros([0, 2048, 7,
                             7]), tf.zeros([0,
                                            ncls]), tf.zeros([0, ncls - 1, 4])

        # NOTE(review): the version is compared as a number; confirm that
        # get_tf_version_number() orders e.g. TF 1.10 after 1.6.
        if get_tf_version_number() >= 1.6:
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = ff_true(
            )
        else:
            logger.warn("This example may drop support for TF < 1.6 soon.")
            # Older TF: only run the head when there is at least one box.
            feature_fastrcnn, fastrcnn_label_logits, fastrcnn_box_logits = tf.cond(
                tf.size(boxes_on_featuremap) > 0, ff_true, ff_false)

        if is_training:
            # rpn loss
            rpn_label_loss, rpn_box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)

            # fastrcnn loss
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            # Samples with label > 0 are the foreground ones.
            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                               fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                # In training, mask branch shares the same C5 feature.
                fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
                mask_logits = maskrcnn_upXconv_head(
                    'maskrcnn', fg_feature, config.NUM_CLASS,
                    num_convs=0)  # #fg x #cat x 14x14

                # Crop each fg sample's matched gt mask to its box, at 14x14.
                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    fg_sampled_boxes,
                    fg_inds_wrt_gt,
                    14,
                    pad_border=False)  # nfg x 1x14x14
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                # Keeps the add_n list below the same in both modes.
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            total_cost = tf.add_n([
                rpn_label_loss, rpn_box_loss, fastrcnn_label_loss,
                fastrcnn_box_loss, mrcnn_loss, wd_cost
            ], 'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)

            if config.MODE_MASK:
                # HACK to work around https://github.com/tensorflow/tensorflow/issues/14657
                def f1():
                    # Unlike training, inference re-crops the featuremap with
                    # the final boxes and runs conv5 again for the mask head.
                    roi_resized = roi_align(
                        featuremap, final_boxes * (1.0 / config.ANCHOR_STRIDE),
                        14)
                    feature_maskrcnn = resnet_conv5(
                        roi_resized, config.RESNET_NUM_BLOCK[-1])
                    mask_logits = maskrcnn_upXconv_head(
                        'maskrcnn', feature_maskrcnn, config.NUM_CLASS,
                        0)  # #result x #cat x 14x14
                    # One (result index, label - 1) pair per detection, used
                    # to pick that detection's mask for its predicted class.
                    indices = tf.stack([
                        tf.range(tf.size(final_labels)),
                        tf.to_int32(final_labels) - 1
                    ],
                                       axis=1)
                    final_mask_logits = tf.gather_nd(mask_logits,
                                                     indices)  # #resultx14x14
                    return tf.sigmoid(final_mask_logits)

                # With no detections, emit an empty mask tensor instead.
                final_masks = tf.cond(
                    tf.size(final_labels) > 0, f1,
                    lambda: tf.zeros([0, 14, 14]))
                tf.identity(final_masks, name='final_masks')
	def _build_graph(self, inputs):
		"""Build the generators, discriminators and the combined GAN losses.

		Args:
			inputs: six tensors unpacked as ``pi, pm, pl, ui, um, ul``; each is
				passed through ``cvt2tanh`` before use. Presumably paired
				(p*) and unpaired (u*) image / membrane / label batches --
				TODO confirm against the dataflow.

		Only the 'I2M' -> 'M2L' generator path and the 'M' and 'L'
		discriminators are built. Sets ``self.g_loss`` and ``self.d_loss``
		(each including an L2 weight penalty), calls
		``self.collect_variables()``, and registers moving summaries and a
		'colorized' image summary.
		"""
		G = tf.get_default_graph()  # used by `rounded` for the gradient override

		pi, pm, pl, ui, um, ul = inputs
		pi, pm, pl, ui, um, ul = (
			cvt2tanh(t) for t in (pi, pm, pl, ui, um, ul))

		def tf_rand_score(x1, x2):
			# Runs in Python through tf.py_func: 1 - adjusted Rand index of the
			# two flattened arrays.
			return 1.0 - adjusted_rand_score(x1.flatten(), x2.flatten())

		def rounded(label, factor=MAX_LABEL, name='quantized'):
			# Round in the cvt2imag range, then map back with cvt2tanh. The
			# gradient of Round is overridden with Identity so the rounding
			# does not block back-propagation.
			with G.gradient_override_map({"Round": "Identity"}):
				with freeze_variables():
					with tf.name_scope(name=name):
						label = cvt2imag(label, maxVal=factor)
						label = tf.round(label)
						label = cvt2tanh(label, maxVal=factor)
					return tf.identity(label, name=name)

		with argscope([Conv2D, Deconv2D, FullyConnected],
				W_init=tf.truncated_normal_initializer(stddev=0.02),
				use_bias=False), \
				argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
				argscope([Conv2D, Deconv2D, BatchNorm], data_format='NHWC'), \
				argscope(LeakyReLU, alpha=0.2):

			with tf.variable_scope('gen'):
				with tf.variable_scope('I2M'):
					pim = self.generator(pi)
				with tf.variable_scope('M2L'):
					piml = self.generator(pim)
					pml = self.generator(pm)

			with tf.variable_scope('discrim'):
				with tf.variable_scope('M'):
					m_dis_real = self.discriminator(um)
					m_dis_fake_from_image = self.discriminator(pim)
				with tf.variable_scope('L'):
					# The discriminator sees the un-rounded generator output;
					# rounding is applied only afterwards.
					l_dis_real = self.discriminator(ul)
					l_dis_fake_from_image = self.discriminator(piml)

		piml = rounded(piml)
		pml = rounded(pml)

		with tf.name_scope('Recon_L_loss'):
			recon_iml = tf.reduce_mean(tf.abs(pl - piml), name='recon_iml')

		with tf.name_scope('Recon_M_loss'):
			recon_im = tf.reduce_mean(tf.abs(pm - pim), name='recon_im')

		with tf.name_scope('GAN_loss'):
			G_loss_LI, D_loss_LI = self.build_losses(
				l_dis_real, l_dis_fake_from_image, name='LL')
			G_loss_MI, D_loss_MI = self.build_losses(
				m_dis_real, m_dis_fake_from_image, name='MI')

		# custom loss for membr
		with tf.name_scope('membr_loss'):
			def membr_loss(y_true, y_pred, name='membr_loss'):
				# Mean of (binary cross entropy - jaccard dice coefficient),
				# both computed on the cvt2imag-converted inputs.
				bce = binary_cross_entropy(
					cvt2imag(y_true, maxVal=1.0),
					cvt2imag(y_pred, maxVal=1.0))
				dice = dice_coe(
					cvt2imag(y_true, maxVal=1.0),
					cvt2imag(y_pred, maxVal=1.0),
					axis=[1, 2, 3], loss_type='jaccard')
				return tf.reduce_mean(tf.subtract(bce, dice), name=name)

			membr_im = membr_loss(pm, pim, name='membr_im')

		# custom loss for label
		with tf.name_scope('label_loss'):
			def label_loss(y_true_L, y_pred_L, y_grad_M, name='label_loss'):
				# NOTE(review): `y_true_L` and `name` are accepted but never
				# used, so the returned loss is always named 'loss_gtv_guess'.
				g_mag_grad_M = cvt2imag(y_grad_M, maxVal=1.0)
				mag_grad_L = magnitute_central_difference(y_pred_L, name='mag_grad_L')
				# 1 where the label gradient magnitude is positive, else 0.
				cond = tf.greater(mag_grad_L, tf.zeros_like(mag_grad_L))
				thresholded_mag_grad_L = tf.where(
					cond,
					tf.ones_like(mag_grad_L),
					tf.zeros_like(mag_grad_L),
					name='thresholded_mag_grad_L')

				gtv_guess = tf.multiply(g_mag_grad_M, thresholded_mag_grad_L, name='gtv_guess')
				loss_gtv_guess = tf.reduce_mean(gtv_guess, name='loss_gtv_guess')

				# The mask is returned in the cvt2tanh range for visualization.
				thresholded_mag_grad_L = cvt2tanh(thresholded_mag_grad_L, maxVal=1.0)
				return loss_gtv_guess, thresholded_mag_grad_L

			label_iml, g_iml = label_loss(None, piml, pim, name='label_iml')
			label_ml, g_ml = label_loss(None, pml, pm, name='label_loss_ml')

		# custom loss for tf_rand_score
		# NOTE(review): tf.py_func ops have no gradient by default, so these
		# terms presumably add to the loss value without training anything --
		# confirm.
		with tf.name_scope('rand_loss'):
			rand_iml = tf.reduce_mean(tf.cast(tf.py_func(tf_rand_score, [piml, pl], tf.float64), tf.float32))
			rand_ml = tf.reduce_mean(tf.cast(tf.py_func(tf_rand_score, [pml, pl], tf.float64), tf.float32))

		self.g_loss = tf.add_n([
			recon_iml,
			recon_im,
			rand_iml,
			rand_ml,
			(G_loss_LI + G_loss_MI),
			membr_im,
			(label_iml + label_ml),
		], name='G_loss_total')
		self.d_loss = tf.add_n([
			(D_loss_LI + D_loss_MI),
		], name='D_loss_total')

		wd_g = regularize_cost('gen/.*/W', l2_regularizer(1e-5), name='G_regularize')
		wd_d = regularize_cost('discrim/.*/W', l2_regularizer(1e-5), name='D_regularize')

		self.g_loss = tf.add(self.g_loss, wd_g, name='g_loss')
		self.d_loss = tf.add(self.d_loss, wd_d, name='d_loss')

		self.collect_variables()

		add_moving_summary(self.d_loss, self.g_loss)
		add_moving_summary(
			recon_iml,
			recon_im,
			label_iml,
			label_ml,
		)

		# Two rows of five panels, concatenated along axis 2 within a row and
		# along axis 1 between rows.
		viz = tf.concat([
			tf.concat([ui, pi, pim, piml, g_iml], 2),
			tf.concat([um, pl, pm, pml, g_ml], 2),
		], 1)
		viz = cvt2imag(viz)
		viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
		tf.summary.image('colorized', viz, max_outputs=50)
    def _build_graph(self, inputs):
        """
        Build a classifier on top of two spatial-transformer branches.

        Args:
            inputs: ``(image, label)``.

        Two branches ('STN1', 'STN2') each predict a 2x3 affine transform and
        sample a WARP_TARGET_SIZE x WARP_TARGET_SIZE patch from the image. The
        two patches are concatenated and fed to a fully-connected classifier.
        Sets ``self.cost`` to cross-entropy plus weight decay.
        """
        # One (y, x, 1) row per pixel of the output grid.
        grid = np.array(
            [(y, x, 1)
             for y in range(WARP_TARGET_SIZE)
             for x in range(WARP_TARGET_SIZE)],
            dtype='float32')
        xys = tf.constant(grid, dtype=tf.float32, name='xys')  # p x 3

        image, label = inputs
        image = image / 255.0 - 0.5  # bhw2

        def get_stn(img):
            # Localisation network. The last layer starts with zero weights
            # and a constant bias, so every branch initially outputs the
            # fixed transform [[1, 0, HALF_DIFF], [0, 1, HALF_DIFF]].
            initial_affine = [1, 0, HALF_DIFF, 0, 1, HALF_DIFF]
            params = (LinearWrap(img)
                      .AvgPooling('downsample', 2)
                      .Conv2D('conv0', 20, 5, padding='VALID')
                      .MaxPooling('pool0', 2)
                      .Conv2D('conv1', 20, 5, padding='VALID')
                      .FullyConnected('fc1', out_dim=32)
                      .FullyConnected(
                          'fct',
                          out_dim=6,
                          nl=tf.identity,
                          W_init=tf.constant_initializer(),
                          b_init=tf.constant_initializer(initial_affine))())
            # output 6 parameters for affine transformation
            affine = tf.reshape(params, [-1, 2, 3], name='affine')  # bx2x3
            affine_t = tf.transpose(affine, [2, 0, 1])
            flat_affine = tf.reshape(affine_t, [3, -1])  # 3 x (bx2)
            # Apply every batch element's transform to every grid point.
            coords = tf.matmul(xys, flat_affine)
            coords = tf.reshape(
                coords, [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
            coords = tf.transpose(
                coords, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
            return ImageSample('warp', [img, coords], borderMode='constant')

        with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
            with tf.variable_scope('STN1'):
                sampled1 = get_stn(image)
            with tf.variable_scope('STN2'):
                sampled2 = get_stn(image)

        # For visualization in tensorboard
        with tf.name_scope('visualization'):
            def stack_channels(t):
                # Put channel 0 and channel 1 one after the other along axis 1.
                return tf.concat([t[:, :, :, 0], t[:, :, :, 1]], 1)

            border = [[0, 0], [HALF_DIFF, HALF_DIFF],
                      [HALF_DIFF, HALF_DIFF], [0, 0]]
            padded1 = tf.pad(sampled1, border)
            padded2 = tf.pad(sampled2, border)

            img_orig = stack_channels(image)  # b x 2h  x w
            transform1 = stack_channels(padded1)
            transform2 = stack_channels(padded2)
            stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
            tf.summary.image('visualize',
                             tf.expand_dims(stacked, -1),
                             max_outputs=30)

        sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')
        logits = (LinearWrap(sampled)
                  .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
                  .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
                  .FullyConnected('fct', out_dim=19, nl=tf.identity)())
        # Not used below; created so the 'prob' tensor exists in the graph.
        tf.nn.softmax(logits, name='prob')

        per_sample_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_sample_loss, name='cross_entropy_loss')

        wrong = symbf.prediction_incorrect(logits, label)
        train_error = tf.reduce_mean(wrong, name='train_error')
        summary.add_moving_summary(train_error)

        # Weight decay on the W of every layer matching 'fc.*'.
        weight_penalty = regularize_cost('fc.*/W', tf.nn.l2_loss)
        wd_cost = tf.multiply(1e-5, weight_penalty, name='regularize_loss')
        summary.add_moving_summary(cost, wd_cost)
        self.cost = tf.add_n([wd_cost, cost], name='cost')
Exemple #42
0
	def _build_graph(self, inputs):
		"""Build the 3D/2D encoder-decoder generator, discriminator and losses.

		Args:
			inputs: ``(vol3d, img2d, condition)``. ``condition`` is reshaped to
				a scalar and selects the encoding that is decoded for the
				volume path: the AdaIN-merged one when ``condition > 0``,
				otherwise the plain ``vol2d`` encoding.

		Sets ``self.g_loss`` and ``self.d_loss``, calls
		``self.collect_variables()``, creates the named tensors 'out_vol3d'
		and 'out_vol3d_decoded', and registers a 'colorized' image summary.

		The sub-networks are pinned to '/device:GPU:0' .. '/device:GPU:4'.
		"""
		vol3d, img2d, condition = inputs  # Split the input

		with tf.variable_scope('gen'):
			with tf.device('/device:GPU:0'):
				# Step 0: run thru 3d encoder
				with tf.variable_scope('encoder_3d'):
					vol2d = self.vol3d_encoder(vol3d)
				# Step 1: Run thru the encoder
				with tf.variable_scope('encoder_vgg19_2d'):
					vol2d_encoded, vol2d_feature = self.vgg19_encoder(vol2d)
					img2d_encoded, img2d_feature = self.vgg19_encoder(img2d)
				# Step 2: Run thru the adain block to get t=AdIN(f(c), f(s))
				with tf.variable_scope('style_transfer'):
					merge_encoded = self._build_adain_layers(vol2d_encoded, img2d_encoded)
					condition = tf.reshape(condition, [])  # Make 0 rank for condition
					# condition > 0: use the AdaIN-merged encoding;
					# otherwise keep the plain vol2d encoding.
					chose_encoded = tf.cond(
						condition > 0,
						lambda: tf.identity(merge_encoded),
						lambda: tf.identity(vol2d_encoded))
					img2d_encoded = tf.identity(img2d_encoded)
			with tf.device('/device:GPU:1'):
				# Step 3: Run thru the decoder to get the paint image
				with tf.variable_scope('decoder_vgg19_2d'):
					vol2d_decoded = self.vgg19_decoder(chose_encoded)
					img2d_decoded = self.vgg19_decoder(img2d_encoded)
			with tf.device('/device:GPU:2'):
				with tf.variable_scope('decoder_3d'):
					vol3d_decoded = self.vol3d_decoder(vol2d_decoded)
					img3d_decoded = self.vol3d_decoder(img2d_decoded)

				# Run the decoded image volume back thru the 3d encoder
				with tf.variable_scope('encoder_3d'):
					img3d_encoded = self.vol3d_encoder(img3d_decoded)

		with tf.variable_scope('discrim'):
			with tf.device('/device:GPU:3'):
				vol3d_real = self.vgg19_discriminator(vol3d)
				img2d_real = self.vgg19_discriminator(img2d)
			with tf.device('/device:GPU:4'):
				vol3d_fake = self.vgg19_discriminator(vol3d_decoded)
				img2d_fake = self.vgg19_discriminator(img3d_encoded)

		#
		# Build losses here
		#
		with tf.name_scope('losses'):
			losses = []
			# L1 reconstruction errors. All four are summarized, but only
			# loss_vol3d and loss_img3d are added to the generator loss.
			loss_vol3d = tf.reduce_mean(tf.abs(vol3d - vol3d_decoded), name='loss_vol3d')
			loss_vol2d = tf.reduce_mean(tf.abs(vol2d - vol2d_decoded), name='loss_vol2d')
			loss_img2d = tf.reduce_mean(tf.abs(img2d - img2d_decoded), name='loss_img2d')
			loss_img3d = tf.reduce_mean(tf.abs(img2d - img3d_encoded), name='loss_img3d')

			add_moving_summary(loss_vol3d)
			add_moving_summary(loss_vol2d)
			add_moving_summary(loss_img2d)
			add_moving_summary(loss_img3d)

			losses.append(1e0*loss_vol3d)
			losses.append(1e0*loss_img3d)

			# GAN loss: real and fake discriminator outputs are concatenated
			# along axis 0 so a single loss covers both domains.
			G_loss, D_loss = self.build_losses(
				tf.concat([vol3d_real, img2d_real], axis=0),
				tf.concat([vol3d_fake, img2d_fake], axis=0))
			losses.append(G_loss)
		self.g_loss = tf.reduce_sum(losses, name='G_loss_total')
		self.d_loss = tf.reduce_sum([D_loss], name='D_loss_total')

		self.collect_variables()

		# Created only for their names.
		tf.identity(vol3d, name='out_vol3d')
		tf.identity(vol3d_decoded, name='out_vol3d_decoded')

		with tf.name_scope('visualization'):
			mid = 128

			def mid_slices(volume):
				# Four consecutive single-index slices along axis 0:
				# mid-2, mid-1, mid and mid+1.
				return [volume[mid + k:mid + k + 1, ...] for k in (-2, -1, 0, 1)]

			vol_slices = mid_slices(vol3d)
			vol_decoded_slices = mid_slices(vol3d_decoded)
			img_decoded_slices = mid_slices(img3d_decoded)

			# Three rows of six panels each: input, reconstruction, image path.
			viz = tf.concat([
				tf.concat(vol_slices + [vol2d, img2d], 2),
				tf.concat(vol_decoded_slices + [vol2d_decoded, img2d_decoded], 2),
				tf.concat(img_decoded_slices + [img3d_encoded, img2d], 2),
			], 1)

			viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
			tf.summary.image('colorized', viz, max_outputs=50)
Exemple #43
0
    def build_graph(self, *inputs):
        """Assemble the accuracy-predictor network and return its cost.

        ``inputs`` is consumed positionally: the first six entries are
        ``mimg, mflag, pimg, pflag, pve, target``; every entry after those is
        an extra statistic that is reshaped to a column and concatenated with
        the pooled feature before the final fully-connected layer.

        Side effects: sets ``self.pred`` (tensor named
        ``predicted_accuracy``) and ``self.cost`` (tensor named ``cost``), and
        registers ``self.cost`` with ``add_moving_summary``.

        Returns:
            ``self.cost`` -- the mean squared error between ``target`` and the
            sigmoid prediction.
        """
        mimg, mflag, pimg, pflag, pve, target = inputs[:6]
        extra_stats = list(inputs[6:])

        def img_flag_to_feat(img, flag, embed):
            """Embed ``img``, append the tiled ``flag`` channels, symmetrise."""
            # Embedding lookup: B x maxD x maxD x layer_embedding_size
            embedded = Dropout(tf.nn.embedding_lookup(embed, img),
                               keep_prob=self.dropout_kp)

            # Tile the flags max_depth times along the last axis and reshape
            # them so they can be concatenated with the embedding on axis 3.
            tiled_flag = tf.tile(flag, [1, 1, self.max_depth])
            flag_channels = tf.cast(
                tf.reshape(
                    tiled_flag,
                    [-1, self.max_depth, self.max_depth, self.n_flags]),
                tf.float32)
            combined = tf.concat([embedded, flag_channels],
                                 axis=3,
                                 name='concat_feats')

            # Channels move from the last axis to axis 1 (NCHW layout).
            nchw = tf.transpose(combined, [0, 3, 1, 2])

            # Mirror the lower triangle onto the upper one; the diagonal is
            # present in both halves, so it is subtracted once.
            lower = tf.matrix_band_part(nchw, -1, 0)
            upper = tf.matrix_transpose(lower)
            diagonal = tf.matrix_band_part(nchw, 0, 0)
            return lower + upper - diagonal

        with tf.variable_scope(self.vs_name):
            # One shared embedding table for the connection types.
            embed_init = tf.random_uniform_initializer(-0.1, 0.1)
            n_layer_types = LayerTypes.num_layer_types()
            embeddingW = tf.get_variable(
                'embedding', [n_layer_types, self.layer_embedding_size],
                initializer=embed_init)

            # Both images go through the same embedding and are stacked on
            # the channel axis.
            net = tf.concat(values=[
                img_flag_to_feat(mimg, mflag, embeddingW),
                img_flag_to_feat(pimg, pflag, embeddingW),
            ], axis=1)

            data_format = 'channels_first'
            ch_dim = 1
            spatial_layers = [
                Conv2D, Deconv2D, GroupedConv2D, AvgPooling, MaxPooling,
                BatchNorm, GlobalAvgPooling, ResizeImages, SeparableConv2D
            ]
            conv_layers = [Conv2D, Deconv2D, GroupedConv2D, SeparableConv2D]
            with argscope(spatial_layers, data_format=data_format):
                with argscope(conv_layers,
                              activation=tf.identity,
                              use_bias=False):
                    n_layers_per_scale = 4
                    n_scales = 4
                    out_filters = net.get_shape().as_list()[ch_dim]

                    for layer_idx in range(n_scales * n_layers_per_scale):
                        si, li = divmod(layer_idx, n_layers_per_scale)
                        # The first layer of every scale except the first
                        # strides by 2 and doubles the filter count.
                        downsample = (li == 0 and si > 0)
                        strides = 2 if downsample else 1
                        if downsample:
                            out_filters *= 2
                        with tf.variable_scope(
                                'layer{:03d}'.format(layer_idx)):
                            net = residual_bottleneck_layer(
                                'res_btl', net, out_filters, strides,
                                data_format)

                    # Only the last output feeds the prediction head.
                    net = GlobalAvgPooling('gap', net)
                    pve = tf.reshape(pve, [-1, 1])
                    extra_stats = [
                        tf.reshape(stat, [-1, 1]) for stat in extra_stats
                    ]
                    net = tf.concat(values=[pve, net] + extra_stats,
                                    axis=ch_dim)
                    pred = FullyConnected('fully_connect',
                                          net,
                                          1,
                                          activation=tf.sigmoid)
                    self.pred = tf.identity(tf.reshape(pred, [-1]),
                                            name='predicted_accuracy')

                    mse = tf.losses.mean_squared_error(target, self.pred)
                    self.cost = tf.identity(mse, name='cost')
                    add_moving_summary(self.cost)
                    return self.cost
Exemple #44
0
    def _build_graph(self, inputs):
        """Build an Inception-style 1000-way classifier and its training cost.

        Args:
            inputs: an ``(image, label)`` pair.  ``image`` is divided by 128
                before entering the network; ``label`` is fed to
                ``sparse_softmax_cross_entropy_with_logits``.

        Two auxiliary classifier heads (``loss1`` after ``incep3c`` and
        ``loss2`` after ``incep4e``) are each weighted by 0.3 and added to the
        main loss ``loss3``.  An L2 penalty with an exponentially decayed
        coefficient is added on top.

        Side effects: creates the ``output`` softmax tensor, registers moving
        summaries for the losses and top-1/top-5 training error, and sets
        ``self.cost``.
        """
        image, label = inputs
        image = image / 128.0

        def inception(name, x, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool,
                      pooltype):
            """Build one inception module and concatenate its branches.

            ``nr1x1 == 0`` marks a down-sampling module: the 1x1 branch is
            omitted and the remaining branches use stride 2.
            ``nrpool == 0`` leaves the pooling branch without a projection.
            """
            stride = 2 if nr1x1 == 0 else 1
            with tf.variable_scope(name):
                outs = []
                if nr1x1 != 0:
                    outs.append(Conv2D('conv1x1', x, nr1x1, 1))
                x2 = Conv2D('conv3x3r', x, nr3x3r, 1)
                outs.append(Conv2D('conv3x3', x2, nr3x3, 3, stride=stride))

                x3 = Conv2D('conv233r', x, nr233r, 1)
                x3 = Conv2D('conv233a', x3, nr233, 3)
                outs.append(Conv2D('conv233b', x3, nr233, 3, stride=stride))

                if pooltype == 'max':
                    x4 = MaxPooling('mpool', x, 3, stride, padding='SAME')
                else:
                    assert pooltype == 'avg'
                    x4 = AvgPooling('apool', x, 3, stride, padding='SAME')
                if nrpool != 0:  # pool + passthrough if nrpool == 0
                    x4 = Conv2D('poolproj', x4, nrpool, 1)
                outs.append(x4)
                # Branches are joined on axis 3.
                return tf.concat(outs, 3, name='concat')

        with argscope(Conv2D, nl=BNReLU, use_bias=False):
            l = (LinearWrap(image).Conv2D('conv0', 64, 7, stride=2).MaxPooling(
                'pool0', 3, 2, padding='SAME').Conv2D('conv1', 64, 1).Conv2D(
                    'conv2', 192, 3).MaxPooling('pool2', 3, 2,
                                                padding='SAME')())
            # 28
            l = inception('incep3a', l, 64, 64, 64, 64, 96, 32, 'avg')
            l = inception('incep3b', l, 64, 64, 96, 64, 96, 64, 'avg')
            l = inception('incep3c', l, 0, 128, 160, 64, 96, 0, 'max')

            # First auxiliary classifier head.
            br1 = (LinearWrap(l).Conv2D('loss1conv', 128, 1).FullyConnected(
                'loss1fc', 1024,
                nl=tf.nn.relu).FullyConnected('loss1logit',
                                              1000,
                                              nl=tf.identity)())
            loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=br1, labels=label)
            loss1 = tf.reduce_mean(loss1, name='loss1')

            # 14
            l = inception('incep4a', l, 224, 64, 96, 96, 128, 128, 'avg')
            l = inception('incep4b', l, 192, 96, 128, 96, 128, 128, 'avg')
            l = inception('incep4c', l, 160, 128, 160, 128, 160, 128, 'avg')
            l = inception('incep4d', l, 96, 128, 192, 160, 192, 128, 'avg')
            l = inception('incep4e', l, 0, 128, 192, 192, 256, 0, 'max')

            # Second auxiliary classifier head.
            br2 = Conv2D('loss2conv', l, 128, 1)
            br2 = FullyConnected('loss2fc', br2, 1024, nl=tf.nn.relu)
            br2 = FullyConnected('loss2logit', br2, 1000, nl=tf.identity)
            loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=br2, labels=label)
            loss2 = tf.reduce_mean(loss2, name='loss2')

            # 7
            l = inception('incep5a', l, 352, 192, 320, 160, 224, 128, 'avg')
            l = inception('incep5b', l, 352, 192, 320, 192, 224, 128, 'max')
            l = GlobalAvgPooling('gap', l)

            logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)
        # Created only for its name, so the tensor can be fetched as 'output'.
        tf.nn.softmax(logits, name='output')
        loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                               labels=label)
        loss3 = tf.reduce_mean(loss3, name='loss3')

        cost = tf.add_n([loss3, 0.3 * loss2, 0.3 * loss1],
                        name='weighted_cost')
        # Varargs form, matching the other add_moving_summary call in this
        # method (the list-argument form is deprecated in tensorpack).
        add_moving_summary(cost, loss1, loss2, loss3)

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error_top1'))

        wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error_top5'))

        # L2 weight decay on every variable whose name matches '.*/W'
        # (convolution kernels as well as fully-connected weights).  The
        # coefficient starts at 0.0002 and is multiplied by 0.7 every 80000
        # steps (staircase).
        wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 80000,
                                          0.7, True)
        wd_cost = tf.multiply(wd_w,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='l2_regularize_loss')

        self.cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(wd_cost, self.cost)
Exemple #45
0
    def build_graph(self, img, box, mask, bbx, z, z2):
        """Build the masked-region generator/discriminator graph and its losses.

        The generator fills the region selected by the binarised ``mask`` and
        is run three times with different style codes: the externally supplied
        ``z2``, a freshly sampled ``z3``, and ``zgt`` sampled from the style
        encoder applied to the ground-truth masked region.  A multi-scale
        discriminator scores (image, mask) pairs for the ``zgt`` and ``z3``
        outputs.

        Sets ``self.g_loss``, ``self.d_loss`` and ``self.z_loss`` and then calls
        ``self.collect_variables('gen', 'discrim')``.

        NOTE(review): ``bbx`` and ``z`` are never read in this method, and
        ``box`` only feeds ``img_crop``, which is itself unused afterwards.
        """

        with tf.name_scope('preprocess'):
            img_crop = tf.multiply(img, 1 - box)
            # Rescale with x / 128 - 1 (presumably inputs are in [0, 255] --
            # TODO confirm against the data pipeline).
            img = (img / 128.0 - 1.0)
            # NOTE(review): img_crop and bin_mask are not used further below.
            img_crop = (img_crop / 128.0 - 1.0)
            bin_mask = mask/255.
            mask = (mask / 128.0 - 1.0)

        with tf.name_scope('styleIn'):
            # Random style code of shape [batch, 1, 1, STYLE_DIM_z2].
            style_shape_z2 = [tf.shape(mask)[0], 1, 1, STYLE_DIM_z2]
            z3 = tf.random_normal(style_shape_z2, mean=0.0, stddev=1.0, dtype=tf.float32, name='z3')

        def vizN(name, a):
            """Concatenate tensors ``a`` along axis 2 and log them as a uint8 image summary."""
            with tf.name_scope(name):
                im = tf.concat(a, axis=2)
                # Inverse of the preprocessing scale, then clip to [0, 255].
                im = (im + 1.0) * 128
                im = tf.clip_by_value(im, 0, 255)
                im = tf.cast(im, tf.uint8, name='viz')
            tf.summary.image(name, im, max_outputs=50)

        # use the initializers from torch
        # NOTE(review): no keyword arguments are passed to argscope here, so
        # this scope does not appear to set any initializer -- confirm intent.
        with argscope([Conv2D, Conv2DTranspose, tf.layers.conv2d]):
            #Let us encode the images
            with tf.variable_scope('gen'):

                # Map mask from the rescaled range back to (mask + 1) / 2 and
                # round to 0/1.
                bin_gen_mask_gt = tf.round((mask + 1) * 0.5)
                # Generator input: original image where the mask is 0, and -1
                # where the mask is 1.
                in_gen_gt = img *(1-bin_gen_mask_gt) - bin_gen_mask_gt

                with tf.variable_scope('senc'):
                    # Style encoder sees only the masked region of the real image.
                    zgt_mean, zgt_var = self.style_encoder(img*bin_gen_mask_gt)

                zgt = z_sample(zgt_mean, zgt_var)
                # Tile each style code along axes 1 and 2 to the generator
                # input's spatial size.
                zmat = tf.tile(zgt, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])
                z2mat = tf.tile(z2, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])
                z3mat = tf.tile(z3, [1, in_gen_gt.shape[1], in_gen_gt.shape[2], 1])
                with tf.variable_scope('genRGB'):
                    # Each output is composited: generated pixels inside the
                    # mask, original image outside it.
                    gen_im = self.generator(in_gen_gt, z2mat, nb_blocks)
                    gen_im = gen_im*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)

                    gen_im_z3 = self.generator(in_gen_gt, z3mat, nb_blocks)
                    gen_im_z3 = gen_im_z3*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)

                    gen_im_gt = self.generator(in_gen_gt, zmat, nb_blocks)
                    gen_im_gt = gen_im_gt*bin_gen_mask_gt + img*(1 - bin_gen_mask_gt)

                with tf.variable_scope('senc'):
                    # Re-encode the z3 sample; used for the z3 reconstruction loss.
                    z3_recon, _ = self.style_encoder(gen_im_z3*bin_gen_mask_gt)

            # Real and generated images are stacked on the batch axis and
            # rescaled with (x + 1) / 2 before the VGG pass.
            # NOTE(review): only f3 is consumed below; f1, f2 and f4 are unused.
            f1, f2, f3, f4 = self.vgg_16(tf.concat([(img+1)*0.5, (gen_im_gt+1)*0.5], axis=0))


            #The final discriminator that takes them both
            # NOTE(review): the three *_mask lists are never filled or read.
            discrim_out_mask = []
            discrim_fm_real_mask = []
            discrim_fm_fake_mask = []

            discrim_out = []
            discrim_out_z3 = []
            discrim_fm_real = []
            discrim_fm_fake = []

            with tf.variable_scope('discrim'):

                # Pass 1: real image vs. the zgt-conditioned output.
                with tf.variable_scope('discrim_im'):

                    D_input_real = tf.concat([img, mask], axis=-1)
                    D_input_fake = tf.concat([gen_im_gt, mask], axis=-1)
                    D_inputs = [D_input_real, D_input_fake]

                    for s in range(DIS_SCALE):
                        with tf.variable_scope('s%d'%s):
                            # D_inputs is reassigned, so every scale after the
                            # first downsamples the previous scale's inputs.
                            if s != 0:
                                D_inputs = [downsample(im) for im in D_inputs]

                            im_s, im_recon_s = D_inputs

                            with tf.variable_scope('Ax'):
                                Ax_feats_real, Ax_fm_real = self.discrim_enc(im_s)
                                Ax_feats_fake, Ax_fm_fake = self.discrim_enc(im_recon_s)

                            with tf.variable_scope('Ah'):
                                Ah_dis_real, Ah_fm_real = self.discrim_patch_classify(Ax_feats_real)
                                Ah_dis_fake, Ah_fm_fake = self.discrim_patch_classify(Ax_feats_fake)

                            discrim_out.append((Ah_dis_real, Ah_dis_fake))
                            # Feature maps from both stages feed the FM loss.
                            discrim_fm_real += Ax_fm_real + Ah_fm_real
                            discrim_fm_fake += Ax_fm_fake + Ah_fm_fake

                # Pass 2: same scope with reuse=True, scoring the z3 output.
                # Feature maps are discarded here.
                with tf.variable_scope('discrim_im', reuse=True):

                    D_input_real = tf.concat([img, mask], axis=-1)
                    D_input_fake = tf.concat([gen_im_z3, mask], axis=-1)
                    D_inputs = [D_input_real, D_input_fake]

                    for s in range(DIS_SCALE):
                        with tf.variable_scope('s%d'%s):
                            if s != 0:
                                D_inputs = [downsample(im) for im in D_inputs]

                            im_s, im_recon_s = D_inputs

                            with tf.variable_scope('Ax'):
                                Ax_feats_real, _ = self.discrim_enc(im_s)
                                Ax_feats_fake, _ = self.discrim_enc(im_recon_s)

                            with tf.variable_scope('Ah'):
                                Ah_dis_real, _ = self.discrim_patch_classify(Ax_feats_real)
                                Ah_dis_fake, _ = self.discrim_patch_classify(Ax_feats_fake)

                            discrim_out_z3.append((Ah_dis_real, Ah_dis_fake))


            vizN('A_recon', [img, gen_im_gt, gen_im_z3, gen_im])

        def LSGAN_hinge_loss(real, fake):
            """Return ``(g_loss, d_loss)`` for discriminator outputs ``real`` and ``fake``.

            Despite the LSGAN in the name, the formulas are hinge terms:
            d_real = mean(max(0, 1 - real)), d_fake = mean(max(0, 1 + fake)),
            d_loss = 0.5 * (d_real + d_fake), g_loss = mean(-fake).
            """
            d_real = tf.reduce_mean(-tf.minimum(0., tf.subtract(real, 1.)), name='d_real')
            d_fake = tf.reduce_mean(-tf.minimum(0., tf.add(-fake,-1.)), name='d_fake')
            d_loss = tf.multiply(d_real + d_fake, 0.5, name='d_loss')

            g_loss = tf.reduce_mean(-fake, name='g_loss')
            # add_moving_summary(g_loss)
            return g_loss, d_loss

        # Per-sample weight for the reconstruction loss: SHAPE*SHAPE divided by
        # the mask sum over axes 1-3.  A zero mask sum is replaced by
        # SHAPE*SHAPE so the weight becomes 1 and the division is safe.
        numelmask = tf.reduce_sum(bin_gen_mask_gt, axis=[1, 2, 3])
        numelall = tf.ones_like(numelmask) * SHAPE * SHAPE
        numelmask = tf.where(tf.equal(numelmask, 0), numelall, numelmask)
        weight_recon_loss = numelall / numelmask
        with tf.name_scope('losses'):
            with tf.name_scope('RGB_losses'):
                with tf.name_scope('GAN_loss'):
                    # gan loss
                    # Summed over discriminator scales (zgt-conditioned output).
                    G_loss, D_loss = zip(*[LSGAN_hinge_loss(real, fake) for real, fake in discrim_out])
                    G_loss = tf.add_n(G_loss, name='lsgan_loss')
                    D_loss = tf.add_n(D_loss, name='Disc_loss')
                with tf.name_scope('GAN_loss_z3'):
                    # gan loss
                    # Same, for the z3-conditioned output.
                    G_loss_z3, D_loss_z3 = zip(*[LSGAN_hinge_loss(real, fake) for real, fake in discrim_out_z3])
                    G_loss_z3 = tf.add_n(G_loss_z3, name='lsgan_loss')
                    D_loss_z3 = tf.add_n(D_loss_z3, name='Disc_loss')
                with tf.name_scope('z_recon_loss'):
                    # L1 distance between z3 and its re-encoding.
                    z3_recon_loss = tf.reduce_mean(tf.abs(z3 - z3_recon), name='z3_recon_loss')
                with tf.name_scope('FM_loss'):
                    # Mean L1 distance between paired real/fake discriminator
                    # feature maps, averaged over the number of pairs.
                    FM_loss = [tf.reduce_mean(tf.abs(j - k))for j,k in zip(discrim_fm_real, discrim_fm_fake)]
                    FM_loss = tf.add_n(FM_loss)/len(FM_loss)
                with tf.name_scope('im_recon_loss'):
                    # Per-sample mean L1 error, scaled by weight_recon_loss.
                    im_recon_loss = tf.reduce_mean(tf.reduce_mean(tf.abs(img - gen_im_gt), axis=[1,2,3])*weight_recon_loss)
                with tf.name_scope('kl_loss'):
                    KLloss = kl_loss(zgt_mean, zgt_var)
                with tf.name_scope('perceptualLoss'):
                    # f3 holds real and generated features stacked on axis 0;
                    # split back into halves in the order they were concatenated.
                    f3_1, f3_2 = tf.split(f3, 2, 0)
                    # perceptual_loss = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(f3_1, f3_2), axis=[1,2,3])*weight_recon_loss)
                    perceptual_loss = tf.nn.l2_loss(f3_1-f3_2)/tf.to_float(tf.size(f3_1))

                    # perceptual_loss = percep_loss([f1, f2, f3, f4], [1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0])




        LAMBDA = 10.0
        LAMBDA_KL = 0.05
        # Generator objective: both adversarial terms divided by DIS_SCALE,
        # plus the weighted FM, reconstruction, KL and perceptual terms.
        # z3_recon_loss is kept out of g_loss and exposed as self.z_loss.
        self.g_loss = G_loss/DIS_SCALE + G_loss_z3/DIS_SCALE + LAMBDA*FM_loss + LAMBDA*im_recon_loss + LAMBDA_KL*KLloss \
                      + LAMBDA*perceptual_loss
        self.d_loss = D_loss + D_loss_z3
        self.z_loss = LAMBDA * z3_recon_loss
        # presumably gathers the variables under the 'gen' and 'discrim'
        # scopes -- verify against collect_variables.
        self.collect_variables('gen', 'discrim')

        tf.summary.histogram('z_var', zgt_var)
        tf.summary.histogram('z_mean', zgt_mean)
        # NOTE(review): G_loss_z3 and D_loss_z3 are not added to the summaries.
        add_moving_summary(G_loss, D_loss, FM_loss, im_recon_loss,
                           KLloss, z3_recon_loss, perceptual_loss)
Exemple #46
0
    def _build_graph(self, inputs):
        """Build a three-stage residual classifier (NCHW) and its cost.

        Args:
            inputs: an ``(image, label)`` pair.  ``image`` is divided by 128
                and its axis 3 is moved to position 1 before the first
                convolution.

        Each stage holds ``self.n`` residual units; stages two and three
        start with a unit that doubles the channel count and strides by 2.
        The final layer produces 10 logits.

        Side effects: asserts that a GPU is available, creates the ``output``
        softmax tensor, registers moving and parameter summaries, and sets
        ``self.cost`` to cross-entropy plus decayed L2 weight penalty.
        """
        image, label = inputs
        image = image / 128.0
        assert tf.test.is_gpu_available()
        image = tf.transpose(image, [0, 3, 1, 2])

        def residual(name, x, increase_dim=False, first=False):
            """One residual unit: two convolutions plus a shortcut.

            With ``increase_dim`` the unit doubles the channels and uses
            stride 2; the shortcut is then average-pooled and zero-padded on
            the channel axis to match.  With ``first`` the input is used
            as-is instead of going through ``BNReLU`` first.
            """
            in_channel = x.get_shape().as_list()[1]
            out_channel, stride1 = ((in_channel * 2, 2) if increase_dim
                                    else (in_channel, 1))

            with tf.variable_scope(name):
                pre_act = x if first else BNReLU(x)
                conv_a = Conv2D('conv1',
                                pre_act,
                                out_channel,
                                stride=stride1,
                                nl=BNReLU)
                conv_b = Conv2D('conv2', conv_a, out_channel)

                shortcut = x
                if increase_dim:
                    half = in_channel // 2
                    shortcut = AvgPooling('pool', shortcut, 2)
                    shortcut = tf.pad(
                        shortcut, [[0, 0], [half, half], [0, 0], [0, 0]])

                return conv_b + shortcut

        with argscope([Conv2D, AvgPooling, BatchNorm, GlobalAvgPooling],
                      data_format='NCHW'):
            with argscope(Conv2D,
                          nl=tf.identity,
                          use_bias=False,
                          kernel_shape=3,
                          W_init=variance_scaling_initializer(mode='FAN_OUT')):
                net = Conv2D('conv0', image, 16, nl=BNReLU)

                # Stage 1 -- 32,c=16
                net = residual('res1.0', net, first=True)
                for k in range(1, self.n):
                    net = residual('res1.{}'.format(k), net)

                # Stage 2 -- 16,c=32
                net = residual('res2.0', net, increase_dim=True)
                for k in range(1, self.n):
                    net = residual('res2.{}'.format(k), net)

                # Stage 3 -- 8,c=64
                net = residual('res3.0', net, increase_dim=True)
                for k in range(1, self.n):
                    net = residual('res3.' + str(k), net)
                net = BNReLU('bnlast', net)
                net = GlobalAvgPooling('gap', net)

        logits = FullyConnected('linear', net, out_dim=10, nl=tf.identity)
        # Created only for its name, so the tensor can be fetched as 'output'.
        tf.nn.softmax(logits, name='output')

        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        xent = tf.reduce_mean(xent, name='cross_entropy_loss')

        # Training error: 1.0 where the label is not the top-1 prediction.
        top1_miss = tf.logical_not(tf.nn.in_top_k(logits, label, 1))
        wrong = tf.to_float(top1_miss, name='wrong_vector')
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # L2 penalty over every variable matching '.*/W'; the coefficient
        # starts at 0.0002 and is multiplied by 0.2 every 480000 steps
        # (staircase).
        decay_coeff = tf.train.exponential_decay(0.0002,
                                                 get_global_step_var(),
                                                 480000, 0.2, True)
        wd_cost = tf.multiply(decay_coeff,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='wd_cost')
        add_moving_summary(xent, wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W
        self.cost = tf.add_n([xent, wd_cost], name='cost')
    def _build_graph(self, inputs):
        """Build the InfoGAN graph: generator, discriminator and MI-regularised losses.

        Args:
            inputs: a sequence whose first element is the batch of real
                samples; a trailing axis is appended to it with
                ``tf.expand_dims`` before it reaches the discriminator.

        The latent vector is the concatenation of a structured code ``zc``
        (sampled from ``self.factors``) and uniform noise in [-1, 1) with
        ``NOISE_DIM`` columns.  After ``self.build_losses`` has produced the
        plain GAN objectives, the estimated mutual information is subtracted
        from both ``self.g_loss`` and ``self.d_loss``.

        Side effects: sets ``self.factors``, ``self.g_loss`` and
        ``self.d_loss``, registers image and moving summaries, and calls
        ``self.collect_variables()``.
        """
        real_sample = inputs[0]
        real_sample = tf.expand_dims(real_sample, -1)

        # latent space is cat(10) x uni(1) x uni(1) x noise(NOISE_DIM)
        self.factors = ProductDistribution("factors", [
            CategoricalDistribution("cat", 10),
            GaussianWithUniformSample("uni_a", 1),
            GaussianWithUniformSample("uni_b", 1)
        ])
        # prior: the assumption how the factors are presented in the dataset
        # (0.1 for each of the 10 categories, 0 for the two continuous codes).
        prior = tf.constant([0.1] * 10 + [0, 0],
                            tf.float32, [12],
                            name='prior')
        batch_prior = tf.tile(tf.expand_dims(prior, 0), [BATCH, 1],
                              name='batch_prior')

        # sample the latent code:
        zc = symbf.shapeless_placeholder(self.factors.sample(BATCH, prior),
                                         0,
                                         name='z_code')
        z_noise = symbf.shapeless_placeholder(tf.random_uniform(
            [BATCH, NOISE_DIM], -1, 1),
                                              0,
                                              name='z_noise')
        z = tf.concat([zc, z_noise], 1, name='z')

        with argscope([Conv2D, Deconv2D, FullyConnected],
                      W_init=tf.truncated_normal_initializer(stddev=0.02)):
            with tf.variable_scope('gen'):
                fake_sample = self.generator(z)
                # presumably generator output lies in [0, 1] -- TODO confirm;
                # it is scaled by 255 and cast to uint8 for the image summary.
                fake_sample_viz = tf.cast(fake_sample * 255.0,
                                          tf.uint8,
                                          name='viz')
                tf.summary.image('gen', fake_sample_viz, max_outputs=30)

            # may need to investigate how bn stats should be updated across two discrim
            with tf.variable_scope('discrim'):
                real_pred, _ = self.discriminator(real_sample)
                fake_pred, dist_param = self.discriminator(fake_sample)

        # Mutual information between x (i.e. zc in this case) and some
        # information s (the generated samples in this case):
        #
        #             I(x;s) = H(x) - H(x|s)
        #                    = H(x) + E[\log P(x|s)]
        #
        # The distribution from which zc is sampled, in this case, is set to
        # a fixed prior already.  For the second term, we can maximize its
        # variational lower bound:
        #             E_{x \sim P(x|s)}[\log Q(x|s)]
        # where Q(x|s) is a proposal distribution to approximate P(x|s).
        #
        # Here, Q(x|s) is assumed to be a distribution which shares the form
        # of self.factors, and whose parameters are predicted by the
        # discriminator network.
        #
        # (Kept as comments rather than a bare string literal: "\l" and "\s"
        # are invalid escape sequences in a non-raw string.)
        with tf.name_scope("mutual_information"):
            ents = self.factors.entropy(zc, batch_prior)
            entropy = tf.add_n(ents, name='total_entropy')
            # Note that dropping this term has no effect because the entropy
            # of prior is a constant. The paper mentioned it but didn't use it.
            # Adding this term may make the curve less stable because the
            # entropy estimated from the samples is not the true value.

            # post-process output vector from discriminator to obtain valid distribution parameters
            encoder_activation = self.factors.encoder_activation(dist_param)
            cond_ents = self.factors.entropy(zc, encoder_activation)
            cond_entropy = tf.add_n(cond_ents,
                                    name="total_conditional_entropy")

            MI = tf.subtract(entropy, cond_entropy, name='mutual_information')
            summary.add_moving_summary(entropy, cond_entropy, MI, *ents)

        # default GAN objective
        self.build_losses(real_pred, fake_pred)

        # subtract mutual information for latent factors (we want to maximize them)
        self.g_loss = tf.subtract(self.g_loss, MI, name='total_g_loss')
        self.d_loss = tf.subtract(self.d_loss, MI, name='total_d_loss')

        summary.add_moving_summary(self.g_loss, self.d_loss)

        # distinguish between variables of generator and discriminator updates
        self.collect_variables()
Exemple #48
0
    def build_graph(self, x, bboxes_xyz, bboxes_lwh, box3d_pts_label,
                    semantic_labels, heading_labels, heading_residuals,
                    size_labels, size_residuals):
        # def build_graph(self, x, bboxes_xyz, bboxes_lwh, semantic_labels, heading_labels, heading_residuals, size_labels, size_residuals):
        l0_xyz = x
        l0_points = None

        # Set Abstraction layers
        l1_xyz, l1_points, l1_indices = pointnet_sa_module(l0_xyz,
                                                           l0_points,
                                                           npoint=2048,
                                                           radius=0.2,
                                                           nsample=64,
                                                           mlp=[64, 64, 128],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa1')
        l2_xyz, l2_points, l2_indices = pointnet_sa_module(l1_xyz,
                                                           l1_points,
                                                           npoint=1024,
                                                           radius=0.4,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa2')
        l3_xyz, l3_points, l3_indices = pointnet_sa_module(l2_xyz,
                                                           l2_points,
                                                           npoint=512,
                                                           radius=0.8,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa3')
        l4_xyz, l4_points, l4_indices = pointnet_sa_module(l3_xyz,
                                                           l3_points,
                                                           npoint=256,
                                                           radius=1.2,
                                                           nsample=64,
                                                           mlp=[128, 128, 256],
                                                           mlp2=None,
                                                           group_all=False,
                                                           scope='sa4')
        # Feature Propagation layers
        l3_points = pointnet_fp_module(l3_xyz,
                                       l4_xyz,
                                       l3_points,
                                       l4_points, [256, 256],
                                       scope='fp1')
        seeds_points = pointnet_fp_module(l2_xyz,
                                          l3_xyz,
                                          l2_points,
                                          l3_points, [256, 256],
                                          scope='fp2')
        seeds_xyz = l2_xyz

        # Voting Module layers
        offset = self.hough_voting_mlp(seeds_points)

        votes_xyz_points = tf.concat([seeds_xyz, seeds_points], 2) + offset
        votes_xyz, votes_points = tf.slice(votes_xyz_points, (0, 0, 0), (-1, -1, 3)), \
            tf.slice(votes_xyz_points, (0, 0, 3), (-1, -1, -1))

        vote_reg_loss = self.vote_reg_loss(seeds_xyz, votes_xyz, bboxes_xyz,
                                           box3d_pts_label)

        # Proposal Module layers
        # Farthest point sampling on seeds
        proposals_xyz, proposals_output, _ = pointnet_sa_module(
            votes_xyz,
            votes_points,
            npoint=config.PROPOSAL_NUM,
            radius=0.3,
            nsample=64,
            mlp=[128, 128, 128],
            mlp2=[128, 128, 5 + 2 * config.NH + 4 * config.NS + config.NC],
            group_all=False,
            scope='proposal')

        nms_iou = tf.get_variable('nms_iou',
                                  shape=[],
                                  initializer=tf.constant_initializer(0.25),
                                  trainable=False)
        if not get_current_tower_context().is_training:

            def get_3d_bbox(box_size, heading_angle, center):
                batch_size = tf.shape(heading_angle)[0]
                c = tf.cos(heading_angle)
                s = tf.sin(heading_angle)
                zeros = tf.zeros_like(c)
                ones = tf.ones_like(c)
                rotation = tf.reshape(
                    tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c],
                             -1), tf.stack([batch_size, -1, 3, 3]))
                l, w, h = box_size[..., 0], box_size[..., 1], box_size[
                    ..., 2]  # lwh(xzy) order!!!
                corners = tf.reshape(
                    tf.stack([
                        l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2,
                        -l / 2, h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2,
                        -h / 2, -h / 2, w / 2, -w / 2, -w / 2, w / 2, w / 2,
                        -w / 2, -w / 2, w / 2
                    ], -1), tf.stack([batch_size, -1, 3, 8]))
                return tf.einsum('ijkl,ijlm->ijmk',
                                 rotation, corners) + tf.expand_dims(
                                     center, 2)  # B * N * 8 * 3

            class_mean_size_tf = tf.constant(class_mean_size)
            size_cls_pred = tf.argmax(
                proposals_output[..., 5 + 2 * config.NH:5 + 2 * config.NH +
                                 config.NS],
                axis=-1)
            size_cls_pred_onehot = tf.one_hot(size_cls_pred,
                                              depth=config.NS,
                                              axis=-1)  # B * N * NS
            size_residual_pred = tf.reduce_sum(
                tf.expand_dims(size_cls_pred_onehot, -1) * tf.reshape(
                    proposals_output[..., 5 + 2 * config.NH + config.NS:5 +
                                     2 * config.NH + 4 * config.NS],
                    (-1, config.PROPOSAL_NUM, config.NS, 3)),
                axis=2)
            size_pred = tf.gather_nd(
                class_mean_size_tf,
                tf.expand_dims(size_cls_pred, -1)) * tf.maximum(
                    1 + size_residual_pred, 1e-6)  # B * N * 3: size
            # with tf.control_dependencies([tf.print(size_pred[0, 0, 2])]):
            center_pred = proposals_xyz + proposals_output[...,
                                                           2:5]  # B * N * 3
            heading_cls_pred = tf.argmax(proposals_output[...,
                                                          5:5 + config.NH],
                                         axis=-1)
            heading_cls_pred_onehot = tf.one_hot(heading_cls_pred,
                                                 depth=config.NH,
                                                 axis=-1)
            heading_residual_pred = tf.reduce_sum(
                heading_cls_pred_onehot *
                proposals_output[..., 5 + config.NH:5 + 2 * config.NH],
                axis=2)
            heading_pred = tf.floormod(
                (tf.cast(heading_cls_pred, tf.float32) * 2 +
                 heading_residual_pred) * np.pi / config.NH, 2 * np.pi)

            # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]):
            bboxes = get_3d_bbox(
                size_pred, heading_pred,
                center_pred)  # B * N * 8 * 3,  lhw(xyz) order!!!

            # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1)  # B * N * 6,  lhw(xyz) order!!!
            # with tf.control_dependencies([tf.print(bboxes[0, 0])]):
            nms_idx = NMS3D(bboxes,
                            tf.reduce_max(proposals_output[..., -config.NC:],
                                          axis=-1), proposals_output[..., :2],
                            nms_iou)  # Nnms * 2

            bboxes_pred = tf.gather_nd(bboxes, nms_idx,
                                       name='bboxes_pred')  # Nnms * 8 * 3
            class_scores_pred = tf.gather_nd(
                proposals_output[..., -config.NC:],
                nms_idx,
                name='class_scores_pred')  # Nnms * C
            batch_idx = tf.identity(
                nms_idx[:, 0], name='batch_idx'
            )  # Nnms, this is used to identify between batches

            return

        # calculate positive and negative proposal idxes
        bboxes_xyz_gt = bboxes_xyz  # B * BB * 3
        bboxes_labels_gt = semantic_labels  # B * BB
        bboxes_heading_labels_gt = heading_labels
        bboxes_heading_residuals_gt = heading_residuals
        bboxes_size_labels_gt = size_labels
        bboxes_size_residuals_gt = size_residuals

        dist_mat = tf.norm(tf.expand_dims(proposals_xyz, 2) -
                           tf.expand_dims(bboxes_xyz_gt, 1),
                           axis=-1)  # B * PR * BB
        bboxes_assignment = tf.argmin(dist_mat, axis=-1)  # B * PR
        min_dist = tf.reduce_min(dist_mat, axis=-1)

        thres_mid = tf.reduce_mean(min_dist, axis=-1, keepdims=True)
        thres_min = tf.reduce_min(min_dist, axis=-1, keepdims=True)
        thres_max = tf.reduce_max(min_dist, axis=-1, keepdims=True)
        POSITIVE_THRES, NEGATIVE_THRES = (thres_mid + thres_min) / 2.0, (
            thres_mid + thres_max) / 2.0

        positive_idxes = tf.where(min_dist < POSITIVE_THRES)
        negative_idxes = tf.where(min_dist > NEGATIVE_THRES)
        positive_gt_idxes = tf.stack([
            positive_idxes[:, 0],
            tf.gather_nd(bboxes_assignment, positive_idxes)
        ],
                                     axis=1)

        # objectiveness loss
        pos_obj_cls_score = tf.gather_nd(proposals_output[..., :2],
                                         positive_idxes)
        pos_obj_cls_gt = tf.ones([tf.shape(positive_idxes)[0]], dtype=tf.int32)
        neg_obj_cls_score = tf.gather_nd(proposals_output[..., :2],
                                         negative_idxes)
        neg_obj_cls_gt = tf.zeros([tf.shape(negative_idxes)[0]],
                                  dtype=tf.int32)
        obj_cls_loss = tf.identity(
            (tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=pos_obj_cls_score, labels=pos_obj_cls_gt)) +
             tf.reduce_mean(
                 tf.nn.sparse_softmax_cross_entropy_with_logits(
                     logits=neg_obj_cls_score, labels=neg_obj_cls_gt))) / 2.0,
            name='obj_cls_loss')
        obj_correct = tf.concat([
            tf.cast(tf.nn.in_top_k(pos_obj_cls_score, pos_obj_cls_gt, 1),
                    tf.float32),
            tf.cast(tf.nn.in_top_k(neg_obj_cls_score, neg_obj_cls_gt, 1),
                    tf.float32)
        ],
                                axis=0,
                                name='obj_correct')
        obj_accuracy = tf.reduce_mean(obj_correct, name='obj_accuracy')

        # center regression losses
        center_gt = tf.gather_nd(bboxes_xyz_gt, positive_gt_idxes)
        delta_predicted = tf.gather_nd(proposals_output[..., 2:5],
                                       positive_idxes)
        delta_gt = center_gt - tf.gather_nd(proposals_xyz, positive_idxes)
        center_loss = tf.reduce_mean(
            tf.reduce_sum(tf.losses.huber_loss(
                labels=delta_gt,
                predictions=delta_predicted,
                reduction=tf.losses.Reduction.NONE),
                          axis=-1))

        # Appendix A1: chamfer loss, assignment at one bbox to each gt bbox
        bboxes_assignment_dual = tf.argmin(dist_mat, axis=1)  # B * BB
        batch_idx = tf.tile(
            tf.expand_dims(tf.range(
                tf.shape(bboxes_assignment_dual, out_type=tf.int64)[0]),
                           axis=-1),
            [1, tf.shape(bboxes_assignment_dual)[1]])  # B * BB
        delta_gt_dual = bboxes_xyz_gt - tf.gather_nd(
            proposals_xyz,
            tf.stack([batch_idx, bboxes_assignment_dual],
                     axis=-1))  # B * BB * 3
        delta_predicted_dual = tf.gather_nd(
            proposals_output[..., 2:5],
            tf.stack([batch_idx, bboxes_assignment_dual],
                     axis=-1))  # B * BB * 3)
        center_loss_dual = tf.reduce_mean(
            tf.reduce_sum(tf.losses.huber_loss(
                labels=delta_gt_dual,
                predictions=delta_predicted_dual,
                reduction=tf.losses.Reduction.NONE),
                          axis=-1))

        # add up
        center_loss += center_loss_dual
        center_loss = tf.identity(center_loss, 'center_loss')

        # heading classification loss
        heading_cls_gt = tf.gather_nd(bboxes_heading_labels_gt,
                                      positive_gt_idxes)
        heading_cls_score = tf.gather_nd(
            proposals_output[..., 5:5 + config.NH], positive_idxes)
        heading_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=heading_cls_score, labels=heading_cls_gt),
            name='heading_cls_loss')
        # heading residual loss
        heading_cls_gt_onehot = tf.one_hot(heading_cls_gt,
                                           depth=config.NH,
                                           on_value=1,
                                           off_value=0,
                                           axis=-1)  # Np * NH
        heading_residual_gt = tf.gather_nd(bboxes_heading_residuals_gt,
                                           positive_gt_idxes) / (
                                               np.pi / config.NH)  # Np
        heading_residual_predicted = tf.gather_nd(
            proposals_output[..., 5 + config.NH:5 + 2 * config.NH],
            positive_idxes)  #  Np * NH
        heading_residual_loss = tf.losses.huber_loss(labels=heading_residual_gt,
                                                     predictions=tf.reduce_sum(heading_residual_predicted * \
                                                                               tf.to_float(heading_cls_gt_onehot),
                                                                               axis=1),
                                                     reduction=tf.losses.Reduction.MEAN)
        heading_residual_loss = tf.identity(heading_residual_loss,
                                            name='heading_residual_loss')

        # Size loss
        size_cls_gt = tf.gather_nd(bboxes_size_labels_gt, positive_gt_idxes)
        size_cls_score = tf.gather_nd(
            proposals_output[...,
                             5 + 2 * config.NH:5 + 2 * config.NH + config.NS],
            positive_idxes)
        size_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=size_cls_score, labels=size_cls_gt),
            name='size_cls_loss')
        # size residual loss
        size_cls_gt_onehot = tf.one_hot(size_cls_gt,
                                        depth=config.NS,
                                        on_value=1,
                                        off_value=0,
                                        axis=-1)  # Np * NS
        size_cls_gt_onehot = tf.tile(
            tf.expand_dims(tf.to_float(size_cls_gt_onehot), -1),
            [1, 1, 3])  # Np * NS * 3

        mean_size_arr_expand = tf.expand_dims(
            tf.constant(class_mean_size, dtype=tf.float32), 0)  # (1, NS, 3)
        mean_size_label = tf.reduce_sum(size_cls_gt_onehot *
                                        mean_size_arr_expand,
                                        axis=[1])  # (P, 3)
        size_residual_gt = tf.gather_nd(
            bboxes_size_residuals_gt,
            positive_gt_idxes) / mean_size_label  # Np * 3
        size_residual_predicted = tf.reshape(
            tf.gather_nd(
                proposals_output[..., 5 + 2 * config.NH + config.NS:5 +
                                 2 * config.NH + 4 * config.NS],
                positive_idxes), (-1, config.NS, 3))  # Np * NS * 3
        size_residual_loss = tf.reduce_mean(tf.reduce_sum(tf.losses.huber_loss(
            labels=size_residual_gt,
            predictions=tf.reduce_sum(size_residual_predicted *
                                      tf.to_float(size_cls_gt_onehot),
                                      axis=1),
            reduction=tf.losses.Reduction.NONE),
                                                          axis=-1),
                                            name='size_residual_loss')

        box_loss = tf.identity(center_loss + 0.1 * heading_cls_loss +
                               heading_residual_loss + 0.1 * size_cls_loss +
                               size_residual_loss,
                               name='box_loss')

        # semantic loss
        sem_cls_score = tf.gather_nd(proposals_output[..., -config.NC:],
                                     positive_idxes)
        sem_cls_gt = tf.gather_nd(bboxes_labels_gt, positive_gt_idxes)  # Np
        sem_cls_loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=sem_cls_score, labels=sem_cls_gt),
            name='sem_cls_loss')
        sem_correct = tf.cast(tf.nn.in_top_k(sem_cls_score, sem_cls_gt, 1),
                              tf.float32,
                              name='sem_correct')
        sem_accuracy = tf.reduce_mean(sem_correct, name='sem_accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        # summary.add_moving_summary(obj_accuracy, sem_accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        # no weight decay
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='regularize_loss')

        total_cost = vote_reg_loss + 0.5 * obj_cls_loss + 1. * box_loss + 0.1 * sem_cls_loss
        total_cost = tf.identity(total_cost, name='total_loss')
        summary.add_moving_summary(total_cost,
                                   vote_reg_loss,
                                   obj_cls_loss,
                                   box_loss,
                                   center_loss,
                                   heading_cls_loss,
                                   heading_residual_loss,
                                   size_cls_loss,
                                   size_residual_loss,
                                   sem_cls_loss,
                                   wd_cost,
                                   obj_accuracy,
                                   sem_accuracy,
                                   decay=0)
        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/W', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
Exemple #49
0
    def _build_graph(self, inputs):
        """Build the generator graph, its reconstruction cost and an image summary.

        Args:
            inputs: a 3-sequence ``(vol3d, img2d, condition)``. ``condition``
                is reshaped to a rank-0 tensor; when it is greater than zero
                the AdaIN-merged features are decoded for the volume branch,
                otherwise the volume's own encoded features are decoded.

        Side effects:
            Sets ``self.cost`` to the weighted sum of the two mean-absolute-
            error terms (weight 2 on the volume term, weight 1 on the image
            term), registers moving summaries for the individual terms and for
            the total, creates the named tensors ``out_vol3d`` and
            ``out_vol3d_decoded``, and adds the ``colorized`` image summary.
        """
        vol3d, img2d, condition = inputs

        with tf.variable_scope('gen'):
            # 3D volume -> 2D representation.
            with tf.variable_scope('encoder_3d'):
                vol2d = self.vol3d_encoder(vol3d)

            # Encode both 2D inputs with the same encoder.
            with tf.variable_scope('encoder_vgg19'):
                vol_feat = self.vgg19_encoder(vol2d)
                img_feat = self.vgg19_encoder(img2d)

            # t = AdaIN(f(c), f(s)), applied only when `condition` > 0.
            with tf.variable_scope('style_transfer'):
                adain_feat = self._build_adain_layers(vol_feat, img_feat)
                condition = tf.reshape(condition, [])  # rank 0 for tf.cond
                apply_transfer = condition > 0
                picked_feat = tf.cond(
                    apply_transfer,
                    lambda: tf.identity(adain_feat),
                    lambda: tf.identity(vol_feat))
                img_feat = tf.identity(img_feat)

            # Decode the features back to 2D.
            with tf.variable_scope('decoder_vgg19'):
                vol2d_decoded = self.vgg19_decoder(picked_feat)
                img2d_decoded = self.vgg19_decoder(img_feat)

            # Decode the 2D results back to 3D.
            with tf.variable_scope('decoder_3d'):
                vol3d_decoded = self.vol3d_decoder(vol2d_decoded)
                img3d_decoded = self.vol3d_decoder(img2d_decoded)

        # Reconstruction losses (mean absolute error).
        with tf.name_scope('losses'):
            loss_vol3d = tf.reduce_mean(tf.abs(vol3d - vol3d_decoded),
                                        name='loss_vol3d')
            loss_img2d = tf.reduce_mean(tf.abs(img2d - img2d_decoded),
                                        name='loss_img2d')
            for term in (loss_vol3d, loss_img2d):
                add_moving_summary(term)
            weighted_terms = [2e0 * loss_vol3d, 1e0 * loss_img2d]

        self.cost = tf.reduce_sum(weighted_terms, name='self.cost')
        add_moving_summary(self.cost)

        # Named aliases of the input and reconstructed volumes
        # (presumably so they can be fetched by name -- confirm with callers).
        tf.identity(vol3d, name='out_vol3d')
        tf.identity(vol3d_decoded, name='out_vol3d_decoded')

        with tf.name_scope('visualization'):

            def as_tile(tensor):
                # Drop unit dimensions and force a [1, DIMY, DIMX, 3] layout.
                return tf.reshape(tf.squeeze(tensor), [1, DIMY, DIMX, 3])

            def mid_planes(volume, mid=128):
                # One slice at index `mid` along each of the first three axes.
                planes = [
                    as_tile(volume[mid:mid + 1, ...]),
                    as_tile(volume[:, mid:mid + 1, ...]),
                    as_tile(volume[:, :, mid:mid + 1, ...]),
                ]
                # Blank tile shaped like the first plane; it is not part of
                # the mosaic below but is still added to the graph.
                as_tile(tf.zeros_like(planes[0]))
                return planes

            vol_planes = mid_planes(vol3d)
            dec_planes = mid_planes(vol3d_decoded)
            img_planes = mid_planes(img3d_decoded)
            blank = tf.zeros_like(img2d)

            # Three rows of five tiles: tiles are joined along axis 2, rows
            # along axis 1.
            mosaic_rows = [
                tf.concat(vol_planes + [vol2d, img2d], 2),
                tf.concat(dec_planes + [blank, blank], 2),
                tf.concat(img_planes + [img2d_decoded, img2d], 2),
            ]
            mosaic = tf.concat(mosaic_rows, 1)

            mosaic = tf.cast(tf.clip_by_value(mosaic, 0, 255),
                             tf.uint8,
                             name='viz')
            tf.summary.image('colorized', mosaic, max_outputs=50)
Exemple #50
0
def feature_to_prediction_and_loss(scope_name,
                                   l,
                                   label,
                                   num_classes,
                                   prediction_feature,
                                   ch_dim,
                                   label_smoothing=0,
                                   dense_dropout_keep_prob=1.0,
                                   is_last=True):
    """
        Given the feature l at scope_name, compute a classifier.

        The feature is first reduced to a vector, then a final linear layer
        named 'linear' produces either class logits or a single regression
        output, and the matching loss is built.

        Args:
            scope_name: variable scope under which all layers are created.
            l: input feature tensor; its static rank selects the code path.
            label: target tensor. Used as sparse class indices when
                ``num_classes > 0``, otherwise as the regression target.
            num_classes: number of classes; a value <= 0 switches to a
                one-output regression head with a squared-error loss.
            prediction_feature: extra processing applied before pooling:
                '1x1' (one conv/fc layer keeping the channel count),
                'msdense' (two strided 3x3 convolutions, rank-4 input only),
                'bn' (a batch-norm layer); any other value adds nothing.
            ch_dim: index of the channel axis of ``l``.
            label_smoothing: if > 0, classification uses one-hot labels with
                this smoothing factor instead of the sparse loss.
            dense_dropout_keep_prob: keep probability of the dropout placed
                before the final layer; only applied when ``is_last``.
            is_last: whether this is the final head. A rank-4 feature with
                ``is_last=False`` goes through the 'aux_preprocess' stack
                instead of the ``prediction_feature`` path.

        Returns:
            ``(cost, variables)``: the scalar loss tensor and a list holding
            the weight and bias of the final 'linear' layer.
    """
    with tf.variable_scope(scope_name):
        n_dim = len(l.get_shape().as_list())
        if n_dim == 4 and not is_last:
            # Auxiliary head: pool, project to 128 channels, then collapse the
            # remaining spatial extent with a full-size 'valid' convolution.
            with tf.variable_scope('aux_preprocess'):
                l = tf.nn.relu(l)
                l = AvgPooling('pool',
                               l,
                               pool_size=5,
                               strides=3,
                               padding='valid')
                l = Conv2D('conv_proj',
                           l,
                           128,
                           1,
                           strides=1,
                           activation=BNReLU)
                shape = l.get_shape().as_list()
                # Pick the two spatial axes depending on where channels live.
                if ch_dim != 1:
                    shape = shape[1:3]
                else:
                    shape = shape[2:4]
                l = Conv2D('conv_flat',
                           l,
                           768,
                           shape,
                           strides=1,
                           padding='valid',
                           activation=BNReLU)
                l = tf.layers.flatten(l)
        else:
            l = BNReLU('bnrelu_pred', l)
            ch_in = _get_dim(l, ch_dim)
            if prediction_feature == '1x1':
                if n_dim == 4:
                    l = Conv2D('conv1x1', l, ch_in, 1)
                else:
                    assert n_dim == 2, n_dim
                    l = FullyConnected('fc1x1',
                                       l,
                                       ch_in,
                                       activation=tf.identity)
                l = BNReLU('bnrelu1x1', l)

            elif prediction_feature == 'msdense':
                # Strided 3x3 convolutions need a rank-4 feature map. The
                # check used to demand rank 2, which Conv2D cannot consume,
                # so this branch could never build.
                assert n_dim == 4, n_dim
                l = Conv2D('conv1x1_0', l, ch_in, 3, strides=2)
                l = BNReLU('bnrelu1x1_0', l)
                l = Conv2D('conv1x1_1', l, ch_in, 3, strides=2)
                l = BNReLU('bnrelu1x1_1', l)

            elif prediction_feature == 'bn':
                l = BatchNorm('bn', l)

            else:
                # Do nothing to the input feature
                pass
            if n_dim > 2:
                l = GlobalAvgPooling('gap', l)

        variables = []
        if num_classes > 0:
            if is_last:
                l = Dropout('drop_pre_fc',
                            l,
                            keep_prob=dense_dropout_keep_prob)
            logits = FullyConnected('linear',
                                    l,
                                    num_classes,
                                    activation=tf.identity)
            variables.append(logits.variables.W)
            variables.append(logits.variables.b)
            # The result is not used here; the call only adds a softmax op
            # named 'preds' to the graph.
            tf.nn.softmax(logits, name='preds')
            ## local cost/error_rate
            if label_smoothing > 0:
                one_hot_labels = tf.one_hot(label, num_classes)
                cost = tf.losses.softmax_cross_entropy(
                    onehot_labels=one_hot_labels,
                    logits=logits,
                    label_smoothing=label_smoothing)
            else:
                cost = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=label)
            cost = tf.reduce_mean(cost, name='cross_entropy_loss')
            add_moving_summary(cost)

            def prediction_incorrect(logits,
                                     label,
                                     topk=1,
                                     name='incorrect_vector'):
                # 1.0 where the label is NOT among the top-k logits, else 0.0.
                return tf.cast(tf.logical_not(
                    tf.nn.in_top_k(logits, label, topk)),
                               tf.float32,
                               name=name)

            wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
            add_moving_summary(tf.reduce_mean(wrong, name='train_error'))
            wrong5 = prediction_incorrect(logits, label, 5, name='wrong-top5')
            add_moving_summary(tf.reduce_mean(wrong5, name='train-error-top5'))
        else:
            # for regression:
            pred = FullyConnected('linear', l, 1, activation=tf.identity)
            variables.append(pred.variables.W)
            variables.append(pred.variables.b)
            # ReLU keeps the regression output non-negative.
            pred = tf.nn.relu(pred)
            tf.identity(pred, name='preds')
            # NOTE(review): `pred` comes from a 1-unit layer; confirm `label`
            # has a matching shape, otherwise this subtraction broadcasts.
            cost = tf.reduce_mean(0.5 * (pred - label)**2,
                                  name='mean_square_error')
            add_moving_summary(cost)
        return cost, variables
Exemple #51
0
    def _build_graph(self, inputs):
        G = tf.get_default_graph()  # For round
        tf.local_variables_initializer()
        tf.global_variables_initializer()
        pi, pm, pl, ui, um, ul = inputs
        pi = cvt2tanh(pi)
        pm = cvt2tanh(pm)
        pl = cvt2tanh(pl)
        ui = cvt2tanh(ui)
        um = cvt2tanh(um)
        ul = cvt2tanh(ul)

        # def tf_membr(label):
        # 	with freeze_variables():
        # 		label = np_2imag(label, maxVal=MAX_LABEL)
        # 		label = np.squeeze(label) # Unimplemented: exceptions.NotImplementedError: Only for images of dimension 1-3 are supported, got a 4D one
        # 		# label, nb_labels = skimage.measure.label(color, return_num=True)
        # 		# label = np.expand_dims(label, axis=-1).astype(np.float32) # Modify here for batch
        # 		# for z in range(membr.shape[0]):
        # 		# 	membr[z,...] = 1-skimage.segmentation.find_boundaries(np.squeeze(label[z,...]), mode='thick') #, mode='inner'
        # 		membr = 1-skimage.segmentation.find_boundaries(np.squeeze(label), mode='thick') #, mode='inner'
        # 		membr = np.expand_dims(membr, axis=-1).astype(np.float32)
        # 		membr = np.expand_dims(membr, axis=0).astype(np.float32)
        # 		membr = np_2tanh(membr, maxVal=1.0)
        # 		membr = np.reshape(membr, label.shape)
        # 		return membr

        # def tf_label(color):
        # 	with freeze_variables():
        # 		color = np_2imag(color, maxVal=MAX_LABEL)
        # 		color = np.squeeze(color) # Unimplemented: exceptions.NotImplementedError: Only for images of dimension 1-3 are supported, got a 4D one
        # 		label, nb_labels = skimage.measure.label(color, return_num=True)
        # 		label = np.expand_dims(label, axis=-1).astype(np.float32)
        # 		label = np.expand_dims(label, axis=0).astype(np.float32)
        # 		label = np_2tanh(label, maxVal=MAX_LABEL)
        # 		label = np.reshape(label, color.shape)
        # 		return label

        def tf_rand_score(x1, x2):
            return 1.0 - adjusted_rand_score(x1.flatten(), x2.flatten())

        def rounded(label, factor=MAX_LABEL, name='quantized'):
            with G.gradient_override_map({"Round": "Identity"}):
                with freeze_variables():
                    with tf.name_scope(name=name):
                        label = cvt2imag(label, maxVal=factor)
                        label = tf.round(label)
                        label = cvt2tanh(label, maxVal=factor)
                    return tf.identity(label, name=name)


        with argscope([Conv2D, Deconv2D, FullyConnected],
             W_init=tf.truncated_normal_initializer(stddev=0.02),
             use_bias=False), \
          argscope(BatchNorm, gamma_init=tf.random_uniform_initializer()), \
          argscope([Conv2D, Deconv2D, BatchNorm], data_format='NHWC'), \
          argscope(LeakyReLU, alpha=0.2):

            with tf.variable_scope('gen'):
                # Real pair image 4 gen
                with tf.variable_scope('I2M'):
                    pim, feat_im = self.generator(pi)
                with tf.variable_scope('M2L'):
                    piml, feat_iml = self.generator(pim)
                    pml, feat_ml = self.generator(pm)
                    # piml  = tf.py_func(tf_label, [(pim)], tf.float32)
                    # pml   = tf.py_func(tf_label, [(pm)], tf.float32)
                    # print pim
                    # print piml
                # with tf.variable_scope('L2M'):
                # # with freeze_variables():
                # 	pimlm = self.generator(piml) #
                # 	plm   = self.generator(pl)
                # 	pmlm  = self.generator(pml)
                # 	# pimlm = tf.py_func(tf_membr, [(piml)], tf.float32) #
                # 	# plm   = tf.py_func(tf_membr, [(pl)	], tf.float32)
                # 	# pmlm  = tf.py_func(tf_membr, [(pml)	], tf.float32)
                # 	# print piml
                # 	# print pimlm
                # with tf.variable_scope('M2I'):
                # 	pimlmi = self.generator(pimlm) #
                # 	pimi   = self.generator(pim)

                # # Real pair label 4 gen
                # with tf.variable_scope('L2M'):
                # # with freeze_variables():
                # 	plm = self.generator(pl)
                # 	# plm  = tf.py_func(tf_membr, [(pl)	, tf.float32])
                # with tf.variable_scope('M2I'):
                # 	plmi = self.generator(plm)
                # 	pmi  = self.generator(pi)
                # with tf.variable_scope('I2M'):
                # 	plmim = self.generator(plmi) #
                # 	pim   = self.generator(pi)
                # 	pmim  = self.generator(pmi)

                # with tf.variable_scope('M2L'):
                # 	plmiml = self.generator(plmim) #
                # 	plml   = self.generator(plm)
                # 	# plmiml = tf.py_func(tf_label, [(plmim)], tf.float32)
                # 	# plml   = tf.py_func(tf_label, [(plm)], tf.float32)

            with tf.variable_scope('discrim'):
                # with tf.variable_scope('I'):
                # 	i_dis_real 			  = self.discriminator(ui)
                # 	i_dis_fake_from_label = self.discriminator(plmi)
                with tf.variable_scope('M'):
                    m_dis_real = self.discriminator(um)
                    m_dis_fake_from_image = self.discriminator(pim)
                    # m_dis_fake_from_label = self.discriminator(plm)
                with tf.variable_scope('L'):
                    l_dis_real = self.discriminator(ul)
                    l_dis_fake_from_image = self.discriminator(piml)

        piml = rounded(piml)  #
        pml = rounded(pml)
        # plmiml = rounded(plmiml) #
        # plml   = rounded(plml)

        # with tf.name_scope('Recon_I_loss'):
        # 	recon_imi 		= tf.reduce_mean(tf.abs((pi) - (pimi)), name='recon_imi')
        # 	recon_lmi 		= tf.reduce_mean(tf.abs((pi) - (plmi)), name='recon_lmi')
        # 	recon_imlmi 	= tf.reduce_mean(tf.abs((pi) - (pimlmi)), name='recon_imlmi') #

        with tf.name_scope('Recon_L_loss'):
            # recon_lml 		= tf.reduce_mean(tf.abs((pl) - (plml)), name='recon_lml')
            recon_iml = tf.reduce_mean(tf.abs((pl) - (piml)), name='recon_iml')
            # recon_lmiml 	= tf.reduce_mean(tf.abs((pl) - (plmiml)), name='recon_lmiml') #

        with tf.name_scope('Recon_M_loss'):
            # recon_mim 		= tf.reduce_mean(tf.abs((pm) - (pmim)), name='recon_mim')
            # recon_mlm 		= tf.reduce_mean(tf.abs((pm) - (pmlm)), name='recon_mlm')

            recon_im = tf.reduce_mean(tf.abs((pm) - (pim)), name='recon_im')
            # recon_lm 		= tf.reduce_mean(tf.abs((pm) - (plm)), name='recon_lm')

        with tf.name_scope('GAN_loss'):
            # G_loss_IL, D_loss_IL = self.build_losses(i_dis_real, i_dis_fake_from_label, name='IL')
            G_loss_LI, D_loss_LI = self.build_losses(l_dis_real,
                                                     l_dis_fake_from_image,
                                                     name='LL')
            G_loss_MI, D_loss_MI = self.build_losses(m_dis_real,
                                                     m_dis_fake_from_image,
                                                     name='MI')
            # G_loss_ML, D_loss_ML = self.build_losses(m_dis_real, m_dis_fake_from_label, name='ML')

        # custom loss for membr
        with tf.name_scope('membr_loss'):

            def membr_loss(y_true, y_pred, name='membr_loss'):
                return tf.reduce_mean(tf.subtract(
                    binary_cross_entropy(cvt2imag(y_true, maxVal=1.0),
                                         cvt2imag(y_pred, maxVal=1.0)),
                    dice_coe(cvt2imag(y_true, maxVal=1.0),
                             cvt2imag(y_pred, maxVal=1.0),
                             axis=[1, 2, 3],
                             loss_type='jaccard')),
                                      name=name)

            membr_im = membr_loss(pm, pim, name='membr_im')
            # print membr_im
            # membr_lm = membr_loss(pm, plm, name='membr_lm')
            # membr_imlm = membr_loss(pm, pimlm, name='membr_imlm')
            # membr_lmim = membr_loss(pm, plmim, name='membr_lmim')
            # membr_mlm = membr_loss(pm, pmlm, name='membr_mlm')
            # membr_mim = membr_loss(pm, pmim, name='membr_mim')
        # custom loss for label
        with tf.name_scope('label_loss'):

            def label_loss(y_true_L, y_pred_L, y_grad_M, name='label_loss'):
                g_mag_grad_M = cvt2imag(y_grad_M, maxVal=1.0)
                mag_grad_L = magnitute_central_difference(y_pred_L,
                                                          name='mag_grad_L')
                cond = tf.greater(mag_grad_L, tf.zeros_like(mag_grad_L))
                thresholded_mag_grad_L = tf.where(
                    cond,
                    tf.ones_like(mag_grad_L),
                    tf.zeros_like(mag_grad_L),
                    name='thresholded_mag_grad_L')

                gtv_guess = tf.multiply(g_mag_grad_M,
                                        thresholded_mag_grad_L,
                                        name='gtv_guess')
                loss_gtv_guess = tf.reduce_mean(gtv_guess,
                                                name='loss_gtv_guess')

                thresholded_mag_grad_L = cvt2tanh(thresholded_mag_grad_L,
                                                  maxVal=1.0)
                gtv_guess = cvt2tanh(gtv_guess, maxVal=1.0)
                return loss_gtv_guess, thresholded_mag_grad_L

            label_iml, g_iml = label_loss(None, piml, pim, name='label_iml')
            # label_lml, g_lml = label_loss(None, plml, plm, name='label_lml')
            # label_lmiml, g_lmiml = label_loss(None, plmiml, plmim, name='label_lmiml')
            label_ml, g_ml = label_loss(None, pml, pm, name='label_loss_ml')

        # custom loss for tf_rand_score
        with tf.name_scope('rand_loss'):
            rand_iml = tf.reduce_mean(
                tf.cast(tf.py_func(tf_rand_score, [piml, pl], tf.float64),
                        tf.float32))
            rand_ml = tf.reduce_mean(
                tf.cast(tf.py_func(tf_rand_score, [pml, pl], tf.float64),
                        tf.float32))

        with tf.name_scope('discrim_loss'):

            def regDLF(y_true,
                       y_pred,
                       alpha=1,
                       beta=1,
                       gamma=0.01,
                       delta_v=0.5,
                       delta_d=1.5,
                       name='loss_discrim'):
                def tf_norm(inputs, axis=1, epsilon=1e-7, name='safe_norm'):
                    squared_norm = tf.reduce_sum(tf.square(inputs),
                                                 axis=axis,
                                                 keep_dims=True)
                    safe_norm = tf.sqrt(squared_norm + epsilon)
                    return tf.identity(safe_norm, name=name)

                ###
                y_true = tf.reshape(y_true, [DIMZ * DIMY * DIMX])

                nDim = tf.shape(y_pred)[-1]
                X = tf.reshape(y_pred, [DIMZ * DIMY * DIMX, nDim])
                uniqueLabels, uniqueInd = tf.unique(y_true)

                numUnique = tf.size(
                    uniqueLabels)  # Get the number of connected component

                Sigma = tf.unsorted_segment_sum(X, uniqueInd, numUnique)
                # ones_Sigma = tf.ones((tf.shape(X)[0], 1))
                ones_Sigma = tf.ones_like(X)
                ones_Sigma = tf.unsorted_segment_sum(ones_Sigma, uniqueInd,
                                                     numUnique)
                mu = tf.divide(Sigma, ones_Sigma)

                Lreg = tf.reduce_mean(tf.norm(mu, axis=1, ord=1))

                T = tf.norm(tf.subtract(tf.gather(mu, uniqueInd), X),
                            axis=1,
                            ord=1)
                T = tf.divide(T, Lreg)
                T = tf.subtract(T, delta_v)
                T = tf.clip_by_value(T, 0, T)
                T = tf.square(T)

                ones_Sigma = tf.ones_like(uniqueInd, dtype=tf.float32)
                ones_Sigma = tf.unsorted_segment_sum(ones_Sigma, uniqueInd,
                                                     numUnique)
                clusterSigma = tf.unsorted_segment_sum(T, uniqueInd, numUnique)
                clusterSigma = tf.divide(clusterSigma, ones_Sigma)

                # Lvar = tf.reduce_mean(clusterSigma, axis=0)
                Lvar = tf.reduce_mean(clusterSigma)

                mu_interleaved_rep = tf.tile(mu, [numUnique, 1])
                mu_band_rep = tf.tile(mu, [1, numUnique])
                mu_band_rep = tf.reshape(mu_band_rep,
                                         (numUnique * numUnique, nDim))

                mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)
                # Remove zero vector
                # intermediate_tensor = reduce_sum(tf.abs(x), 1)
                # zero_vector = tf.zeros(shape=(1,1), dtype=tf.float32)
                # bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
                # omit_zeros = tf.boolean_mask(x, bool_mask)
                intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), 1)
                zero_vector = tf.zeros(shape=(1, 1), dtype=tf.float32)
                bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
                omit_zeros = tf.boolean_mask(mu_diff, bool_mask)
                mu_diff = tf.expand_dims(omit_zeros, axis=1)
                print mu_diff
                mu_diff = tf.norm(mu_diff, ord=1)
                # squared_norm = tf.reduce_sum(tf.square(s), axis=axis,keep_dims=True)
                # safe_norm = tf.sqrt(squared_norm + epsilon)
                # squared_norm = tf.reduce_sum(tf.square(omit_zeros), axis=-1,keep_dims=True)
                # safe_norm = tf.sqrt(squared_norm + 1e-6)
                # mu_diff = safe_norm

                mu_diff = tf.divide(mu_diff, Lreg)

                mu_diff = tf.subtract(2 * delta_d, mu_diff)
                mu_diff = tf.clip_by_value(mu_diff, 0, mu_diff)
                mu_diff = tf.square(mu_diff)

                numUniqueF = tf.cast(numUnique, tf.float32)
                Ldist = tf.reduce_mean(mu_diff)

                L = alpha * Lvar + beta * Ldist + gamma * Lreg
                print L
                print Ldist
                print Lvar
                print Lreg
                return tf.squeeze(L, name=name)

            discrim_im = regDLF(cvt2imag(pm, maxVal=1.0),
                                feat_im,
                                name='discrim_im')
            discrim_iml = regDLF(cvt2imag(pl, maxVal=MAX_LABEL),
                                 feat_iml,
                                 name='discrim_iml')
            discrim_ml = regDLF(cvt2imag(pl, maxVal=MAX_LABEL),
                                feat_ml,
                                name='discrim_ml')

        self.g_loss = tf.add_n(
            [
                #(recon_imi), # + recon_lmi + recon_imlmi), #
                (recon_iml),  # + recon_lml + recon_lmiml), #
                (recon_im),  #  + recon_lm + recon_mim + recon_mlm),
                (rand_iml),  # + rand_lml + rand_lmiml), #
                (rand_ml),  #  + rand_lm + rand_mim + rand_mlm),
                # (G_loss_IL + G_loss_LI + G_loss_MI + G_loss_ML),
                (G_loss_LI + G_loss_MI),
                (discrim_im + discrim_iml + discrim_ml),
                (
                    membr_im
                ),  # + membr_lm + membr_imlm + membr_lmim + membr_mlm + membr_mim),
                # (label_iml + label_lml + label_lmiml + label_ml)
                (label_iml + label_ml)
            ],
            name='G_loss_total')
        self.d_loss = tf.add_n(
            [
                # (D_loss_IL + D_loss_LI + D_loss_MI + D_loss_ML),
                (D_loss_LI + D_loss_MI),
            ],
            name='D_loss_total')

        wd_g = regularize_cost('gen/.*/W',
                               l2_regularizer(1e-5),
                               name='G_regularize')
        wd_d = regularize_cost('discrim/.*/W',
                               l2_regularizer(1e-5),
                               name='D_regularize')

        self.g_loss = tf.add(self.g_loss, wd_g, name='g_loss')
        self.d_loss = tf.add(self.d_loss, wd_d, name='d_loss')

        self.collect_variables()

        add_moving_summary(self.d_loss, self.g_loss)
        add_moving_summary(
            recon_iml,
            recon_im,
            label_iml,
            label_ml,
            # rand_iml,
            # rand_ml,
            # membr_im
            # recon_imi, recon_lmi, recon_imlmi,
            # recon_lml, recon_iml, recon_lmiml,
            # recon_mim, recon_mlm, recon_im , recon_lm,
        )

        viz = tf.concat(
            [
                tf.concat([ui, pi, pim, piml, g_iml], 2),
                # tf.concat([ul, pl, plm, plmi, plmim, plmiml], 2),
                tf.concat([um, pl, pm, pml, g_ml], 2),
                # tf.concat([pl, pl, g_iml, g_lml, g_lmiml,   g_ml], 2),
            ],
            1)
        # add_moving_summary(
        # 	recon_imi, recon_lmi,# recon_imlmi,
        # 	recon_lml, recon_iml,# recon_lmiml,
        # 	recon_mim, recon_mlm, recon_im , recon_lm,
        # 	)
        # viz = tf.concat([tf.concat([ui, pi, pim, piml], 2),
        # 				 tf.concat([ul, pl, plm, plmi], 2),
        # 				 tf.concat([um, pm, pmi, pmim], 2),
        # 				 tf.concat([um, pm, pml, pmlm], 2),
        # 				 ], 1)
        viz = cvt2imag(viz)
        viz = tf.cast(tf.clip_by_value(viz, 0, 255), tf.uint8, name='viz')
        tf.summary.image('colorized', viz, max_outputs=50)
Exemple #52
0
def rpn_losses_iou(anchor_labels, anchor_boxes, gt_boxes, rpn_boxes,
                   label_logits, box_logits, iou_logits):
    """
    Compute the RPN objectness, box-regression and IoU-prediction losses.

    The label loss and the IoU loss are both averaged only over the
    hardest examples: the ``cfg.FRCNN.BATCH_PER_IM`` largest per-element
    losses (or all of them when fewer are available).

    Args:
        anchor_labels: fHxfWxNA; -1 marks ignored anchors, 1 marks positives
        anchor_boxes: fHxfWxNAx4, encoded
        gt_boxes: ground-truth boxes, reshaped to (-1, 4) here
        rpn_boxes: decoded RPN boxes, reshaped to (-1, 4) here
        label_logits:  fHxfWxNA
        box_logits: fHxfWxNAx4
        iou_logits:  fHxfWxNA

    Returns:
        label_loss, box_loss, iou_loss
    """
    with tf.device('/cpu:0'):
        valid_mask = tf.stop_gradient(tf.not_equal(anchor_labels, -1))
        pos_mask = tf.stop_gradient(tf.equal(anchor_labels, 1))
        nr_valid = tf.stop_gradient(tf.count_nonzero(valid_mask,
                                                     dtype=tf.int32),
                                    name='num_valid_anchor')
        nr_pos = tf.identity(tf.count_nonzero(pos_mask, dtype=tf.int32),
                             name='num_pos_anchor')
        # nr_pos is guaranteed >0 in C4. But in FPN. even nr_valid could be 0.

        valid_anchor_labels = tf.boolean_mask(anchor_labels, valid_mask)
    valid_label_logits = tf.boolean_mask(label_logits, valid_mask)

    with tf.name_scope('label_metrics'):
        valid_label_prob = tf.nn.sigmoid(valid_label_logits)
        summaries = []
        with tf.device('/cpu:0'):
            for th in [0.5, 0.2, 0.1]:
                valid_prediction = tf.cast(valid_label_prob > th, tf.int32)
                nr_pos_prediction = tf.reduce_sum(valid_prediction,
                                                  name='num_pos_prediction')
                pos_prediction_corr = tf.count_nonzero(tf.logical_and(
                    valid_label_prob > th,
                    tf.equal(valid_prediction, valid_anchor_labels)),
                                                       dtype=tf.int32)
                placeholder = 0.5  # A small value will make summaries appear lower.
                recall = tf.to_float(tf.truediv(pos_prediction_corr, nr_pos))
                recall = tf.where(tf.equal(nr_pos, 0),
                                  placeholder,
                                  recall,
                                  name='recall_th{}'.format(th))
                precision = tf.to_float(
                    tf.truediv(pos_prediction_corr, nr_pos_prediction))
                precision = tf.where(tf.equal(nr_pos_prediction, 0),
                                     placeholder,
                                     precision,
                                     name='precision_th{}'.format(th))
                summaries.extend([precision, recall])
        add_moving_summary(*summaries)

    # Per-level loss summaries in FPN may appear lower due to the use of a small placeholder.
    # But the total RPN loss will be fine.  TODO make the summary op smarter
    placeholder = 0.
    ce_loss = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=tf.to_float(valid_anchor_labels), logits=valid_label_logits)

    # Label loss: keep only the most wrongly classified valid anchors.
    # k is clamped so top_k never asks for more elements than exist.
    n_selected = cfg.FRCNN.BATCH_PER_IM
    n_selected = tf.cast(n_selected, tf.int32)
    n_selected = tf.minimum(n_selected, tf.size(valid_anchor_labels))

    vals, _ = tf.nn.top_k(ce_loss, k=n_selected)
    try:
        # Smallest of the selected losses acts as the selection threshold.
        th = vals[-1]
    except Exception:
        # Best-effort fallback kept from the original code (presumably for
        # the case where slicing fails at graph-construction time); narrowed
        # from a bare `except:` so KeyboardInterrupt/SystemExit propagate.
        th = 1
    selected_mask = ce_loss >= th
    loss_weight = tf.cast(selected_mask, tf.float32)
    label_loss = tf.reduce_sum(
        ce_loss * loss_weight) * 1. / tf.reduce_sum(loss_weight)
    label_loss = tf.where(tf.equal(nr_valid, 0),
                          placeholder,
                          label_loss,
                          name='label_loss')

    # Box loss: Huber loss summed over positive anchors only.
    pos_anchor_boxes = tf.boolean_mask(anchor_boxes, pos_mask)
    pos_box_logits = tf.boolean_mask(box_logits, pos_mask)
    box_loss = tf.losses.huber_loss(pos_anchor_boxes,
                                    pos_box_logits,
                                    reduction=tf.losses.Reduction.SUM)
    box_loss = box_loss * (50. / cfg.RPN.BATCH_PER_IM)
    box_loss = tf.where(tf.equal(nr_pos, 0),
                        placeholder,
                        box_loss,
                        name='box_loss')

    # iou loss: smooth l1 loss
    rpn_boxes = tf.reshape(rpn_boxes, [-1, 4])
    gt_boxes = tf.reshape(gt_boxes, [-1, 4])
    iou = pairwise_iou(rpn_boxes, gt_boxes)  # nxm
    max_iou = tf.reduce_max(iou, axis=1)
    # if only bg gt_boxes, all ious are 0.
    max_iou = tf.where(tf.equal(nr_pos, 0), tf.zeros_like(max_iou), max_iou)
    max_iou = tf.stop_gradient(tf.reshape(max_iou, [-1]),
                               name='rpn_box_gt_iou')

    iou_logits = tf.nn.sigmoid(iou_logits)
    iou_logits = tf.reshape(iou_logits, [-1])
    iou_loss = tf.losses.huber_loss(max_iou, iou_logits, reduction='none')

    # Same hard-example selection as for the label loss. Clamp k to the
    # number of available elements: small feature levels can have fewer
    # anchors than BATCH_PER_IM, and top_k rejects k larger than its input.
    n_selected = cfg.FRCNN.BATCH_PER_IM
    n_selected = tf.cast(n_selected, tf.int32)
    n_selected = tf.minimum(n_selected, tf.size(iou_loss))

    vals, _ = tf.nn.top_k(iou_loss, k=n_selected)
    th = vals[-1]
    selected_mask = iou_loss >= th
    loss_weight = tf.cast(selected_mask, tf.float32)
    iou_loss = tf.reduce_sum(
        iou_loss * loss_weight) * 1. / tf.reduce_sum(loss_weight)
    iou_loss = tf.identity(iou_loss, name='iou_loss')

    add_moving_summary(label_loss, box_loss, iou_loss, nr_valid, nr_pos)
    return label_loss, box_loss, iou_loss
Exemple #53
0
    def _build_graph(self, inputs):
        """
        Build a stacked-LSTM next-token model and its cost.

        Args:
            inputs: a pair ``(input, nextinput)``; the first is looked up in
                the embedding table, the second provides the sparse labels.

        Sets ``self.state`` (the persistent per-layer LSTM state variables)
        and ``self.cost`` (summed cross entropy divided by BATCH).
        """
        tokens, next_tokens = inputs
        training = get_current_tower_context().is_training
        initializer = tf.random_uniform_initializer(-0.05, 0.05)

        def make_cell():
            # One LSTM layer; output dropout is applied only while training.
            lstm = rnn.BasicLSTMCell(num_units=HIDDEN_SIZE,
                                     forget_bias=0.0,
                                     reuse=tf.get_variable_scope().reuse)
            if not training:
                return lstm
            return rnn.DropoutWrapper(lstm, output_keep_prob=DROPOUT)

        layers = []
        for _ in range(NUM_LAYER):
            layers.append(make_cell())
        cell = rnn.MultiRNNCell(layers)

        def zero_state_var(var_name):
            # Non-trainable variable that carries state between run calls.
            return tf.get_variable(var_name, [BATCH, HIDDEN_SIZE],
                                   trainable=False,
                                   initializer=tf.constant_initializer())

        layer_states = []
        for k in range(NUM_LAYER):
            c_var = zero_state_var('c{}'.format(k))
            h_var = zero_state_var('h{}'.format(k))
            layer_states.append(rnn.LSTMStateTuple(c_var, h_var))
        state_var = tuple(layer_states)
        self.state = state_var

        embeddingW = tf.get_variable('embedding', [VOCAB_SIZE, HIDDEN_SIZE],
                                     initializer=initializer)
        # B x seqlen x hiddensize
        embedded = tf.nn.embedding_lookup(embeddingW, tokens)
        embedded = Dropout(embedded, rate=DROPOUT)

        with tf.variable_scope('LSTM', initializer=initializer):
            # seqlen x (Bxhidden)
            per_step_inputs = tf.unstack(embedded, num=SEQ_LEN, axis=1)
            outputs, last_state = rnn.static_rnn(cell,
                                                 per_step_inputs,
                                                 state_var,
                                                 scope='rnn')

        # Write the final state back into the variables once the unrolled
        # RNN has run, so the next step continues from it.
        update_state_ops = []
        for stored, final in zip(state_var, last_state):
            update_state_ops.append(tf.assign(stored.c, final.c))
            update_state_ops.append(tf.assign(stored.h, final.h))

        # seqlen x (Bxrnnsize) -> (Bxseqlen) x hidden
        stacked_outputs = tf.concat(outputs, 1)
        flat_outputs = tf.reshape(stacked_outputs, [-1, HIDDEN_SIZE])
        logits = FullyConnected('fc',
                                flat_outputs,
                                VOCAB_SIZE,
                                activation=tf.identity,
                                kernel_initializer=initializer,
                                bias_initializer=initializer)
        flat_labels = tf.reshape(next_tokens, [-1])
        xent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=flat_labels)

        # Evaluating the cost forces the state-update ops to run first.
        with tf.control_dependencies(update_state_ops):
            self.cost = tf.truediv(tf.reduce_sum(xent_loss),
                                   tf.cast(BATCH, tf.float32),
                                   name='cost')  # log-perplexity

        perpl = tf.exp(self.cost / SEQ_LEN, name='perplexity')
        summary.add_moving_summary(perpl, self.cost)
Exemple #54
0
    def build_graph(self, *inputs):
        """
        Build the FPN-based Faster/Mask R-CNN graph.

        Args:
            *inputs: positional input tensors laid out as
                ``image``, then one (anchor_labels, anchor_boxes) pair per
                FPN level, then ``gt_boxes``, ``gt_labels`` and, when
                ``config.MODE_MASK`` is set, ``gt_masks`` as the last entry.

        Returns:
            The total training cost when the tower is in training mode.
            In inference mode nothing is returned; the final mask
            probabilities (if masks are enabled) are created as the named
            tensor ``final_masks``.
        """
        num_fpn_level = len(config.ANCHOR_STRIDES_FPN)
        assert len(config.ANCHOR_SIZES) == num_fpn_level
        is_training = get_current_tower_context().is_training
        image = inputs[0]
        input_anchors = inputs[1:1 + 2 * num_fpn_level]
        multilevel_anchor_labels = input_anchors[0::2]
        multilevel_anchor_boxes = input_anchors[1::2]
        # The ground-truth tensors come right after the anchor inputs.
        # Derive their position from the number of levels instead of the
        # hard-coded 11/12, which is only correct for exactly 5 levels.
        gt_start = 1 + 2 * num_fpn_level
        gt_boxes, gt_labels = inputs[gt_start], inputs[gt_start + 1]
        if config.MODE_MASK:
            gt_masks = inputs[-1]

        image = self.preprocess(image)  # 1CHW
        image_shape2d = tf.shape(image)[2:]  # h,w

        c2345 = resnet_fpn_backbone(image, config.RESNET_NUM_BLOCK)
        p23456 = fpn_model('fpn', c2345)

        # Multi-Level RPN Proposals
        multilevel_proposals = []
        rpn_loss_collection = []
        for lvl in range(num_fpn_level):
            rpn_label_logits, rpn_box_logits = rpn_head(
                'rpn', p23456[lvl], config.FPN_NUM_CHANNEL,
                len(config.ANCHOR_RATIOS))
            with tf.name_scope('FPN_lvl{}'.format(lvl + 2)):
                anchors = tf.constant(get_all_anchors_fpn()[lvl],
                                      name='rpn_anchor_lvl{}'.format(lvl + 2))
                anchors, anchor_labels, anchor_boxes = \
                    self.narrow_to_featuremap(p23456[lvl], anchors,
                                              multilevel_anchor_labels[lvl],
                                              multilevel_anchor_boxes[lvl])
                anchor_boxes_encoded = encode_bbox_target(
                    anchor_boxes, anchors)
                pred_boxes_decoded = decode_bbox_target(
                    rpn_box_logits, anchors)
                proposal_boxes, proposal_scores = generate_rpn_proposals(
                    tf.reshape(pred_boxes_decoded, [-1, 4]),
                    tf.reshape(rpn_label_logits, [-1]), image_shape2d,
                    config.TRAIN_FPN_NMS_TOPK
                    if is_training else config.TEST_FPN_NMS_TOPK)
                multilevel_proposals.append((proposal_boxes, proposal_scores))
                if is_training:
                    # Collected as [label, box, label, box, ...]; the
                    # strided slices further below rely on this order.
                    label_loss, box_loss = rpn_losses(anchor_labels,
                                                      anchor_boxes_encoded,
                                                      rpn_label_logits,
                                                      rpn_box_logits)
                    rpn_loss_collection.extend([label_loss, box_loss])

        # Merge proposals from multi levels, pick top K
        proposal_boxes = tf.concat([x[0] for x in multilevel_proposals],
                                   axis=0)  # nx4
        proposal_scores = tf.concat([x[1] for x in multilevel_proposals],
                                    axis=0)  # n
        proposal_topk = tf.minimum(
            tf.size(proposal_scores), config.TRAIN_FPN_NMS_TOPK
            if is_training else config.TEST_FPN_NMS_TOPK)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=proposal_topk,
                                                    sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)

        if is_training:
            rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
        else:
            # The boxes to be used to crop RoIs.
            rcnn_boxes = proposal_boxes

        roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

        fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_2fc_head(
            'fastrcnn', roi_feature_fastrcnn, config.NUM_CLASS)

        if is_training:
            # rpn loss is already defined above
            with tf.name_scope('rpn_losses'):
                rpn_total_label_loss = tf.add_n(rpn_loss_collection[::2],
                                                name='label_loss')
                rpn_total_box_loss = tf.add_n(rpn_loss_collection[1::2],
                                              name='box_loss')
                add_moving_summary(rpn_total_box_loss, rpn_total_label_loss)

            # fastrcnn loss:
            matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

            fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0),
                                            [-1])  # fg inds w.r.t all samples
            fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
            fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits,
                                               fg_inds_wrt_sample)

            fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
                image, rcnn_labels, fg_sampled_boxes, matched_gt_boxes,
                fastrcnn_label_logits, fg_fastrcnn_box_logits)

            if config.MODE_MASK:
                # maskrcnn loss
                fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
                roi_feature_maskrcnn = multilevel_roi_align(
                    p23456[:4], fg_sampled_boxes, 14)
                mask_logits = maskrcnn_upXconv_head('maskrcnn',
                                                    roi_feature_maskrcnn,
                                                    config.NUM_CLASS,
                                                    4)  # #fg x #cat x 28 x 28

                target_masks_for_fg = crop_and_resize(
                    tf.expand_dims(gt_masks, 1),
                    fg_sampled_boxes,
                    fg_inds_wrt_gt,
                    28,
                    pad_border=False)  # fg x 1x28x28
                target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1,
                                                 'sampled_fg_mask_targets')
                mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels,
                                           target_masks_for_fg)
            else:
                mrcnn_loss = 0.0

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            total_cost = tf.add_n(
                rpn_loss_collection +
                [fastrcnn_label_loss, fastrcnn_box_loss, mrcnn_loss, wd_cost],
                'total_cost')

            add_moving_summary(total_cost, wd_cost)
            return total_cost
        else:
            final_boxes, final_labels = self.fastrcnn_inference(
                image_shape2d, rcnn_boxes, fastrcnn_label_logits,
                fastrcnn_box_logits)
            if config.MODE_MASK:
                # Cascade inference needs roi transform with refined boxes.
                roi_feature_maskrcnn = multilevel_roi_align(
                    p23456[:4], final_boxes, 14)
                mask_logits = maskrcnn_upXconv_head('maskrcnn',
                                                    roi_feature_maskrcnn,
                                                    config.NUM_CLASS,
                                                    4)  # #fg x #cat x 28 x 28
                # Pick, for every detection, the mask channel of its
                # predicted class (labels are shifted by one for indexing).
                indices = tf.stack([
                    tf.range(tf.size(final_labels)),
                    tf.to_int32(final_labels) - 1
                ],
                                   axis=1)
                final_mask_logits = tf.gather_nd(mask_logits,
                                                 indices)  # #resultx28x28
                tf.sigmoid(final_mask_logits, name='final_masks')
Exemple #55
0
    def build_graph(self, image, edgemap):
        """
        Build a VGG-style network with five side outputs plus a fused
        output, each trained against ``edgemap`` with a class-balanced
        sigmoid cross entropy.

        Returns the total cost when the tower is in training mode; in
        inference mode nothing is returned.
        """
        image = image - tf.constant([104, 116, 122], dtype='float32')
        edgemap = tf.expand_dims(edgemap, 3, name='edgemap4d')

        def side_output(scope_name, feat, factor):
            # 1x1 conv to a single channel, then repeated 2x bilinear
            # upsampling until the requested factor has been consumed.
            with tf.variable_scope(scope_name):
                feat = Conv2D('convfc',
                              feat,
                              1,
                              kernel_size=1,
                              activation=tf.identity,
                              use_bias=True,
                              kernel_initializer=tf.constant_initializer())
                while factor != 1:
                    feat = BilinearUpSample('upsample{}'.format(factor),
                                            feat, 2)
                    factor = factor / 2
                return feat

        # (number of 3x3 convs, output channels) for stages 1..5
        stage_specs = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]
        side_maps = []
        with argscope(Conv2D, kernel_size=3, activation=tf.nn.relu):
            feat = image
            for stage, (num_convs, channels) in enumerate(stage_specs, 1):
                for conv_idx in range(1, num_convs + 1):
                    feat = Conv2D('conv{}_{}'.format(stage, conv_idx),
                                  feat, channels)
                # Stage s has been pooled s-1 times, hence factor 2**(s-1).
                side_maps.append(
                    side_output('branch{}'.format(stage), feat,
                                2 ** (stage - 1)))
                # No pooling after the last stage.
                if stage < len(stage_specs):
                    feat = MaxPooling('pool{}'.format(stage), feat, 2)

        final_map = Conv2D('convfcweight',
                           tf.concat(side_maps, 3),
                           1,
                           kernel_size=1,
                           kernel_initializer=tf.constant_initializer(0.2),
                           use_bias=False,
                           activation=tf.identity)

        costs = []
        output = None
        for number, logit_map in enumerate(side_maps + [final_map], 1):
            output = tf.nn.sigmoid(logit_map, name='output{}'.format(number))
            costs.append(
                class_balanced_sigmoid_cross_entropy(
                    logit_map, edgemap, name='xentropy{}'.format(number)))

        # some magic threshold; `output` is the fused (last) sigmoid map
        pred = tf.cast(tf.greater(output, 0.5), tf.int32, name='prediction')
        mismatch = tf.cast(tf.not_equal(pred, edgemap), tf.float32)
        wrong = tf.reduce_mean(mismatch, name='train_error')

        if not get_current_tower_context().is_training:
            return

        wd_w = tf.train.exponential_decay(2e-4, get_global_step_var(),
                                          80000, 0.7, True)
        wd_cost = tf.multiply(wd_w,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='wd_cost')
        costs.append(wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W
        total_cost = tf.add_n(costs, name='cost')
        add_moving_summary(wrong, total_cost, *costs)
        return total_cost
Exemple #56
0
    def build_graph(self, image, label):
        """
        Build a width-scaled, DoReFa-quantized CNN classifier and its cost.

        Weights (except the first conv and fc layers), activations and
        gradients are quantized with the functions returned by
        ``get_dorefa(BITW, BITA, BITG)``.

        Args:
            image: input images, divided by 256.0 before use.
            label: sparse class labels for the 10-way classifier.

        Returns:
            The total cost (cross entropy plus weight decay on fc weights).
        """
        is_training = get_current_tower_context().is_training

        fw, fa, fg = get_dorefa(BITW, BITA, BITG)

        # monkey-patch tf.get_variable to apply fw
        def binarize_weight(v):
            name = v.op.name
            # don't binarize first and last layer
            if not name.endswith('W') or 'conv0' in name or 'fc' in name:
                return v
            else:
                logger.info("Binarizing weight {}".format(v.op.name))
                return fw(v)

        def cabs(x):
            # Clip the absolute value to at most 1.
            return tf.minimum(1.0, tf.abs(x), name='cabs')

        def activate(x):
            return fa(cabs(x))

        image = image / 256.0
        zp = 0.25  # width multiplier applied to every conv layer

        def channels(base):
            # np.round returns a numpy float; layer widths must be plain
            # ints, since newer TensorFlow shape handling rejects float
            # dimensions.
            return int(np.round(base * zp))

        with remap_variables(binarize_weight), \
                argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
                argscope(Conv2D, use_bias=False):
            logits = (LinearWrap(image)
                      .Conv2D('conv0', channels(48), 5, padding='VALID', use_bias=True)
                      .MaxPooling('pool0', 2, padding='SAME')
                      .apply(activate)
                      # 18
                      .Conv2D('conv1', channels(64), 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn1').apply(activate)

                      .Conv2D('conv2', channels(64), 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn2')
                      .MaxPooling('pool1', 2, padding='SAME')
                      .apply(activate)
                      # 9
                      .Conv2D('conv3', channels(128), 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn3').apply(activate)
                      # 7

                      .Conv2D('conv4', channels(128), 3, padding='SAME')
                      .apply(fg)
                      .BatchNorm('bn4').apply(activate)

                      .Conv2D('conv5', channels(128), 3, padding='VALID')
                      .apply(fg)
                      .BatchNorm('bn5').apply(activate)
                      # 5
                      .tf.nn.dropout(0.5 if is_training else 1.0)
                      .Conv2D('conv6', channels(512), 5, padding='VALID')
                      .apply(fg).BatchNorm('bn6')
                      .apply(cabs)
                      .FullyConnected('fc1', 10)())
        tf.nn.softmax(logits, name='output')

        # compute the number of failed samples
        wrong = tf.cast(tf.logical_not(tf.nn.in_top_k(logits, label, 1)), tf.float32, name='wrong_tensor')
        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')
        # weight decay on all W of fc layers
        wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

        add_param_summary(('.*/W', ['histogram', 'rms']))
        total_cost = tf.add_n([cost, wd_cost], name='cost')
        add_moving_summary(cost, wd_cost, total_cost)
        return total_cost
Exemple #57
0
    def build_losses(self, logits_real, logits_fake, logits_s_pred, logits_s_true, extra_g=0, l2_norm=0.00001):
        r"""Build the discriminator, fairness and generator losses.

        D and G play two-player minimax game with value function :math:`V(G,D)`.

        .. math::

            \min_G \max_D V(D, G) = \mathbb{E}_{x \sim p_{data}} [\log D(x)]
                + \mathbb{E}_{z \sim p_{fake}} [\log (1 - D(G(z)))]

        Args:
            logits_real (tensorflow.Tensor): discrim logits from real samples.
            logits_fake (tensorflow.Tensor): discrim logits from fake samples from generator.
            logits_s_pred (tensorflow.Tensor): logits scored against
                ``logits_s_true`` in the "fair" loss.
            logits_s_true (tensorflow.Tensor): used as the *labels* of the
                sigmoid cross-entropy in the "fair" loss (despite its name).
            extra_g (float): extra term added to the generator loss; exported
                as the tensor named ``klloss``.
            l2_norm (float): scale to apply L2 regularization.

        Returns:
            None. The losses are stored on ``self.d_loss``, ``self.f_loss``
            and ``self.g_loss``, and registered as moving summaries.

        """
        with tf.name_scope("GAN_loss"):
            score_real = tf.sigmoid(logits_real)
            score_fake = tf.sigmoid(logits_fake)
            tf.summary.histogram('score-real', score_real)
            tf.summary.histogram('score-fake', score_fake)

            score_s_pred = tf.sigmoid(logits_s_pred)
            tf.summary.histogram('score-s-pred', score_s_pred)

            with tf.name_scope("discrim"):
                # Soft targets for real samples, drawn from [0.7, 1.0). The
                # smoothing has to be applied to the labels: applied to the
                # cross-entropy output it only rescales the loss and adds
                # gradient-free noise to it.
                real_targets = tf.ones_like(logits_real) * 0.7 + \
                    tf.random_uniform(tf.shape(logits_real), maxval=0.3)
                d_loss_pos = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits_real,
                        labels=real_targets),
                    name='loss_real')

                d_loss_neg = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits_fake,
                        labels=tf.zeros_like(logits_fake)),
                    name='loss_fake')

                d_pos_acc = tf.reduce_mean(tf.cast(score_real > 0.5, tf.float32), name='accuracy_real')

                d_neg_acc = tf.reduce_mean(tf.cast(score_fake < 0.5, tf.float32), name='accuracy_fake')

                d_loss = 0.5 * d_loss_pos + 0.5 * d_loss_neg + \
                    tf.contrib.layers.apply_regularization(
                        tf.contrib.layers.l2_regularizer(l2_norm),
                        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "discrim"))

                self.d_loss = tf.identity(d_loss, name='loss')

            with tf.name_scope("fair"):
                f_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits_s_pred,
                        labels=logits_s_true),
                    name='loss_fair')

                # NOTE(review): this is the fraction of predictions below 0.5;
                # it never consults logits_s_true, so it is an accuracy only
                # if the true value is always 0 -- confirm the intent.
                s_pred_acc = tf.reduce_mean(tf.cast(score_s_pred < 0.5, tf.float32), name='accuracy_s')

                # The cross-entropy is negated before being stored.
                f_loss = - f_loss

                self.f_loss = tf.identity(f_loss, name='loss')

            with tf.name_scope("gen"):
                # Generator objective: the fake logits scored against all-ones labels.
                g_loss = tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        logits=logits_fake,
                        labels=tf.ones_like(logits_fake))) + \
                    tf.contrib.layers.apply_regularization(
                        tf.contrib.layers.l2_regularizer(l2_norm),
                        tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'gen'))

                g_loss = tf.identity(g_loss, name='loss')
                extra_g = tf.identity(extra_g, name='klloss')
                self.g_loss = tf.identity(g_loss + extra_g, name='final-g-loss')

            add_moving_summary(g_loss, extra_g, self.g_loss, self.d_loss, d_pos_acc, d_neg_acc,
                               self.f_loss, s_pred_acc, decay=0.)
Exemple #58
0
    def _build_graph(self, inputs):
        """Build the quantized classifier graph and store its loss in ``self.cost``.

        ``inputs`` unpacks to ``(inp, label)``. Besides the loss, the graph
        gets named tensors for the softmax output (``output``), a
        majority-vote error flag (``utt-wrong``) and the ``TotalMults`` /
        ``TotalWeights`` constants taken from ``self.network_complexity``.
        """
        inp, label = inputs
        # Not read below; retained so the tower-context lookup still happens.
        is_training = get_current_tower_context().is_training

        fw, fa = get_dorefa(BITW, BITA)

        def binarize_weight(v):
            """Variable remapper: apply ``fw`` to W/b outside the edge layers."""
            var_name = v.op.name
            is_param = var_name.endswith('W') or var_name.endswith('b')
            in_edge_layer = 'linear0' in var_name or 'last_linear' in var_name
            if is_param and not in_edge_layer:
                logger.info("Quantizing weight {}".format(var_name))
                return fw(v)
            print("Not quantizing", var_name)
            return v

        def nonlin(x, name="activate"):
            """Batch-norm, ReLU, then ``fa``; ``name`` is accepted but unused."""
            normalized = BNWithTrackedMults(x)
            rectified = tf.nn.relu(normalized)
            return fa(rectified)

        complexity = self.network_complexity
        tracked_layers = [
            FullyConnectedWithTrackedMults,
            BNReLUWithTrackedMults,
            BNWithTrackedMults,
        ]
        with remap_variables(binarize_weight), \
                argscope(tracked_layers, network_complexity=complexity), \
                argscope(BatchNorm, decay=0.9, epsilon=1e-4):
            hidden = self.net_fn(inp, nonlin, self.n_context)
            logits = FullyConnectedWithTrackedMults(
                'last_linear', hidden, out_dim=self.n_spks, nl=tf.identity)

        prob = tf.nn.softmax(logits, name='output')

        # used for validation accuracy of utterance: majority vote over the
        # per-row argmax, compared against the first label
        row_guesses = tf.argmax(prob, axis=1)
        identity_guesses = flatten(row_guesses)
        uniq_identities, _, count = tf.unique_with_counts(identity_guesses)
        top_vote_idx = tf.argmax(count)
        chosen_identity = tf.gather(uniq_identities, top_vote_idx)
        first_label = tf.cast(label[0], tf.int64)
        vote_mismatch = tf.not_equal(chosen_identity, first_label)
        # Only the named op matters; it is looked up by name, not by variable.
        tf.expand_dims(vote_mismatch, axis=0, name='utt-wrong')

        per_example_xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=label)
        cost = tf.reduce_mean(per_example_xent, name='cross_entropy_loss')
        add_moving_summary(cost)

        wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
        add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

        with tf.name_scope('original-weight-summaries'):
            for pattern in ('.*/W', '.*/b'):
                add_param_summary((pattern, ['rms', 'histogram']))

        with tf.name_scope('activation-summaries'):

            def fn(name):
                # Keep layer outputs, skipping inference-tower and quantized tensors.
                if "Inference" in name or 'quantized' in name:
                    return False
                return name.endswith('output') or name.endswith('output:0')

            tensors = get_tensors_from_graph(tf.get_default_graph(), fn)
            print("Adding activation tensors to summary:", tensors)
            for activation in tensors:
                add_tensor_summary(activation, ['rms', 'histogram'])

        if not self.regularize:
            self.cost = tf.identity(cost, name='cost')
        else:
            # L2 penalty on every '.*/W' variable, with a coefficient that
            # decays in steps as the global step grows
            wd_w = tf.train.exponential_decay(
                0.0002, get_global_step_var(), 480000, 0.2, True)
            l2_sum = regularize_cost('.*/W', tf.nn.l2_loss)
            wd_cost = tf.multiply(wd_w, l2_sum, name='wd_cost')
            add_moving_summary(wd_cost)
            self.cost = tf.add_n([cost, wd_cost], name='cost')

        # Export the complexity counters as named graph constants.
        tf.constant([complexity['mults']], name='TotalMults')
        tf.constant([complexity['weights']], name='TotalWeights')
        logger.info("Parameter count: {}".format(complexity))
Exemple #59
0
    def _build_graph(self, inputs):
        """Build the RPN and Fast R-CNN heads on top of a ResNet feature map.

        ``inputs`` unpacks to ``(image, anchor_labels, anchor_boxes, gt_boxes,
        gt_labels)``. When training, the summed losses are stored in
        ``self.cost``. Otherwise the named output tensors
        ``fastrcnn_all_probs``, ``fastrcnn_fg_probs`` and ``fastrcnn_fg_boxes``
        are created.
        """
        is_training = get_current_tower_context().is_training
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
        image = tf.expand_dims(image, 0)  # prepend an axis of size 1

        stride = config.ANCHOR_STRIDE
        to_featuremap = 1.0 / stride  # scales box coordinates by 1/ANCHOR_STRIDE

        # FSxFSxNAx4 (FS=MAX_SIZE//ANCHOR_STRIDE)
        with tf.name_scope('anchors'):
            all_anchors = tf.constant(
                get_all_anchors(), name='all_anchors', dtype=tf.float32)
            # Keep only the anchors covering this image's feature map.
            fm_rows = tf.shape(image)[1] // stride
            fm_cols = tf.shape(image)[2] // stride
            slice_size = tf.stack([fm_rows, fm_cols, -1, -1])
            fm_anchors = tf.slice(
                all_anchors, [0, 0, 0, 0], slice_size, name='fm_anchors')
            anchor_boxes_encoded = encode_bbox_target(anchor_boxes, fm_anchors)

        image = image_preprocess(image, bgr=True)
        image = tf.transpose(image, [0, 3, 1, 2])  # move axis 3 to position 1

        # resnet50
        featuremap = pretrained_resnet_conv4(image, [3, 4, 6])
        rpn_label_logits, rpn_box_logits = rpn_head(featuremap)
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchor_labels, anchor_boxes_encoded, rpn_label_logits, rpn_box_logits)

        # (fHxfWxNA)x4, floatbox
        decoded_boxes = decode_bbox_target(rpn_box_logits, fm_anchors)
        flat_label_logits = tf.reshape(rpn_label_logits, [-1])
        image_hw = tf.shape(image)[2:]
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            decoded_boxes, flat_label_logits, image_hw)

        if not is_training:
            # Inference: run the head on every proposal and decode the boxes.
            roi_resized = roi_align(
                featuremap, proposal_boxes * to_featuremap, 14)
            feature_fastrcnn = resnet_conv5(roi_resized)  # nxc
            label_logits, fastrcnn_box_logits = fastrcnn_head(
                feature_fastrcnn, config.NUM_CLASS)
            label_probs = tf.nn.softmax(
                label_logits, name='fastrcnn_all_probs')  # NP,
            labels = tf.argmax(label_logits, axis=1)
            fg_ind, fg_box_logits = fastrcnn_predict_boxes(
                labels, fastrcnn_box_logits)
            fg_label_probs = tf.gather(
                label_probs, fg_ind, name='fastrcnn_fg_probs')
            fg_boxes = tf.gather(proposal_boxes, fg_ind)

            reg_weights = tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
            fg_box_logits = fg_box_logits / reg_weights
            # Nfx4, floatbox
            decoded_boxes = decode_bbox_target(fg_box_logits, fg_boxes)
            decoded_boxes = tf.identity(
                decoded_boxes, name='fastrcnn_fg_boxes')
        else:
            # Training: sample proposals against ground truth, then add up
            # the four head losses and the weight-decay term.
            sampled = sample_fast_rcnn_targets(
                proposal_boxes, gt_boxes, gt_labels)
            rcnn_sampled_boxes, rcnn_encoded_boxes, rcnn_labels = sampled
            boxes_on_featuremap = rcnn_sampled_boxes * to_featuremap
            roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)
            feature_fastrcnn = resnet_conv5(roi_resized)  # nxc
            fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head(
                feature_fastrcnn, config.NUM_CLASS)

            fastrcnn_label_loss, fastrcnn_box_loss = fastrcnn_losses(
                rcnn_labels, rcnn_encoded_boxes,
                fastrcnn_label_logits, fastrcnn_box_logits)

            wd_cost = regularize_cost(
                '(?:group1|group2|group3|rpn|fastrcnn)/.*W',
                l2_regularizer(1e-4),
                name='wd_cost')

            loss_terms = [
                rpn_label_loss,
                rpn_box_loss,
                fastrcnn_label_loss,
                fastrcnn_box_loss,
                wd_cost,
            ]
            self.cost = tf.add_n(loss_terms, 'total_cost')

            # One summary call per tensor, as separate invocations.
            for tracked in (self.cost, wd_cost):
                add_moving_summary(tracked)
Exemple #60
0
    def _build_graph(self, inputs):
        """Build the encoder/decoder network, its losses and image summaries.

        ``inputs`` unpacks to ``(images, truemap_coded)``. The last channel of
        ``truemap_coded`` is the regression target. When
        ``self.type_classification`` is set, channel 1 holds the type labels.
        The prediction is exported as the tensor ``predmap-coded``. When
        training, the total loss is stored in ``self.cost``.
        """

        def conv_stack(l, channel, nr_blks):
            # nr_blks consecutive 3x3 'valid' convolutions with BNReLU activation
            for idx in range(nr_blks):
                l = Conv2D('conv_%d' % idx, l, channel, 3,
                           padding='valid', strides=1, activation=BNReLU)
            return l

        def down_conv_block(name, l, channel, nr_blks, stride=1):
            # Optional 2x max-pool followed by the conv stack.
            with tf.variable_scope(name):
                if stride != 1:
                    assert stride == 2, 'U-Net supports stride 2 down-sample only'
                    l = MaxPooling('max_pool', l, 2, strides=2)
                return conv_stack(l, channel, nr_blks)

        def up_conv_block(name, l, skip, channel, nr_blks, stride=2):
            # Optional 2x transposed conv, concatenated with the skip tensor
            # on axis 1, followed by the conv stack.
            with tf.variable_scope(name):
                if stride != 1:
                    assert stride == 2, 'U-Net supports stride 2 up-sample only'
                    up_channel = l.get_shape().as_list()[1]  # NCHW
                    l = Conv2DTranspose('deconv', l, up_channel, 2, strides=2)
                    l = tf.concat([l, skip], axis=1)
                return conv_stack(l, channel, nr_blks)

        is_training = get_current_tower_context().is_training

        images, truemap_coded = inputs
        orig_imgs = images

        if self.type_classification:
            type_labels = tf.cast(truemap_coded[..., 1], tf.int32)
            type_labels = tf.identity(type_labels, name='truemap-type')
            one_type = tf.one_hot(type_labels, self.nr_types, axis=-1)
            true_type = tf.expand_dims(type_labels, axis=-1)

        true_dst = tf.expand_dims(truemap_coded[..., -1], axis=-1)
        true_dst = tf.identity(true_dst, name='truemap-dst')

        # Xavier-initialised convolutions with bias; every layer shares the
        # model's data_format
        conv_defaults = argscope(
            Conv2D, activation=tf.identity, use_bias=True,
            kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
            bias_initializer=tf.constant_initializer(0.1))
        format_defaults = argscope(
            [Conv2D, Conv2DTranspose, MaxPooling, BatchNorm],
            data_format=self.data_format)
        with conv_defaults, format_defaults:

            # scale to [0, 1] and move axis 3 to position 1
            net_in = tf.transpose(images / 255.0, [0, 3, 1, 2])

            with tf.variable_scope('encoder'):
                enc0 = down_conv_block('e0', net_in, 32, nr_blks=2, stride=1)
                enc1 = down_conv_block('e1', enc0, 64, nr_blks=2, stride=2)
                enc2 = down_conv_block('e2', enc1, 128, nr_blks=2, stride=2)
                enc3 = down_conv_block('e3', enc2, 256, nr_blks=2, stride=2)
                enc4 = down_conv_block('e4', enc3, 512, nr_blks=2, stride=2)

                # cropped encoder outputs, fed to the decoder as skip inputs
                skip0 = crop_op(enc0, (176, 176))
                skip1 = crop_op(enc1, (80, 80))
                skip2 = crop_op(enc2, (32, 32))
                skip3 = crop_op(enc3, (8, 8))

            with tf.variable_scope('decoder'):
                dec3 = up_conv_block('d3', enc4, skip3, 256, nr_blks=2, stride=2)
                dec2 = up_conv_block('d2', dec3, skip2, 128, nr_blks=2, stride=2)
                dec1 = up_conv_block('d1', dec2, skip1, 64, nr_blks=2, stride=2)
                dec0 = up_conv_block('d0', dec1, skip0, 32, nr_blks=2, stride=2)

            # 1x1 output head for the regression map, moved back to channels-last
            logi_dst = Conv2D('conv_out_dst', dec0, 1, 1, activation=tf.identity)
            logi_dst = tf.transpose(logi_dst, [0, 2, 3, 1])
            pred_dst = tf.identity(logi_dst, name='predmap-dst')

            if not self.type_classification:
                predmap_coded = pred_dst
            else:
                logi_type = Conv2D('conv_out_type', dec0, self.nr_types, 1,
                                   activation=tf.identity)
                logi_type = tf.transpose(logi_type, [0, 2, 3, 1])
                soft_type = tf.nn.softmax(logi_type, axis=-1)
                # encoded so that inference can extract all output at once
                predmap_coded = tf.concat([soft_type, pred_dst], axis=-1)

            # * channel ordering: type-map, segmentation map
            # encoded so that inference can extract all output at once
            predmap_coded = tf.identity(predmap_coded, name='predmap-coded')

        if is_training:
            # ---- losses: accumulated term by term, starting from 0
            loss = 0

            # regression loss: mean squared error on the distance map
            residual = pred_dst - true_dst
            loss_mse = tf.reduce_mean(residual * residual, name='loss_mse')
            loss = loss + loss_mse

            if self.type_classification:
                loss_type = categorical_crossentropy(soft_type, one_type)
                loss_type = tf.reduce_mean(loss_type, name='loss-xentropy-class')
                add_moving_summary(loss_type)
                loss = loss + loss_type

            wd_loss = regularize_cost(
                '.*/W', l2_regularizer(5.0e-6), name='l2_regularize_loss')
            loss = loss + wd_loss

            self.cost = tf.identity(loss, name='cost')
            add_moving_summary(self.cost)

            add_param_summary(('.*/W', ['histogram']))  # monitor W

            # ---- image summaries
            orig_imgs = tf.cast(orig_imgs, tf.uint8)
            tf.summary.image('input', orig_imgs, max_outputs=1)

            orig_imgs = crop_op(orig_imgs, (184, 184), "NHWC")

            pred_dst = colorize(pred_dst[..., 0], cmap='jet')
            true_dst = colorize(true_dst[..., 0], cmap='jet')

            # input, target and prediction joined along axis 2
            viz = tf.concat([orig_imgs, true_dst, pred_dst], 2)
            tf.summary.image('output', viz, max_outputs=1)

        return