Example no. 1
    def build_graph(self, *args):
        costs = []
        for i, name in enumerate(self.agent_names):
            joint_state, action, reward, isOver, comb_mask, joint_fine_mask = args[i * 6:(i + 1) * 6]
            with tf.variable_scope(name):
                with conditional(name is None, varreplace.freeze_variables()):
                    state = tf.identity(joint_state[:, 0, :, :, :], name='state')
                    fine_mask = tf.identity(joint_fine_mask[:, 0, :], name='fine_mask')
                    self.predict_value = self.get_DQN_prediction(state, comb_mask, fine_mask)
                    if not get_current_tower_context().is_training:
                        continue

                    # reward = tf.clip_by_value(reward, -1, 1)
                    next_state = tf.identity(joint_state[:, 1, :, :, :], name='next_state')
                    next_fine_mask = tf.identity(joint_fine_mask[:, 1, :], name='next_fine_mask')
                    action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

                    pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
                    max_pred_reward = tf.reduce_mean(tf.reduce_max(
                        self.predict_value, 1), name='predict_reward')
                    summary.add_moving_summary(max_pred_reward)

                    with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
                        # we are alternating between comb and fine states
                        targetQ_predict_value = self.get_DQN_prediction(next_state, tf.logical_not(comb_mask), next_fine_mask)    # NxA

                    if self.method != 'Double':
                        # DQN
                        best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
                    else:
                        # Double-DQN
                        next_predict_value = self.get_DQN_prediction(next_state, tf.logical_not(comb_mask), next_fine_mask)
                        self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
                        predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
                        best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

                    target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)
                    # target = tf.Print(target, [target], summarize=100)
                    # tf.assert_greater(target, -100., message='target error')
                    # tf.assert_greater(pred_action_value, -100., message='pred value error')
                    # pred_action_value = tf.Print(pred_action_value, [pred_action_value], summarize=100)

                    l2_loss = tensorpack.regularize_cost(name + '/dqn.*W{1}', l2_regularizer(1e-3))
                    # cost = tf.losses.mean_squared_error(target, pred_action_value)
                    with tf.control_dependencies([tf.assert_greater(target, -100., message='target error'), tf.assert_greater(pred_action_value, -100., message='pred value error')]):
                        cost = tf.losses.huber_loss(
                                        target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
                    summary.add_param_summary((name + '.*/W', ['histogram', 'rms']))   # monitor all W
                    summary.add_moving_summary(cost)
                    costs.append(cost)
        if not get_current_tower_context().is_training:
            return
        return tf.add_n([costs[i] * self.cost_weights[i] for i in range(3)])
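The target in this example follows the standard (Double-)DQN bootstrapping rule: pick the next action with one network, evaluate it with the frozen target network, and zero the bootstrap on terminal transitions. A minimal NumPy sketch of that computation, with hypothetical toy values:

import numpy as np

gamma = 0.99
reward = np.array([1.0, 0.0])           # r_t for two transitions
is_over = np.array([0.0, 1.0])          # 1.0 marks a terminal transition
q_target = np.array([[0.2, 0.5, 0.1],   # target-network Q(s', a)
                     [0.3, 0.4, 0.9]])
q_online = np.array([[0.6, 0.1, 0.2],   # online-network Q(s', a)
                     [0.2, 0.8, 0.1]])

# plain DQN bootstraps with the target network's own maximum
best_v_dqn = q_target.max(axis=1)
# Double-DQN picks the action with the online network, evaluates it with the target network
greedy = q_online.argmax(axis=1)
best_v_double = q_target[np.arange(len(greedy)), greedy]

target = reward + (1.0 - is_over) * gamma * best_v_double
print(target)   # the terminal transition keeps only its reward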
Example no. 2
    def build_graph(self, image, label):
        ctx = get_current_tower_context()

        # an all-zero tensor hurts performance for some reason.
        label = tf.random_uniform([self.batch],
                                  minval=0,
                                  maxval=1000 - 1,
                                  dtype=tf.int32,
                                  name='synthetic_labels')

        # our fake images are in [0, 1]
        image = tf.cast(image, tf.float32) * 2.0 - 1.
        if self.data_format == 'NCHW':
            image = tf.transpose(image, [0, 3, 1, 2])

        logits = self._get_logits(image)
        if logits.dtype != tf.float32:
            logger.info("Casting back to fp32 ...")
            logits = tf.cast(logits, tf.float32)

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        loss = tf.reduce_mean(loss, name='xentropy-loss')
        # TODO tensorflow/benchmark only computes WD on 1 GPU.
        if False:
            self.cost = loss  # disable wd
        else:
            wd_cost = regularize_cost('.*', tf.nn.l2_loss) * 1e-4
            self.cost = tf.add_n([loss, wd_cost], name='cost')

        self.loss = self.cost  # NEEDED FOR RAY DEMO
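`regularize_cost('.*', tf.nn.l2_loss) * 1e-4` above sums an L2 penalty over every trainable variable whose name matches the regex. A rough sketch of the same idea (not tensorpack's exact implementation, which also handles towers and logging):

import re
import tensorflow as tf

def l2_weight_decay(pattern, weight):
    # sum tf.nn.l2_loss over trainable variables whose names match `pattern`
    losses = [tf.nn.l2_loss(v) for v in tf.trainable_variables()
              if re.search(pattern, v.name)]
    return tf.add_n(losses) * weight if losses else tf.constant(0.0)

# wd_cost = l2_weight_decay('.*', 1e-4)   # analogous to the call above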
Example no. 3
    def _build_graph(self, inputs):
        self.x_mfcc, self.y_spec, self.y_mel = inputs

        is_training = get_current_tower_context().is_training

        # build net1
        # Extract PPGs from the SI-ASR model trained in train1.
        # Feed the MFCCs extracted from the target speech to obtain the PPGs for those MFCCs.
        self.net1 = Net1()
        with tf.variable_scope('net1'):
            self.ppgs, _, _ = self.net1.network(self.x_mfcc, is_training)
        self.ppgs = tf.identity(self.ppgs, name='ppgs')

        # build net2
        # Predict the spectrogram and mel-spectrogram from the PPGs obtained by passing through net1.
        with tf.variable_scope('net2'):
            self.pred_spec, self.pred_mel = self.network(
                self.ppgs, is_training)
        self.pred_spec = tf.identity(self.pred_spec, name='pred_spec')

        self.cost = self.loss()

        # summaries
        tf.summary.scalar('net2/train/loss', self.cost)

        if not is_training:
            tf.summary.scalar('net2/eval/summ_loss', self.cost)
Example no. 4
    def build_graph(self, image, label):
        # an all-zero tensor hurts performance for some reason.
        ctx = get_current_tower_context()
        label = tf.random_uniform(
            [args.batch],
            minval=0, maxval=1000 - 1,
            dtype=tf.int32, name='synthetic_labels')

        # our fake images are in [0, 1]
        target_dtype = tf.float16 if args.use_fp16 else tf.float32
        if image.dtype != target_dtype:
            image = tf.cast(image, target_dtype) * 2.0 - 1.
        if self.data_format == 'NCHW':
            image = tf.transpose(image, [0, 3, 1, 2])

        logits = self._get_logits(image)

        if logits.dtype != tf.float32:
            logger.info("Casting logits back to fp32 ...")
            logits = tf.cast(logits, tf.float32)

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        loss = tf.reduce_mean(loss, name='xentropy-loss')
        if ctx.index == ctx.total - 1:
            wd_cost = regularize_cost('.*', tf.nn.l2_loss) * (1e-4 * ctx.total)
            return tf.add_n([loss, wd_cost], name='cost')
        else:
            return loss
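Scaling the weight-decay term by `ctx.total` and adding it on the last tower only is equivalent, once the per-tower losses or gradients are averaged across towers, to adding the unscaled term on every tower. A toy check of that bookkeeping:

num_towers = 4
base_loss = 1.0   # hypothetical per-tower data loss
wd = 0.25         # hypothetical weight-decay penalty

# variant 1: add wd on every tower, then average over towers
avg_everywhere = sum(base_loss + wd for _ in range(num_towers)) / num_towers

# variant 2: add wd * num_towers on the last tower only, then average
avg_last_only = (base_loss * num_towers + wd * num_towers) / num_towers

assert abs(avg_everywhere - avg_last_only) < 1e-9   # both give base_loss + wd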
Example no. 5
    def build_graph(self, state, val_target):
        values = tf.squeeze(self.get_pred(state), axis=1, name='value')
        # fake_values = tf.zeros_like(values)
        is_training = get_current_tower_context().is_training
        if not is_training:
            return

        with tf.variable_scope("value_loss"):
            value_loss = tf.reduce_mean(tf.squared_difference(
                val_target, values),
                                        name='value_loss')
        # l2_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES, scope=SCOPE)
        # NOTE: this collection doesn't always grow with towers.
        # It only grows with actual variable creation, but not get_variable call.
        # ctx = get_current_tower_context()
        # if ctx.has_own_variables:  # be careful of the first tower (name='')
        #     l2_loss = ctx.get_collection_in_tower(tf.GraphKeys.REGULARIZATION_LOSSES)
        # else:
        #     l2_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        # if len(l2_loss) > 0:
        #     logger.info("regularize_cost_from_collection() found {} regularizers "
        #                 "in REGULARIZATION_LOSSES collection.".format(len(l2_loss)))

        loss = value_loss
        loss = tf.identity(loss, name='loss')

        add_moving_summary(loss, decay=0)
        add_moving_summary(value_loss, decay=0)
        return loss
Example no. 6
    def build_graph(self, inputs, y_spec, y_mel):
        self.x_mfcc, self.y_spec, self.y_mel = inputs, y_spec, y_mel

        is_training = get_current_tower_context().is_training

        # build net1
        self.net1 = Net1()
        with tf.variable_scope('net1'):
            self.ppgs, _, _ = self.net1.network(self.x_mfcc, is_training)
        self.ppgs = tf.identity(self.ppgs, name='ppgs')

        # build net2
        with tf.variable_scope('net2'):
            self.pred_spec, self.pred_mel = self.network(
                self.ppgs, is_training)
        self.pred_spec = tf.identity(self.pred_spec, name='pred_spec')

        self.cost = self.loss()

        # summaries
        tf.summary.scalar('net2/train/loss', self.cost)

        if not is_training:
            tf.summary.scalar('net2/eval/summ_loss', self.cost)
        return self.cost
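Example no. 7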
    def _build_graph(self, inputs):
        self.x_mfcc, self.y_spec, self.y_mel = inputs

        is_training = get_current_tower_context().is_training

        # build net1
        self.net1 = Net1()
        with tf.variable_scope('net1'):
            self.ppgs, _, _ = self.net1.network(self.x_mfcc, is_training)

        self.ppgs = tf.identity(self.ppgs, name='ppgs')

        # build net2
        with tf.variable_scope('net2'):
            self.mu, self.log_var, self.log_pi = self.network(
                self.ppgs, is_training)

        self.cost = self.loss()

        # summaries
        tf.summary.scalar('net2/train/loss', self.cost)
        tf.summary.histogram('net2/train/mu', self.mu)
        tf.summary.histogram('net2/train/var', tf.exp(self.log_var))
        tf.summary.histogram('net2/train/pi', tf.exp(self.log_pi))

        if not is_training:
            tf.summary.scalar('net2/eval/summ_loss', self.cost)

            # build for conversion phase
            self.convert()
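Example no. 8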
    def _build_graph(self, inputs):
        self.x_mfccs, self.y_ppgs = inputs
        is_training = get_current_tower_context().is_training
        with tf.variable_scope('net1'):
            self.ppgs, self.preds, self.logits = self.network(
                self.x_mfccs, is_training)
        self.cost = self.loss()
        acc = self.acc()

        # summaries
        tf.summary.scalar('net1/train/loss', self.cost)
        tf.summary.scalar('net1/train/acc', acc)

        if not is_training:
            # summaries
            tf.summary.scalar('net1/eval/summ_loss', self.cost)
            tf.summary.scalar('net1/eval/summ_acc', acc)

            # for confusion matrix
            tf.reshape(self.y_ppgs,
                       shape=(tf.size(self.y_ppgs), ),
                       name='net1/eval/y_ppg_1d')
            tf.reshape(self.preds,
                       shape=(tf.size(self.preds), ),
                       name='net1/eval/pred_ppg_1d')
Example no. 9
    def _build_graph(self, inputs):
        #self.x_mfcc, self.y_spec, self.y_mel = inputs
        self.y_mel, self.ppgs = inputs
        is_training = get_current_tower_context().is_training

        # build net1
        '''
        self.net1 = Net1()
        with tf.variable_scope('net1'):
            self.ppgs, _, _ = self.net1.network(self.x_mfcc, is_training)
        self.ppgs = tf.identity(self.ppgs, name='ppgs')
        '''


        # build net2
        with tf.variable_scope('net2'):
            #self.pred_spec, self.pred_mel = self.network(self.ppgs, is_training)
            print("begin prediction")
            self.pred_mel = self.network(self.ppgs, is_training)
        #self.pred_spec = tf.identity(self.pred_spec, name='pred_spec')
        self.pred_mel = tf.identity(self.pred_mel, name='pred_mel')

        self.cost = self.loss()

        # summaries
        tf.summary.scalar('net2/train/loss', self.cost)

        if not is_training:
            tf.summary.scalar('net2/eval/summ_loss', self.cost)
Example no. 10
    def _build_graph(self, inputs):
        comb_state, action, reward, isOver = inputs
        comb_state = tf.cast(comb_state, tf.float32)
        state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, -1, self.channel], name='state')
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        next_state = tf.slice(comb_state, [0, 0, 0, 0, 1], [-1, -1, -1, -1, self.channel], name='next_state')
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(
            self.predict_value, 1), name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'):
            targetQ_predict_value = self.get_DQN_prediction(next_state)    # NxA

        # if self.method != 'Double':
        #     # DQN
        #     best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
        # else:
        #     # Double-DQN
        #     next_predict_value = self.get_DQN_prediction(next_state)
        #     self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
        #     predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
        #     best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        # target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

        # if (self.method == 'DQN') or (self.method == 'Dueling'):
        if 'Double' not in self.method:
            # DQN
            best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
        # if self.method in ('Double', 'DuelingDouble'):
        else:
            # Double-DQN
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)



        # self.cost = tf.clip_by_value(tf.losses.huber_loss(
        #                                 target,
        #                                 pred_action_value,
        #                                 reduction=tf.losses.Reduction.MEAN)
        #                             , -1, 1, name='cost')
        self.cost = tf.losses.huber_loss(target, pred_action_value,
                                         reduction=tf.losses.Reduction.MEAN)

        summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                  ('fc.*/W', ['histogram', 'rms']))   # monitor all W
        summary.add_moving_summary(self.cost)
Example no. 11
 def build_graph(self, im, gt, training=None):
     model_class = choose_model(self.cfg.network["name"])
     if training is None:
         training = get_current_tower_context().is_training
     self.ops = model_class(im, self.cfg, training)
     self.loss = build_loss(self.ops, im, gt, self.cfg)
     tf.summary.scalar("Loss", self.loss)
     return self.loss
Example no. 12
    def build_graph(self, image, label):
        """This function should build the model which takes the input variables
        and return cost at the end"""

        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1  # center the pixels values at zero

        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here we use 32 channel convolution with shape 3x3
        with argscope([tf.layers.conv2d],
                      padding='same',
                      activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            l = tf.layers.dropout(
                l, rate=0.5, training=get_current_tower_context().is_training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(
            cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1),
                          tf.float32,
                          name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
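Example no. 13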
    def network(self, ppgs, is_training):
        # Pre-net
        prenet_out = prenet(
            ppgs,
            num_units=[hp.train2.hidden_units, hp.train2.hidden_units // 2],
            dropout_rate=hp.train2.dropout_rate,
            is_training=is_training)  # (N, T, E/2)

        # CBHG1: mel-scale
        # pred_mel = cbhg(prenet_out, hp.train2.num_banks, hp.train2.hidden_units // 2,
        #                 hp.train2.num_highway_blocks, hp.train2.norm_type, is_training,
        #                 scope="cbhg_mel")
        # pred_mel = tf.layers.dense(pred_mel, self.y_mel.shape[-1])  # (N, T, n_mels)
        pred_mel = prenet_out

        # CBHG2: linear-scale
        out = tf.layers.dense(pred_mel,
                              hp.train2.hidden_units // 2)  # (N, T, n_mels)
        out = cbhg(out,
                   hp.train2.num_banks,
                   hp.train2.hidden_units // 2,
                   hp.train2.num_highway_blocks,
                   hp.train2.norm_type,
                   is_training,
                   scope="cbhg_linear")

        _, n_timesteps, n_bins = self.y_spec.get_shape().as_list()
        n_units = n_bins * hp.train2.n_mixtures
        out = tf.layers.dense(out,
                              n_units * 3,
                              bias_initializer=tf.random_uniform_initializer(
                                  minval=-3., maxval=3.))

        mu = tf.nn.sigmoid(out[..., :n_units])
        mu = tf.reshape(
            mu,
            shape=(-1, n_timesteps, n_bins,
                   hp.train2.n_mixtures))  # (N, T, 1+hp.n_fft//2, n_mixtures)

        log_var = tf.maximum(out[..., n_units:2 * n_units], -7.0)
        log_var = tf.reshape(
            log_var,
            shape=(-1, n_timesteps, n_bins,
                   hp.train2.n_mixtures))  # (N, T, 1+hp.n_fft//2, n_mixtures)

        log_pi = tf.reshape(
            out[..., 2 * n_units:3 * n_units],
            shape=(-1, n_timesteps, n_bins,
                   hp.train2.n_mixtures))  # (N, T, 1+hp.n_fft//2, n_mixtures)
        log_pi = normalize(log_pi,
                           type='ins',
                           is_training=get_current_tower_context().is_training,
                           scope='normalize_pi')
        log_pi = tf.nn.log_softmax(log_pi)

        return mu, log_var, log_pi
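The `network` method above outputs, for every time-frequency bin, the parameters of a mixture of Gaussians: means `mu`, log-variances `log_var`, and log mixture weights `log_pi`. A minimal NumPy sketch of drawing one sample from such a mixture for a single bin (hypothetical values, not the project's conversion code):

import numpy as np

rng = np.random.default_rng(0)

# hypothetical parameters for one time-frequency bin with 3 mixture components
mu = np.array([0.2, 0.7, 0.5])           # component means (sigmoid range)
log_var = np.array([-4.0, -5.0, -3.5])   # component log-variances
log_pi = np.log([0.6, 0.3, 0.1])         # log mixture weights

component = rng.choice(len(mu), p=np.exp(log_pi))   # pick a mixture component
sample = rng.normal(mu[component], np.exp(0.5 * log_var[component]))
print(component, sample)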
Example no. 14
    def __init__(self,
                 proposals,
                 roi_func,
                 fastrcnn_head_func,
                 gt_targets,
                 image_shape2d,
                 num_classes,
                 roi_func_extra=None):
        # Arguments:
        # roi_func: a function mapping boxes -> features; I guess ROI means region of interest
        # fastrcnn_head_func: the fastrcnn head function, applied to the processed features
        # gt_targets = boxes + labels
        """
        Args:
            proposals: BoxProposals
            roi_func (boxes -> features): a function to crop features with rois   
            fastrcnn_head_func (features -> features): the fastrcnn head to apply on the cropped features  
            gt_targets (gt_boxes, gt_labels):       
        """
        # set the Cascade attributes
        for k, v in locals().items():
            if k != 'self':
                setattr(self, k, v)
        self.gt_boxes, self.gt_labels = gt_targets
        del self.gt_targets

        # three cascade stages
        self.num_cascade_stages = len(cfg.CASCADE.IOUS)

        self.is_training = get_current_tower_context().is_training
        if self.is_training:

            @tf.custom_gradient
            def scale_gradient(x):
                return x, lambda dy: dy * (1.0 / self.num_cascade_stages)

            self.scale_gradient = scale_gradient
        else:
            self.scale_gradient = tf.identity

        ious = cfg.CASCADE.IOUS
        # It's unclear how to do >3 stages, so it does not make sense to implement them
        assert self.num_cascade_stages == 3, "Only 3-stage cascade was implemented!"
        with tf.variable_scope('cascade_rcnn_stage1'):
            H1, B1 = self.run_head(self.proposals, 0)

        with tf.variable_scope('cascade_rcnn_stage2'):
            B1_proposal = self.match_box_with_gt(B1, ious[1])
            H2, B2 = self.run_head(B1_proposal, 1)

        with tf.variable_scope('cascade_rcnn_stage3'):
            B2_proposal = self.match_box_with_gt(B2, ious[2])
            H3, B3 = self.run_head(B2_proposal, 2)
        self._cascade_boxes = [B1, B2, B3]
        self._heads = [H1, H2, H3]
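`scale_gradient` keeps the forward activations unchanged and divides the incoming gradient by the number of cascade stages, so features shared across stages do not receive a gradient that grows with the stage count. A self-contained TF1-style sketch of the same trick (the constant stands in for `self.num_cascade_stages`):

import tensorflow as tf

NUM_STAGES = 3   # stand-in for self.num_cascade_stages

@tf.custom_gradient
def scale_gradient(x):
    # identity in the forward pass; gradient divided by NUM_STAGES in the backward pass
    return x, lambda dy: dy * (1.0 / NUM_STAGES)

x = tf.constant(2.0)
y = scale_gradient(x) * 5.0
grad = tf.gradients(y, x)[0]   # evaluates to 5/3 in a TF1 session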
Example no. 15
def FullyConnectedWithTrackedMults(x,
                                   out_dim,
                                   network_complexity=None,
                                   W_init=None,
                                   b_init=None,
                                   nl=tf.identity,
                                   use_bias=True):
    """
    Fully-Connected layer, takes an N>1D tensor and returns a 2D tensor.
    It is an equivalent of `tf.layers.dense` except for naming conventions.

    Args:
        x (tf.Tensor): a tensor to be flattened except for the first dimension.
        out_dim (int): output dimension
        W_init: initializer for W. Defaults to `variance_scaling_initializer`.
        b_init: initializer for b. Defaults to zero.
        nl: a nonlinearity function
        use_bias (bool): whether to use bias.

    Returns:
        tf.Tensor: an NC tensor named ``output`` with attribute `variables`.

    Variable Names:

    * ``W``: weights of shape [in_dim, out_dim]
    * ``b``: bias
    """
    x = symbf.batch_flatten(x)

    if W_init is None:
        W_init = tf.contrib.layers.variance_scaling_initializer()
    if b_init is None:
        b_init = tf.constant_initializer()

    if get_current_tower_context().is_main_training_tower:
        network_complexity['weights'] += out_dim * x.get_shape().as_list()[1]
        network_complexity['mults'] += out_dim * x.get_shape().as_list()[1]
        if use_bias:
            network_complexity['weights'] += out_dim

    W = tf.get_variable('W', (x.get_shape().as_list()[1], out_dim),
                        initializer=W_init)
    if use_bias:
        b = tf.get_variable('b', out_dim, initializer=b_init)

    product = tf.matmul(x, W)

    ret = nl(tf.nn.bias_add(product, b) if use_bias else product,
             name='output')
    ret.variables = VariableHolder(W=W)
    if use_bias:
        ret.variables.b = b

    return ret
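On the main training tower, the layer above adds `in_dim * out_dim` to both the weight and multiply counts, plus `out_dim` bias weights. A small pure-Python sketch of that bookkeeping with hypothetical shapes:

def fc_complexity(in_dim, out_dim, use_bias=True):
    """Weight and multiply counts for a dense layer, mirroring the tracking above."""
    complexity = {'weights': in_dim * out_dim, 'mults': in_dim * out_dim}
    if use_bias:
        complexity['weights'] += out_dim
    return complexity

print(fc_complexity(in_dim=7 * 7 * 32, out_dim=512))
# {'weights': 803328, 'mults': 802816}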
Example no. 16
def generate_rpn_proposals(boxes, scores, img_shape):
    """
    Args:
        boxes: nx4 float dtype, decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    if get_current_tower_context().is_training:
        PRE_NMS_TOPK = config.TRAIN_PRE_NMS_TOPK
        POST_NMS_TOPK = config.TRAIN_POST_NMS_TOPK
    else:
        PRE_NMS_TOPK = config.TEST_PRE_NMS_TOPK
        POST_NMS_TOPK = config.TEST_POST_NMS_TOPK

    @under_name_scope()
    def clip_boxes(boxes, window):
        boxes = tf.maximum(boxes, 0.0)
        m = tf.tile(tf.reverse(window, [0]), [2])    # (4,)
        boxes = tf.minimum(boxes, tf.to_float(m))
        return boxes

    topk = tf.minimum(PRE_NMS_TOPK, tf.size(scores))
    topk_scores, topk_indices = tf.nn.top_k(scores, k=topk, sorted=False)
    topk_boxes = tf.gather(boxes, topk_indices)
    topk_boxes = clip_boxes(topk_boxes, img_shape)

    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
    # nx1x2 each
    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
    valid = tf.reduce_all(wbhb > config.RPN_MIN_SIZE, axis=1)  # n,
    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
    topk_valid_scores = tf.boolean_mask(topk_scores, valid)

    topk_valid_boxes_y1x1y2x2 = tf.reshape(
        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes_y1x1y2x2,
        topk_valid_scores,
        max_output_size=POST_NMS_TOPK,
        iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)

    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
    final_boxes = tf.gather(
        topk_valid_boxes,
        nms_indices, name='boxes')
    final_scores = tf.gather(topk_valid_scores, nms_indices, name='scores')
    final_probs = tf.gather(topk_valid_scores, nms_indices, name='probs')
    return final_boxes, final_scores
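Example no. 17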
    def build_graph(self, image, label):
        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1  # center the pixels values at zero

        with argscope([tf.layers.conv2d],
                      padding='same',
                      activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            l = tf.layers.dropout(
                l, rate=0.5, training=get_current_tower_context().is_training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        tf.nn.softmax(logits, name='prob')  # a Bx10 with probabilities

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(
            cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1),
                          tf.float32,
                          name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error (in a moving_average fashion):
        # 1. write the value to tensorboard
        # 2. write the value to stat.json
        # 3. print the value after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        return total_cost
Example no. 18
    def build_graph(self, image, label):
        """This function should build the model which takes the input variables
        and return cost at the end"""

        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixels values at zero

        # The context manager `argscope` sets the default option for all the layers under
        # this context. Here we use 32 channel convolution with shape 3x3
        with argscope([tf.layers.conv2d], padding='same', activation=tf.nn.relu):
            l = tf.layers.conv2d(image, 32, 3, name='conv0')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv1')
            l = tf.layers.conv2d(l, 32, 3, name='conv2')
            l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
            l = tf.layers.conv2d(l, 32, 3, name='conv3')
            l = tf.layers.flatten(l)
            l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
            l = tf.layers.dropout(l, rate=0.5,
                                  training=get_current_tower_context().is_training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error & accuracy (in a moving average fashion). The value will be automatically
        # 1. written to tensorboard
        # 2. written to stat.json
        # 3. printed after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        # If you don't like regex, you can certainly define the cost in any other methods.
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        total_cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, total_cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
        # the function should return the total cost to be optimized
        return total_cost
Example no. 19
    def _build_graph(self, inputs):
        """This function should build the model which takes the input variables
        and define self.cost at the end"""

        # inputs contains a list of input variables defined above
        image, label = inputs

        # In tensorflow, inputs to convolution function are assumed to be
        # NHWC. Add a single channel here.
        image = tf.expand_dims(image, 3)

        image = image * 2 - 1   # center the pixels values at zero

        l = tf.layers.conv2d(image, 32, 3, padding='same', activation=tf.nn.relu, name='conv0')
        l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
        l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv1')
        l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv2')
        l = tf.layers.max_pooling2d(l, 2, 2, padding='valid')
        l = tf.layers.conv2d(l, 32, 3, padding='same', activation=tf.nn.relu, name='conv3')
        l = tf.layers.flatten(l)
        l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc0')
        l = tf.layers.dropout(l, rate=0.5,
                              training=get_current_tower_context().is_training)
        logits = tf.layers.dense(l, 10, activation=tf.identity, name='fc1')

        tf.nn.softmax(logits, name='prob')   # a Bx10 with probabilities

        # a vector of length B with loss of each sample
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

        correct = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32, name='correct')
        accuracy = tf.reduce_mean(correct, name='accuracy')

        # This will monitor training error (in a moving_average fashion):
        # 1. write the value to tensorboard
        # 2. write the value to stat.json
        # 3. print the value after each epoch
        train_error = tf.reduce_mean(1 - correct, name='train_error')
        summary.add_moving_summary(train_error, accuracy)

        # Use a regex to find parameters to apply weight decay.
        # Here we apply a weight decay on all W (weight matrix) of all fc layers
        wd_cost = tf.multiply(1e-5,
                              regularize_cost('fc.*/kernel', tf.nn.l2_loss),
                              name='regularize_loss')
        self.cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost, self.cost)

        # monitor histogram of all weight (of conv and fc layers) in tensorboard
        summary.add_param_summary(('.*/kernel', ['histogram', 'rms']))
Example no. 20
    def build_graph(self, comb_state, action, reward, isOver):
        comb_state = tf.cast(comb_state, tf.float32)
        input_rank = comb_state.shape.rank

        state = tf.slice(comb_state, [0] * input_rank,
                         [-1] * (input_rank - 1) + [self.history],
                         name='state')

        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        next_state = tf.slice(comb_state, [0] * (input_rank - 1) + [1],
                              [-1] * (input_rank - 1) + [self.history],
                              name='next_state')
        next_state = tf.reshape(next_state, self._stacked_state_shape)
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot,
                                          1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                         name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'), varreplace.freeze_variables(
                skip_collection=True):
            targetQ_predict_value = self.get_DQN_prediction(next_state)  # NxA

        if self.method != 'Double':
            # DQN
            best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
        else:
            # Double-DQN
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                        1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(
            isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

        cost = tf.losses.huber_loss(target,
                                    pred_action_value,
                                    reduction=tf.losses.Reduction.MEAN)
        summary.add_param_summary(
            ('conv.*/W', ['histogram', 'rms']),
            ('fc.*/W', ['histogram', 'rms']))  # monitor all W
        summary.add_moving_summary(cost)
        return cost
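All of these DQN variants fit `pred_action_value` to the Bellman target with `tf.losses.huber_loss`, which is quadratic for small errors and linear for large ones (the delta parameter defaults to 1). A NumPy sketch of the elementwise loss:

import numpy as np

def huber(error, delta=1.0):
    abs_err = np.abs(error)
    quadratic = 0.5 * np.square(error)
    linear = delta * (abs_err - 0.5 * delta)
    return np.where(abs_err <= delta, quadratic, linear)

errors = np.array([-3.0, -0.5, 0.0, 0.5, 3.0])
print(huber(errors).mean())   # Reduction.MEAN averages over the batch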
Example no. 21
 def create_level(level, input_channels, output_channels, inputs, bn):
     with tf.variable_scope('level_%d' % level, reuse=tf.AUTO_REUSE):
         features = mlp_conv(inputs, [
             input_channels,
             int(input_channels / 2),
             int(input_channels / 4),
             int(input_channels / 8),
             output_channels * int(tarch[level])
         ],
                             get_current_tower_context().is_training,
                             bn)
         features = tf.reshape(
             features, [tf.shape(features)[0], -1, output_channels])
     return features
Example no. 22
    def build_graph(self, joint_state, next_mask, action, reward, isOver):
        state = tf.identity(joint_state[:, 0, ...], name='state')
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        next_state = tf.identity(joint_state[:, 1, ...], name='next_state')
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot,
                                          1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                         name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'), varreplace.freeze_variables(
                skip_collection=True):
            # we are alternating between comb and fine states
            targetQ_predict_value = self.get_DQN_prediction(next_state)  # NxA

        if self.method != 'Double':
            # DQN
            self.greedy_choice = tf.argmax(targetQ_predict_value +
                                           (tf.to_float(next_mask) * 1e4),
                                           1)  # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                        1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)
        else:
            # Double-DQN
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(
                next_predict_value + (tf.to_float(next_mask) * 1e4), 1)  # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                        1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(
            isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

        l2_loss = tensorpack.regularize_cost('.*W{1}', l2_regularizer(1e-3))
        # cost = tf.losses.mean_squared_error(target, pred_action_value)
        cost = tf.losses.huber_loss(target,
                                    pred_action_value,
                                    reduction=tf.losses.Reduction.MEAN)
        summary.add_param_summary(('.*/W', ['histogram',
                                            'rms']))  # monitor all W
        summary.add_moving_summary(cost)
        return cost
Example no. 23
    def calc_inference_v1(end_points):
        nms_iou = tf.get_variable('nms_iou', shape=[], initializer=tf.constant_initializer(0.25), trainable=False)
        if not get_current_tower_context().is_training:
            def get_3d_bbox(box_size, heading_angle, center):
                batch_size = tf.shape(heading_angle)[0]
                c = tf.cos(heading_angle)
                s = tf.sin(heading_angle)
                zeros = tf.zeros_like(c)
                ones = tf.ones_like(c)
                rotation = tf.reshape(tf.stack([c, zeros, s, zeros, ones, zeros, -s, zeros, c], -1),
                                      tf.stack([batch_size, -1, 3, 3]))
                l, w, h = box_size[..., 0], box_size[..., 1], box_size[..., 2]  # lwh(xzy) order!!!
                corners = tf.reshape(tf.stack([l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2,
                                               h / 2, h / 2, h / 2, h / 2, -h / 2, -h / 2, -h / 2, -h / 2,
                                               w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2], -1),
                                     tf.stack([batch_size, -1, 3, 8]))
                return tf.einsum('ijkl,ijlm->ijmk', rotation, corners) + tf.expand_dims(center, 2)  # B * N * 8 * 3

            class_mean_size_tf = tf.constant(class_mean_size)
            size_cls_pred = tf.argmax(end_points['size_scores'], axis=-1)  # (B, proposal_num)
            size_cls_pred_onehot = tf.one_hot(size_cls_pred, depth=config.NS, axis=-1)  # (B, proposal_num, NS)
            size_residual_pred = tf.reduce_sum(tf.expand_dims(size_cls_pred_onehot, -1)  # (B, proposal_num, NS, -1)
                                               * tf.reshape(end_points['size_residuals_normalized'],
                                                            [-1, config.PROPOSAL_NUM, config.NS, 3]), axis=2)
            #  size_residual_pred : (B, proposal_num, 3)
            size_pred = tf.gather_nd(class_mean_size_tf, tf.expand_dims(size_cls_pred, -1)) \
                        * tf.maximum(1 + size_residual_pred, 1e-6)  # B * N * 3: size

            # calc center
            center_pred = end_points['center']  # (B, proposal_num, 3)
            heading_cls_pred = tf.argmax(end_points['heading_scores'], axis=-1)  # (B, proposal_num)
            heading_cls_pred_onehot = tf.one_hot(heading_cls_pred, depth=config.NH, axis=-1)  # (B, proposal_num, NH)
            heading_residual_pred = tf.reduce_sum(heading_cls_pred_onehot * end_points['heading_residuals_normalized'], axis=2)
            heading_pred = tf.floormod(
                (tf.cast(heading_cls_pred, tf.float32) * 2 + heading_residual_pred) * np.pi / config.NH,
                2 * np.pi)

            # with tf.control_dependencies([tf.print(size_residual_pred[0, :10, :]), tf.print(size_pred[0, :10, :])]):
            bboxes = get_3d_bbox(size_pred, heading_pred, center_pred)  # B * N * 8 * 3,  lhw(xyz) order!!!

            # bbox_corners = tf.concat([bboxes[:, :, 6, :], bboxes[:, :, 0, :]], axis=-1)  # B * N * 6,  lhw(xyz) order!!!
            # with tf.control_dependencies([tf.print(bboxes[0, 0])]):
            nms_idx = NMS3D(bboxes, tf.reduce_max(end_points['sem_cls_scores'], axis=-1), end_points['objectness_scores'],
                            nms_iou)  # Nnms * 2

            bboxes_pred = tf.gather_nd(bboxes, nms_idx, name='bboxes_pred')  # Nnms * 8 * 3
            class_scores_pred = tf.gather_nd(end_points['sem_cls_scores'], nms_idx, name='class_scores_pred')  # Nnms * C
            batch_idx = tf.identity(nms_idx[:, 0], name='batch_idx')  # Nnms, this is used to identify between batches
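`get_3d_bbox` above builds a rotation about the vertical axis and the eight canonical corners of an l/w/h box, then rotates and recenters them. A minimal NumPy version for a single box, using the same corner layout:

import numpy as np

def box_corners_3d(l, w, h, heading, center):
    c, s = np.cos(heading), np.sin(heading)
    rotation = np.array([[c, 0, s],
                         [0, 1, 0],
                         [-s, 0, c]])
    corners = np.array([[ l/2,  l/2, -l/2, -l/2,  l/2,  l/2, -l/2, -l/2],
                        [ h/2,  h/2,  h/2,  h/2, -h/2, -h/2, -h/2, -h/2],
                        [ w/2, -w/2, -w/2,  w/2,  w/2, -w/2, -w/2,  w/2]])
    return (rotation @ corners).T + np.asarray(center)   # 8 x 3 corner coordinates

print(box_corners_3d(l=2.0, w=1.0, h=1.5, heading=np.pi / 4, center=[0.0, 0.0, 5.0]))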
Example no. 24
    def _build_graph(self, inputs):
        state, action, reward, isOver = inputs
        state = tf.cast(state, tf.float32)
        # TODO: wtf is this?
        state = tf.slice(state, [0, 0, 0, 0, 0],
                         [-1, -1, -1, -1, self.channel],
                         name='state')
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        # FIXME I think this is history buffer stuff
        next_state = tf.slice(state, [0, 0, 0, 0, 1],
                              [-1, -1, -1, -1, self.channel],
                              name='next_state')
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot,
                                          1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                         name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'):
            targetQ_predict_value = self.get_DQN_prediction(next_state)  # NxA

        # TODO disable other models
        if 'Double' not in self.method:
            # DQN or Dueling
            best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
        else:
            # Double-DQN or DuelingDouble
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                        1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(
            isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)
        self.cost = tf.losses.huber_loss(target,
                                         pred_action_value,
                                         reduction=tf.losses.Reduction.MEAN)
        summary.add_param_summary(
            ('conv.*/W', ['histogram', 'rms']),
            ('fc.*/W', ['histogram', 'rms']))  # monitor all W
        summary.add_moving_summary(self.cost)
Example no. 25
    def _build_graph(self, inputs):
        comb_state, action, reward, isOver = inputs
        comb_state = tf.cast(comb_state, tf.float32)
        state = tf.slice(comb_state, [0, 0, 0, 0], [-1, -1, -1, self.channel],
                         name='state')
        self.predict_value = self._get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        next_state = tf.slice(comb_state, [0, 0, 0, 1],
                              [-1, -1, -1, self.channel],
                              name='next_state')
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot,
                                          1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(self.predict_value, 1),
                                         name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'), \
                collection.freeze_collection([tf.GraphKeys.TRAINABLE_VARIABLES]):
            targetQ_predict_value = self._get_DQN_prediction(next_state)  # NxA

        if self.method != 'Double':
            # DQN
            best_v = tf.reduce_max(targetQ_predict_value, 1)  # N,
        else:
            # Double-DQN
            sc = tf.get_variable_scope()
            with tf.variable_scope(sc, reuse=True):
                next_predict_value = self._get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)  # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions,
                                        1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(
            isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

        self.cost = tf.reduce_mean(symbf.huber_loss(target -
                                                    pred_action_value),
                                   name='cost')
        summary.add_param_summary(
            ('conv.*/W', ['histogram', 'rms']),
            ('fc.*/W', ['histogram', 'rms']))  # monitor all W
        summary.add_moving_summary(self.cost)
Example no. 26
def generate_rpn_proposals(boxes, scores, img_shape):
    """
    Args:
        boxes: nx4 float dtype, decoded to floatbox already
        scores: n float, the logits
        img_shape: [h, w]

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    assert boxes.shape.ndims == 2, boxes.shape
    if get_current_tower_context().is_training:
        PRE_NMS_TOPK = config.TRAIN_PRE_NMS_TOPK
        POST_NMS_TOPK = config.TRAIN_POST_NMS_TOPK
    else:
        PRE_NMS_TOPK = config.TEST_PRE_NMS_TOPK
        POST_NMS_TOPK = config.TEST_POST_NMS_TOPK

    topk = tf.minimum(PRE_NMS_TOPK, tf.size(scores))
    topk_scores, topk_indices = tf.nn.top_k(scores, k=topk, sorted=False)
    topk_boxes = tf.gather(boxes, topk_indices)
    topk_boxes = clip_boxes(topk_boxes, img_shape)

    topk_boxes_x1y1x2y2 = tf.reshape(topk_boxes, (-1, 2, 2))
    topk_boxes_x1y1, topk_boxes_x2y2 = tf.split(topk_boxes_x1y1x2y2, 2, axis=1)
    # nx1x2 each
    wbhb = tf.squeeze(topk_boxes_x2y2 - topk_boxes_x1y1, axis=1)
    valid = tf.reduce_all(wbhb > config.RPN_MIN_SIZE, axis=1)  # n,
    topk_valid_boxes_x1y1x2y2 = tf.boolean_mask(topk_boxes_x1y1x2y2, valid)
    topk_valid_scores = tf.boolean_mask(topk_scores, valid)

    topk_valid_boxes_y1x1y2x2 = tf.reshape(
        tf.reverse(topk_valid_boxes_x1y1x2y2, axis=[2]),
        (-1, 4), name='nms_input_boxes')
    nms_indices = tf.image.non_max_suppression(
        topk_valid_boxes_y1x1y2x2,
        topk_valid_scores,
        max_output_size=POST_NMS_TOPK,
        iou_threshold=config.RPN_PROPOSAL_NMS_THRESH)

    topk_valid_boxes = tf.reshape(topk_valid_boxes_x1y1x2y2, (-1, 4))
    final_boxes = tf.gather(
        topk_valid_boxes,
        nms_indices, name='boxes')
    final_scores = tf.gather(topk_valid_scores, nms_indices, name='scores')
    tf.sigmoid(final_scores, name='probs')  # for visualization
    return final_boxes, final_scores
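Before NMS, the proposals are clipped to the image window and boxes whose width or height does not exceed `RPN_MIN_SIZE` are dropped. A NumPy sketch of those two steps, with a hypothetical threshold:

import numpy as np

def clip_and_filter(boxes_x1y1x2y2, img_h, img_w, min_size=16.0):
    boxes = np.maximum(boxes_x1y1x2y2, 0.0)
    boxes = np.minimum(boxes, [img_w, img_h, img_w, img_h])   # x against w, y against h
    wh = boxes[:, 2:] - boxes[:, :2]
    valid = np.all(wh > min_size, axis=1)
    return boxes[valid]

boxes = np.array([[-10.0, 5.0, 50.0, 40.0],
                  [100.0, 100.0, 105.0, 104.0]])   # second box is too small
print(clip_and_filter(boxes, img_h=480, img_w=640))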
Example no. 27
 def _get_logits(self, image):
     ctx = get_current_tower_context()
     with maybe_freeze_updates(ctx.index > 0):
         network = ConvNetBuilder(
             image, 3, True,
             use_tf_layers=True,
             data_format=self.data_format,
             dtype=tf.float16 if args.use_fp16 else tf.float32,
             variable_dtype=tf.float32)
         with custom_getter_scope(network.get_custom_getter()):
             dataset = lambda: 1
             dataset.name = 'imagenet'
             model_conf = model_config.get_model_config('resnet50', dataset)
             model_conf.set_batch_size(args.batch)
             model_conf.add_inference(network)
             return network.affine(1000, activation='linear', stddev=0.001)
Example no. 28
    def _build_graph(self, inputs):
        self.r_mel, self.t_spec, self.t_mel, self.r_spec = inputs

        is_training = get_current_tower_context().is_training

        # build net
        with tf.variable_scope('net'):
            self.pred_spec, self.pred_mel = self.network(self.r_mel, is_training)
        self.pred_spec = tf.identity(self.pred_spec, name='pred_spec')

        self.cost = self.loss()

        # summaries
        tf.summary.scalar('net/train/loss', self.cost)

        if not is_training:
            tf.summary.scalar('net/eval/summ_loss', self.cost)
Example no. 29
    def _build_graph(self, inputs):
        wav, melspec = inputs
        is_training = get_current_tower_context().is_training

        out = self(*inputs, is_training=is_training)

        if is_training:
            with tf.name_scope('loss'):
                l_loss = l1_loss(out=out, y=wav)
                p_loss = power_loss(out=tf.squeeze(out, -1),
                                    y=tf.squeeze(wav, -1),
                                    win_length=hp.signal.win_length,
                                    hop_length=hp.signal.hop_length)
                tf.summary.scalar('likelihood', l_loss)
                self.cost = l_loss + hp.train.weight_power_loss * p_loss
                if hp.train.weight_power_loss > 0:
                    tf.summary.scalar('power', p_loss)
                tf.summary.scalar('total_loss', self.cost)
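Example no. 30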
    def __init__(self, proposals, roi_func, fastrcnn_head_func, image_shape2d,
                 num_classes):
        """
        Args:
            proposals: BoxProposals
            roi_func (boxes -> features): a function to crop features with rois
            fastrcnn_head_func (features -> features): the fastrcnn head to apply on the cropped features
        """
        for k, v in locals().items():
            if k != 'self':
                setattr(self, k, v)

        self.num_cascade_stages = cfg.CASCADE.NUM_STAGES

        self.is_training = get_current_tower_context().is_training
        if self.is_training:

            @tf.contrib.eager.custom_gradient
            def scale_gradient(x):
                return x, lambda dy: dy * (1.0 / self.num_cascade_stages)

            self.scale_gradient = scale_gradient
            self.gt_boxes = proposals.gt_boxes
            self.gt_labels = proposals.gt_labels
        else:
            self.scale_gradient = tf.identity

        ious = cfg.CASCADE.IOUS
        # It's unclear how to do >3 stages, so it does not make sense to implement them
        assert self.num_cascade_stages == 3, "Only 3-stage cascade was implemented!"
        with tf.variable_scope('cascade_rcnn_stage1'):
            H1, B1 = self.run_head(self.proposals, 0)

        with tf.variable_scope('cascade_rcnn_stage2'):
            B1_proposal = self.match_box_with_gt(B1, ious[1])
            H2, B2 = self.run_head(B1_proposal, 1)

        with tf.variable_scope('cascade_rcnn_stage3'):
            B2_proposal = self.match_box_with_gt(B2, ious[2])
            H3, B3 = self.run_head(B2_proposal, 2)
        self._cascade_boxes = [B1, B2, B3]
        self._heads = [H1, H2, H3]
Example no. 31
    def build_graph(self, comb_state, action, reward, isOver):
        comb_state = tf.cast(comb_state, tf.float32)
        comb_state = tf.reshape(
            comb_state, [-1] + list(self._shape2d) + [self.history + 1, self.channel])

        state = tf.slice(comb_state, [0, 0, 0, 0, 0], [-1, -1, -1, self.history, -1])
        state = tf.reshape(state, self._shape4d_for_prediction, name='state')
        self.predict_value = self.get_DQN_prediction(state)
        if not get_current_tower_context().is_training:
            return

        reward = tf.clip_by_value(reward, -1, 1)
        next_state = tf.slice(comb_state, [0, 0, 0, 1, 0], [-1, -1, -1, self.history, -1], name='next_state')
        next_state = tf.reshape(next_state, self._shape4d_for_prediction)
        action_onehot = tf.one_hot(action, self.num_actions, 1.0, 0.0)

        pred_action_value = tf.reduce_sum(self.predict_value * action_onehot, 1)  # N,
        max_pred_reward = tf.reduce_mean(tf.reduce_max(
            self.predict_value, 1), name='predict_reward')
        summary.add_moving_summary(max_pred_reward)

        with tf.variable_scope('target'), varreplace.freeze_variables(skip_collection=True):
            targetQ_predict_value = self.get_DQN_prediction(next_state)    # NxA

        if self.method != 'Double':
            # DQN
            best_v = tf.reduce_max(targetQ_predict_value, 1)    # N,
        else:
            # Double-DQN
            next_predict_value = self.get_DQN_prediction(next_state)
            self.greedy_choice = tf.argmax(next_predict_value, 1)   # N,
            predict_onehot = tf.one_hot(self.greedy_choice, self.num_actions, 1.0, 0.0)
            best_v = tf.reduce_sum(targetQ_predict_value * predict_onehot, 1)

        target = reward + (1.0 - tf.cast(isOver, tf.float32)) * self.gamma * tf.stop_gradient(best_v)

        cost = tf.losses.huber_loss(
            target, pred_action_value, reduction=tf.losses.Reduction.MEAN)
        summary.add_param_summary(('conv.*/W', ['histogram', 'rms']),
                                  ('fc.*/W', ['histogram', 'rms']))   # monitor all W
        summary.add_moving_summary(cost)
        return cost
Example no. 32
def DepthwiseSeparableConvWithTrackedMults(inputs,
                                           out_channels,
                                           depth_multiplier=0.25,
                                           network_complexity=None,
                                           downsample=False,
                                           nl=tf.identity):
    out_channels = round(out_channels * depth_multiplier)
    filter_shape = [3, 3]
    _stride = 2 if downsample else 1

    # tf.nn.relu is the default activation; it would normally be applied after batchnorm, but normalizer_fn is set to None here
    depthwise_conv = tf.contrib.layers.separable_conv2d(
        inputs,
        num_outputs=None,
        depth_multiplier=1,
        stride=_stride,
        kernel_size=filter_shape,
        biases_initializer=None,
        normalizer_fn=None,
        activation_fn=nl)

    pointwise_conv = tf.identity(depthwise_conv, name='output')

    if get_current_tower_context().is_main_training_tower:
        in_shape = inputs.get_shape().as_list()
        network_complexity['weights'] += filter_shape[0] * filter_shape[
            1] * in_shape[-1]  # assuming 'NHWC'
        network_complexity['mults'] += in_shape[1] * in_shape[
            2] * filter_shape[0] * filter_shape[1] * in_shape[-1]

    # network complexity handled in Conv2DWithTrackedMults
    pointwise_conv = Conv2DWithTrackedMults(
        'PointwiseConv2D',
        depthwise_conv,
        out_channels,
        kernel_shape=1,
        use_bias=False,
        nl=nl,
        network_complexity=network_complexity)

    return pointwise_conv
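The depthwise 3x3 stage above is counted as `H*W*k*k*C_in` multiplies and `k*k*C_in` weights, while the pointwise 1x1 convolution (tracked inside `Conv2DWithTrackedMults`) adds `H*W*C_in*C_out` multiplies. A pure-Python sketch of that arithmetic with hypothetical shapes:

def separable_conv_complexity(h, w, c_in, c_out, k=3):
    """Approximate multiply counts for a depthwise 3x3 + pointwise 1x1 block."""
    depthwise_mults = h * w * k * k * c_in
    pointwise_mults = h * w * c_in * c_out
    return {'depthwise': depthwise_mults,
            'pointwise': pointwise_mults,
            'total': depthwise_mults + pointwise_mults}

print(separable_conv_complexity(h=28, w=28, c_in=64, c_out=32))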
Example no. 33
    def _build_graph(self, inputs):
        self.x_mfccs, self.y_ppgs = inputs
        is_training = get_current_tower_context().is_training
        with tf.variable_scope('net1'):
            self.ppgs, self.preds, self.logits = self.network(
                self.x_mfccs, is_training)
        self.cost = self.loss()
        acc = self.acc()

        if is_training:
            # summaries
            tf.summary.scalar('net1/train/loss', self.cost)
            tf.summary.scalar('net1/train/acc', acc)

        if not is_training:
            # summaries
            self.cost = tf.identity(self.cost, name="net1/eval/loss")
            acc = tf.identity(acc, name="net1/eval/acc")

            tf.summary.scalar('net1/eval/loss', self.cost)
            tf.summary.scalar('net1/eval/acc', acc)
Example no. 34
    def __init__(self, proposals,
                 roi_func, fastrcnn_head_func, image_shape2d, num_classes):
        """
        Args:
            proposals: BoxProposals
            roi_func (boxes -> features): a function to crop features with rois
            fastrcnn_head_func (features -> features): the fastrcnn head to apply on the cropped features
        """
        for k, v in locals().items():
            if k != 'self':
                setattr(self, k, v)

        self.num_cascade_stages = cfg.CASCADE.NUM_STAGES

        self.is_training = get_current_tower_context().is_training
        if self.is_training:
            @tf.custom_gradient
            def scale_gradient(x):
                return x, lambda dy: dy * (1.0 / self.num_cascade_stages)
            self.scale_gradient = scale_gradient
            self.gt_boxes = proposals.gt_boxes
            self.gt_labels = proposals.gt_labels
        else:
            self.scale_gradient = tf.identity

        ious = cfg.CASCADE.IOUS
        # It's unclear how to do >3 stages, so it does not make sense to implement them
        assert self.num_cascade_stages == 3, "Only 3-stage cascade was implemented!"
        with tf.variable_scope('cascade_rcnn_stage1'):
            H1, B1 = self.run_head(self.proposals, 0)

        with tf.variable_scope('cascade_rcnn_stage2'):
            B1_proposal = self.match_box_with_gt(B1, ious[1])
            H2, B2 = self.run_head(B1_proposal, 1)

        with tf.variable_scope('cascade_rcnn_stage3'):
            B2_proposal = self.match_box_with_gt(B2, ious[2])
            H3, B3 = self.run_head(B2_proposal, 2)
        self._cascade_boxes = [B1, B2, B3]
        self._heads = [H1, H2, H3]
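Example no. 35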
def RescaleActivationLayer(inputs, decay=0.9, bit_a=8):
    in_shape = inputs.get_shape().as_list()
    moving_max = tf.get_variable('activation_max/EMA', [in_shape[-1]],
                                 initializer=tf.constant_initializer(),
                                 trainable=False)
    moving_min = tf.get_variable('activation_min/EMA', [in_shape[-1]],
                                 initializer=tf.constant_initializer(),
                                 trainable=False)

    named_inputs = tf.identity(inputs, name='rescaling_input_activation')
    # xn = (named_inputs - moving_min) / tf.pow(tf.constant(2.0), log2(moving_max) - tf.constant(float(bit_a)))
    xn = (named_inputs -
          (moving_min + moving_max) / 2.0) / (moving_max - moving_min)
    named_xn = tf.identity(xn, name='rescaled_activation')
    named_xn = tf.Print(named_xn, [named_xn])

    ctx = get_current_tower_context()
    if ctx.is_main_training_tower:
        ret = update_ema(xn, moving_max, moving_min, decay)
    else:
        ret = tf.identity(xn, name='output')
    vh = ret.variables = VariableHolder(mean=moving_max, variance=moving_min)
    return ret
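`update_ema` is not shown here; assuming it maintains the per-channel running max/min with exponential decay (as the `/EMA` variable names suggest), the update would look roughly like this NumPy sketch:

import numpy as np

def ema_update(moving_stat, batch_stat, decay=0.9):
    """One exponential-moving-average step, as used for the activation max/min above."""
    return decay * moving_stat + (1.0 - decay) * batch_stat

moving_max = np.zeros(4)
for _ in range(100):
    batch_max = np.random.rand(4) + 1.0   # hypothetical per-channel batch maxima
    moving_max = ema_update(moving_max, batch_max)
print(moving_max)   # converges toward the typical batch maximum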