Example #1
def attention_based_dropout(input_, option):
    def _get_importance_map(attention):
        return tf.sigmoid(attention)

    def _get_drop_mask(attention, drop_thr):
        max_val = tf.reduce_max(attention, axis=[1, 2, 3], keepdims=True)
        thr_val = max_val * drop_thr
        return tf.cast(attention < thr_val, dtype=tf.float32, name='drop_mask')

    def _select_component(importance_map, drop_mask, drop_prob):
        random_tensor = tf.random_uniform([], drop_prob, 1. + drop_prob)
        binary_tensor = tf.cast(tf.floor(random_tensor), dtype=tf.float32)
        return (1. - binary_tensor) * importance_map + binary_tensor * drop_mask

    ctx = get_current_tower_context()
    is_training = ctx.is_training

    drop_prob = 1 - option.adl_keep_prob
    drop_thr = option.adl_threshold

    if is_training:
        attention_map = tf.reduce_mean(input_, axis=1, keepdims=True)
        importance_map = _get_importance_map(attention_map)
        drop_mask = _get_drop_mask(attention_map, drop_thr)
        selected_map = _select_component(importance_map, drop_mask, drop_prob)
        output = input_ * selected_map
        return output

    else:
        return input_
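A minimal usage sketch for the snippet above, assuming it runs inside a tensorpack tower function on an NCHW feature map (the reduction over axis=1 is a channel mean); `opt` and the tensor names below are illustrative, not taken from the source:

# Hypothetical call site; SimpleNamespace stands in for the real option object.
from types import SimpleNamespace

opt = SimpleNamespace(adl_keep_prob=0.75, adl_threshold=0.80)
# feature: [N, C, H, W] activations from a preceding conv layer
# feature = attention_based_dropout(feature, opt)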
Example #2
def Dropout(x, *args, **kwargs):
    """
    Same as `tf.layers.dropout`.
    However, for historical reasons, the first positional argument is
    interpreted as keep_prob rather than drop_prob.
    Explicitly use `rate=` keyword arguments to ensure things are consistent.
    """
    if 'is_training' in kwargs:
        kwargs['training'] = kwargs.pop('is_training')
    if len(args) > 0:
        if args[0] != 0.5:
            logger.warn(
                "The first positional argument to tensorpack.Dropout is the probability to keep, rather than to drop. "
                "This is different from the rate argument in tf.layers.Dropout due to historical reasons. "
                "To mimic tf.layers.Dropout, explicitly use keyword argument 'rate' instead"
            )
        rate = 1 - args[0]
    elif 'keep_prob' in kwargs:
        assert 'rate' not in kwargs, "Cannot set both keep_prob and rate!"
        rate = 1 - kwargs.pop('keep_prob')
    elif 'rate' in kwargs:
        rate = kwargs.pop('rate')
    else:
        rate = 0.5

    if kwargs.get('training', None) is None:
        kwargs['training'] = get_current_tower_context().is_training

    if get_tf_version_tuple() <= (1, 12):
        return tf.layers.dropout(x, rate=rate, **kwargs)
    else:
        return tf.nn.dropout(x, rate=rate if kwargs['training'] else 0.)
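A short usage note for the wrapper above: as the docstring warns, a positional argument is read as keep_prob, so the explicit `rate=` keyword is the unambiguous form (tensor names below are illustrative):

# l = Dropout(l, rate=0.3)   # drop 30% of activations while training
# l = Dropout(l, 0.7)        # legacy form: keep_prob=0.7, i.e. the same 30% drop,
#                            # and it triggers the warning above when != 0.5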
Example #3
    def _build_graph(self, inputs):
        state, action, futurereward = inputs
        policy, self.value = self._get_NN_prediction(state)
        self.value = tf.squeeze(self.value, [1], name='pred_value') # (B,)
        self.logits = tf.nn.softmax(policy, name='logits')

        expf = tf.get_variable('explore_factor', shape=[],
                initializer=tf.constant_initializer(1), trainable=False)
        logitsT = tf.nn.softmax(policy * expf, name='logitsT') #The larger expf, the less exploration
        is_training = get_current_tower_context().is_training
        if not is_training:
            return
        log_probs = tf.log(self.logits + 1e-6)

        log_pi_a_given_s = tf.reduce_sum(
                log_probs * tf.one_hot(action, NUM_ACTIONS), 1)
        advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
        policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage, name='policy_loss')
        xentropy_loss = tf.reduce_sum(
                self.logits * log_probs, name='xentropy_loss')
        value_loss = tf.nn.l2_loss(self.value - futurereward, name='value_loss')

        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
        advantage = symbf.rms(advantage, name='rms_advantage')
        summary.add_moving_summary(policy_loss, xentropy_loss, value_loss, pred_reward, advantage)
        entropy_beta = tf.get_variable('entropy_beta', shape=[],
                initializer=tf.constant_initializer(0.01), trainable=False)
        self.cost = tf.add_n([policy_loss, xentropy_loss * entropy_beta, value_loss])
        self.cost = tf.truediv(self.cost,
                tf.cast(tf.shape(futurereward)[0], tf.float32),
                name='cost')
Example #4
def generate_fpn_proposals(multilevel_pred_boxes, multilevel_label_logits,
                           image_shape2d):
    """
    Args:
        multilevel_pred_boxes: #lvl HxWxAx4 boxes
        multilevel_label_logits: #lvl tensors of shape HxWxA

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_pred_boxes) == num_lvl
    assert len(multilevel_label_logits) == num_lvl

    training = get_current_tower_context().is_training
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        for lvl in range(num_lvl):
            with tf.name_scope('Lvl{}'.format(lvl + 2)):
                pred_boxes_decoded = multilevel_pred_boxes[lvl]

                proposal_boxes, proposal_scores = generate_rpn_proposals(
                    tf.reshape(pred_boxes_decoded, [-1, 4]),
                    tf.reshape(multilevel_label_logits[lvl], [-1]),
                    image_shape2d, fpn_nms_topk)
                all_boxes.append(proposal_boxes)
                all_scores.append(proposal_scores)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # nx4
        proposal_scores = tf.concat(all_scores, axis=0)  # n
        # Here we are different from Detectron.
        # Detectron picks top-k within the batch, rather than within an image. However we do not have a batch.
        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=proposal_topk,
                                                    sorted=False)
        proposal_boxes = tf.gather(proposal_boxes,
                                   topk_indices,
                                   name="all_proposals")
    else:
        for lvl in range(num_lvl):
            with tf.name_scope('Lvl{}'.format(lvl + 2)):
                pred_boxes_decoded = multilevel_pred_boxes[lvl]
                all_boxes.append(tf.reshape(pred_boxes_decoded, [-1, 4]))
                all_scores.append(
                    tf.reshape(multilevel_label_logits[lvl], [-1]))
        all_boxes = tf.concat(all_boxes, axis=0)
        all_scores = tf.concat(all_scores, axis=0)
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            all_boxes, all_scores, image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK
            if training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK
            if training else cfg.RPN.TEST_POST_NMS_TOPK)

    tf.sigmoid(proposal_scores, name='probs')  # for visualization
    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
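A hedged sketch of what the per-level inputs to the function above could look like, assuming five FPN levels (P2 to P6) and three anchors per location; all shapes are illustrative, and the call itself is left commented because it needs cfg.* and a tower context:

import tensorflow as tf

num_anchors = 3
multilevel_pred_boxes = [tf.zeros([128 >> k, 128 >> k, num_anchors, 4]) for k in range(5)]   # HxWxAx4 per level
multilevel_label_logits = [tf.zeros([128 >> k, 128 >> k, num_anchors]) for k in range(5)]    # HxWxA per level
image_shape2d = tf.constant([512, 512])
# boxes, scores = generate_fpn_proposals(multilevel_pred_boxes, multilevel_label_logits, image_shape2d)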
Example #5
    def build_graph(self, image, label, bbox):
        ctx = get_current_tower_context()
        is_training = ctx.is_training

        image = image_preprocess(image, args, bgr=True)
        image = tf.transpose(image, [0, 3, 1, 2])  # NCHW
        label_onehot = tf.one_hot(label, args.classnum)
        image_summaries('input-images', image)

        logits, convmaps = vgg_gap(image, args)

        _, indices = tf.nn.top_k(logits, 5)
        indices = tf.identity(indices, name='top5')

        # Grad-CAM
        activation_map = tf.identity(tf.cast(convmaps, tf.float32),
                                     name='actmap')
        y_c = tf.reduce_sum(tf.multiply(logits, label_onehot), axis=1)
        target_conv_layer_grad = tf.identity(tf.cast(
            tf.gradients(y_c, convmaps)[0], tf.float32),
                                             name='grad')

        # Compute loss
        loss = compute_loss_and_error(logits, label)
        wd_cost = regularize_cost('.*/W',
                                  l2_regularizer(5e-4),
                                  name='l2_regularize_loss')

        add_moving_summary(loss, wd_cost)
        return tf.add_n([loss, wd_cost], name='cost')
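The `actmap` and `grad` tensors exported above are the usual Grad-CAM ingredients; a hedged post-processing sketch in numpy (function and array names assumed, channels-first as in the graph above) could combine them like this:

import numpy as np

def grad_cam(actmap, grad):
    """actmap, grad: [C, H, W] arrays fetched from the 'actmap' and 'grad' outputs for one image."""
    weights = grad.mean(axis=(1, 2))                                     # channel weights = GAP of the gradients
    cam = np.maximum((weights[:, None, None] * actmap).sum(axis=0), 0.)  # weighted sum over channels + ReLU
    return cam / (cam.max() + 1e-8)                                      # normalize to [0, 1]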
Example #6
def generate_fpn_proposals(multilevel_anchors, multilevel_label_logits,
                           multilevel_box_logits, image_shape2d):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    ctx = get_current_tower_context()
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if ctx.is_training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        for lvl in range(num_lvl):
            with tf.name_scope('FPNProposal_Lvl{}'.format(lvl + 2)):
                anchors = multilevel_anchors[lvl]
                pred_boxes_decoded = anchors.decode_logits(
                    multilevel_box_logits[lvl])

                proposal_boxes, proposal_scores = generate_rpn_proposals(
                    tf.reshape(pred_boxes_decoded, [-1, 4]),
                    tf.reshape(multilevel_label_logits[lvl], [-1]),
                    image_shape2d, fpn_nms_topk)
                all_boxes.append(proposal_boxes)
                all_scores.append(proposal_scores)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # nx4
        proposal_scores = tf.concat(all_scores, axis=0)  # n
        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores,
                                                    k=proposal_topk,
                                                    sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)
    else:
        for lvl in range(num_lvl):
            with tf.name_scope('FPNProposal_Lvl{}'.format(lvl + 2)):
                anchors = multilevel_anchors[lvl]
                pred_boxes_decoded = anchors.decode_logits(
                    multilevel_box_logits[lvl])
                all_boxes.append(tf.reshape(pred_boxes_decoded, [-1, 4]))
                all_scores.append(
                    tf.reshape(multilevel_label_logits[lvl], [-1]))
        all_boxes = tf.concat(all_boxes, axis=0)
        all_scores = tf.concat(all_scores, axis=0)
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            all_boxes, all_scores, image_shape2d, cfg.RPN.TRAIN_PRE_NMS_TOPK
            if ctx.is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK
            if ctx.is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    return proposal_boxes, proposal_scores
Example #7
    def build_graph(self, image, label):
        image = tf.expand_dims(image, 3) * 2 - 1
        ctx = get_current_tower_context()

        M = get_keras_model()
        logits = M(image)
        if ctx.is_main_training_tower:
            for op in M.updates:
                tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, op)

        # build cost function by tensorflow
        cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                              labels=label)
        cost = tf.reduce_mean(
            cost, name='cross_entropy_loss')  # the average cross-entropy loss

        # for tensorpack validation
        acc = tf.cast(tf.nn.in_top_k(logits, label, 1), tf.float32)
        acc = tf.reduce_mean(acc, name='accuracy')
        summary.add_moving_summary(acc)

        wd_cost = tf.add_n(
            M.losses,
            name='regularize_loss')  # this is how Keras manage regularizers
        cost = tf.add_n([wd_cost, cost], name='total_cost')
        summary.add_moving_summary(cost, wd_cost)
        return cost
Example #8
def addParamSummary(*summary_lists):
    """
    Add summary Ops for all trainable variables matching the regex.

    Args:
        summary_lists (list): each is (regex, [list of summary type to perform]).
        Summary type can be 'mean', 'scalar', 'histogram', 'sparsity', 'rms', 'absmax'
    """
    from tensorpack.tfutils.tower import get_current_tower_context
    from tensorpack.utils.develop import log_deprecated
    from tensorpack.tfutils.symbolic_functions import rms
    import re
    import tensorflow as tf
    ctx = get_current_tower_context()
    if ctx is not None and not ctx.is_main_training_tower:
        return
    if len(summary_lists) == 1 and isinstance(summary_lists[0], list):
        log_deprecated(
            text=
            "Use positional args to call add_param_summary() instead of a list."
        )
        summary_lists = summary_lists[0]

    def perform(var, action):
        ndim = var.get_shape().ndims
        name = var.name.replace(':0', '')
        if action == 'scalar':
            assert ndim == 0, "Scalar summary on high-dimension data. Maybe you want 'mean'?"
            tf.summary.scalar(name, var)
            return
        assert ndim > 0, "Cannot perform {} summary on scalar data".format(
            action)
        if action == 'histogram':
            tf.summary.histogram(name, var)
            return
        if action == 'sparsity':
            tf.summary.scalar(name + '-sparsity', tf.nn.zero_fraction(var))
            return
        if action == 'mean':
            tf.summary.scalar(name + '-mean', tf.reduce_mean(var))
            return
        if action == 'rms':
            tf.summary.scalar(name + '-rms', rms(var))
            return
        if action == 'absmax':
            tf.summary.scalar(name + '-absmax', tf.reduce_max(tf.abs(var)))
            return
        raise RuntimeError("Unknown summary type: {}".format(action))

    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    with tf.name_scope('00/SummaryParam'):
        for p in params:
            name = p.name
            for rgx, actions in summary_lists:
                if not rgx.endswith('$'):
                    rgx = rgx + '(:0)?$'
                if re.match(rgx, name):
                    for act in actions:
                        perform(p, act)
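A usage sketch following the (regex, [summary types]) format described in the docstring; the variable-name patterns are illustrative:

# addParamSummary(('.*/W$', ['histogram', 'rms']),
#                 ('.*/b$', ['mean']))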
Example #9
def vgg_gap(image, option, importance=False):
    ctx = get_current_tower_context()
    is_training = ctx.is_training

    with argscope(Conv2D,
        kernel_initializer=tf.variance_scaling_initializer(scale=2.)), \
            argscope([Conv2D, MaxPooling, BatchNorm, GlobalAvgPooling],
                data_format='channels_first'):

        l = convnormrelu(image, 'conv1_1', 64, option)
        if option.attdrop[11]: l = ADL(11, l, option)
        l = convnormrelu(l, 'conv1_2', 64, option)
        if option.attdrop[12]: l = ADL(12, l, option)
        l = MaxPooling('pool1', l, 2)
        if option.attdrop[1]: l = ADL(1, l, option)

        l = convnormrelu(l, 'conv2_1', 128, option)
        if option.attdrop[21]: l = ADL(21, l, option)
        l = convnormrelu(l, 'conv2_2', 128, option)
        if option.attdrop[22]: l = ADL(22, l, option)
        l = MaxPooling('pool2', l, 2)
        if option.attdrop[2]: l = ADL(2, l, option)

        l = convnormrelu(l, 'conv3_1', 256, option)
        if option.attdrop[31]: l = ADL(31, l, option)
        l = convnormrelu(l, 'conv3_2', 256, option)
        if option.attdrop[32]: l = ADL(32, l, option)
        l = convnormrelu(l, 'conv3_3', 256, option)
        if option.attdrop[33]: l = ADL(33, l, option)
        l = MaxPooling('pool3', l, 2)
        if option.attdrop[3]: l = ADL(3, l, option)

        l = convnormrelu(l, 'conv4_1', 512, option)
        if option.attdrop[41]: l = ADL(41, l, option)
        l = convnormrelu(l, 'conv4_2', 512, option)
        if option.attdrop[42]: l = ADL(42, l, option)
        l = convnormrelu(l, 'conv4_3', 512, option)
        if option.attdrop[43]: l = ADL(43, l, option)
        l = MaxPooling('pool4', l, 2)
        if option.attdrop[4]: l = ADL(4, l, option)

        l = convnormrelu(l, 'conv5_1', 512, option)
        if option.attdrop[51]: l = ADL(51, l, option)
        l = convnormrelu(l, 'conv5_2', 512, option)
        if option.attdrop[52]: l = ADL(52, l, option)
        l = convnormrelu(l, 'conv5_3', 512, option)
        if option.attdrop[53]: l = ADL(53, l, option)

        convmaps = convnormrelu(l, 'new', 1024, option)
        if option.attdrop[6]: l = ADL(6, l, option)

        pre_logits = GlobalAvgPooling('gap', convmaps)
        logits = FullyConnected(
            'linear',
            pre_logits,
            option.classnum,
            kernel_initializer=tf.random_normal_initializer(stddev=0.01))

        return logits, convmaps
Example #10
def generate_fpn_proposals(
    multilevel_anchors, multilevel_label_logits,
        multilevel_box_logits, image_shape2d):
    """
    Args:
        multilevel_anchors: #lvl RPNAnchors
        multilevel_label_logits: #lvl tensors of shape HxWxA
        multilevel_box_logits: #lvl tensors of shape HxWxAx4

    Returns:
        boxes: kx4 float
        scores: k logits
    """
    num_lvl = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(multilevel_anchors) == num_lvl
    assert len(multilevel_label_logits) == num_lvl
    assert len(multilevel_box_logits) == num_lvl

    ctx = get_current_tower_context()
    all_boxes = []
    all_scores = []
    if cfg.FPN.PROPOSAL_MODE == 'Level':
        fpn_nms_topk = cfg.RPN.TRAIN_PER_LEVEL_NMS_TOPK if ctx.is_training else cfg.RPN.TEST_PER_LEVEL_NMS_TOPK
        for lvl in range(num_lvl):
            with tf.name_scope('Lvl{}'.format(lvl + 2)):
                anchors = multilevel_anchors[lvl]
                pred_boxes_decoded = anchors.decode_logits(multilevel_box_logits[lvl])

                proposal_boxes, proposal_scores = generate_rpn_proposals(
                    tf.reshape(pred_boxes_decoded, [-1, 4]),
                    tf.reshape(multilevel_label_logits[lvl], [-1]),
                    image_shape2d, fpn_nms_topk)
                all_boxes.append(proposal_boxes)
                all_scores.append(proposal_scores)

        proposal_boxes = tf.concat(all_boxes, axis=0)  # nx4
        proposal_scores = tf.concat(all_scores, axis=0)  # n
        proposal_topk = tf.minimum(tf.size(proposal_scores), fpn_nms_topk)
        proposal_scores, topk_indices = tf.nn.top_k(proposal_scores, k=proposal_topk, sorted=False)
        proposal_boxes = tf.gather(proposal_boxes, topk_indices)
    else:
        for lvl in range(num_lvl):
            with tf.name_scope('Lvl{}'.format(lvl + 2)):
                anchors = multilevel_anchors[lvl]
                pred_boxes_decoded = anchors.decode_logits(multilevel_box_logits[lvl])
                all_boxes.append(tf.reshape(pred_boxes_decoded, [-1, 4]))
                all_scores.append(tf.reshape(multilevel_label_logits[lvl], [-1]))
        all_boxes = tf.concat(all_boxes, axis=0)
        all_scores = tf.concat(all_scores, axis=0)
        proposal_boxes, proposal_scores = generate_rpn_proposals(
            all_boxes, all_scores, image_shape2d,
            cfg.RPN.TRAIN_PRE_NMS_TOPK if ctx.is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
            cfg.RPN.TRAIN_POST_NMS_TOPK if ctx.is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    tf.sigmoid(proposal_scores, name='probs')  # for visualization
    return tf.stop_gradient(proposal_boxes, name='boxes'), \
        tf.stop_gradient(proposal_scores, name='scores')
Example #11
    def build_graph(self, image, label):
        is_training = get_current_tower_context().is_main_training_tower
        image_origin = ImageNetModel.image_preprocess(
            image, bgr=self.image_bgr)  # [N, H, W, C]
        loss, logit = 0, {}
        scales = sorted(self.scales, reverse=True)
        # sorted_scales = sorted(list(set(scales + self.scales)), reverse=True)
        for scale in scales:
            image = tf.image.resize_images(
                image_origin, [scale, scale],
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
            if self.data_format == 'NCHW':
                image = tf.transpose(image, [0, 3, 1, 2])
            with tf.variable_scope('imagenet', reuse=tf.AUTO_REUSE):
                logit[scale] = self.get_logits(image, scale)
            loss_scale = self.compute_loss_and_error(logit[scale], label,
                                                     scale, is_training)
            loss += loss_scale

        if self.distill:
            logit_ensemble = 0
            alpha = tf.get_variable('alpha', [len(scales)],
                                    initializer=tf.constant_initializer(1))
            alpha_soft = tf.nn.softmax(alpha)  # TODO: remove softmax
            for i, scale in enumerate(scales):
                logit_ensemble += alpha_soft[i] * tf.stop_gradient(
                    logit[scale])
                tf.summary.scalar('alpha%03d' % scale, alpha_soft[i])
            loss_ensemble = self.compute_loss_and_error(
                logit_ensemble, label, 'ensemble', is_training)
            loss += loss_ensemble
            loss_distill = 0
            soft_label = tf.stop_gradient(tf.nn.softmax(logit_ensemble))
            for scale in scales:
                loss_distill += self.compute_distill_loss(
                    logit[scale], soft_label)
            if DISTILL_TYPE == 'top-down':
                for i in range(len(scales) - 1):
                    soft_label = tf.stop_gradient(
                        tf.nn.softmax(logit[scales[i]]))
                    for j in range(i + 1, len(scales)):
                        loss_distill += self.compute_distill_loss(
                            logit[scales[j]], soft_label)
                distill_num = len(scales) * (len(scales) + 1) / 2
                loss += SOFTMAX_TEM**2 * loss_distill / distill_num * len(
                    scales)
            else:
                loss += SOFTMAX_TEM**2 * loss_distill

        wd_loss = regularize_cost(self.weight_decay_pattern,
                                  tf.contrib.layers.l2_regularizer(
                                      self.weight_decay),
                                  name='l2_regularize_loss')
        add_moving_summary(loss, wd_loss)
        self.cost = tf.add_n([loss, wd_loss], name='cost')
        return self.cost
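The normalizer `distill_num` in the top-down branch above equals the number of distillation terms accumulated: one per scale against the ensemble teacher plus one per ordered pair of scales. A small sanity check, assuming three scales:

n = 3                                      # illustrative number of scales
ensemble_terms = n                         # first loop: every scale vs. the ensemble soft label
topdown_terms = n * (n - 1) // 2           # nested loop: each higher scale teaches each lower one
assert ensemble_terms + topdown_terms == n * (n + 1) // 2 == 6   # matches distill_num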
Example #12
 def tower_func(image, label):
     assert not get_current_tower_context().is_training
     image = self.image_preprocess(image)
     image = tf.transpose(image, [0, 3, 1, 2])
     image, target_label = attacker.attack(image, label,
                                           self.get_logits)
     logits = self.get_logits(image)
     ImageNetModel.compute_loss_and_error(
         logits, label)  # compute top-1 and top-5
     AdvImageNetModel.compute_attack_success(logits, target_label)
Example #13
 def __init__(self, drop_path_keep_prob, max_train_steps, total_depth):
     self.max_train_steps = max_train_steps
     self.total_depth = total_depth
     self.is_training = get_current_tower_context().is_training
     self.drop_path_keep_prob = drop_path_keep_prob
     self.do_drop_path = (
         self.is_training and
         self.drop_path_keep_prob is not None and
         self.drop_path_keep_prob < 1.0
     )
Example #14
def addParamSummary(*summary_lists):
    """
    Add summary Ops for all trainable variables matching the regex.

    Args:
        summary_lists (list): each is (regex, [list of summary type to perform]).
        Summary type can be 'mean', 'scalar', 'histogram', 'sparsity', 'rms', 'absmax'
    """
    from tensorpack.tfutils.tower import get_current_tower_context
    from tensorpack.utils.develop import log_deprecated
    from tensorpack.tfutils.symbolic_functions import rms
    import re
    import tensorflow as tf
    ctx = get_current_tower_context()
    if ctx is not None and not ctx.is_main_training_tower:
        return
    if len(summary_lists) == 1 and isinstance(summary_lists[0], list):
        log_deprecated(text="Use positional args to call add_param_summary() instead of a list.")
        summary_lists = summary_lists[0]

    def perform(var, action):
        ndim = var.get_shape().ndims
        name = var.name.replace(':0', '')
        if action == 'scalar':
            assert ndim == 0, "Scalar summary on high-dimension data. Maybe you want 'mean'?"
            tf.summary.scalar(name, var)
            return
        assert ndim > 0, "Cannot perform {} summary on scalar data".format(action)
        if action == 'histogram':
            tf.summary.histogram(name, var)
            return
        if action == 'sparsity':
            tf.summary.scalar(name + '-sparsity', tf.nn.zero_fraction(var))
            return
        if action == 'mean':
            tf.summary.scalar(name + '-mean', tf.reduce_mean(var))
            return
        if action == 'rms':
            tf.summary.scalar(name + '-rms', rms(var))
            return
        if action == 'absmax':
            tf.summary.scalar(name + '-absmax', tf.reduce_max(tf.abs(var)))
            return
        raise RuntimeError("Unknown summary type: {}".format(action))

    params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    with tf.name_scope('00/SummaryParam'):
        for p in params:
            name = p.name
            for rgx, actions in summary_lists:
                if not rgx.endswith('$'):
                    rgx = rgx + '(:0)?$'
                if re.match(rgx, name):
                    for act in actions:
                        perform(p, act)
Example #15
 def _get_logits_by_slim_model(self, inputs):
     ctx = get_current_tower_context()
     with tf.contrib.slim.arg_scope(
             resnet_v2.resnet_arg_scope(batch_norm_decay=0.9997)):
         logits, end_points = resnet_v2.resnet_v2_101(
             inputs,
             num_classes=None,
             is_training=ctx.is_training,
             global_pool=False,
             output_stride=16)
     net = end_points['resnet_v2_101/block4']
     return net
Example #16
 def fn():
     tlist = []
     ctx = get_current_tower_context()
     assert ctx is not None
     assert len(self.shapes) == len(self._desc)
     for idx, p in enumerate(self._desc):
         tlist.append(
             tf.constant(0,
                         dtype=p.type,
                         name='dummy-{}-{}'.format(p.name, ctx.index),
                         shape=self.shapes[idx]))
     return tlist
Example #17
    def _get_cost_and_grad(self):
        from tensorpack.tfutils.gradproc import FilterNoneGrad
        ctx = get_current_tower_context()
        assert ctx is not None and ctx.is_training, ctx

        # cost = self.get_cost()    # assume single cost
        loss_policy, loss_value = self._cost
        opt_a, opt_v = self.get_optimizer()
        grads_a = opt_a.compute_gradients(loss_policy, var_list=self._weights_actor, colocate_gradients_with_ops=True)
        grads_a = FilterNoneGrad().process(grads_a)
        grads_v = opt_v.compute_gradients(loss_value, var_list=self._weights_critic, colocate_gradients_with_ops=True)
        grads_v = FilterNoneGrad().process(grads_v)
        return self._cost, [grads_a, grads_v]
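The two gradient lists returned above are meant to be applied by their matching optimizers; a hedged sketch of how a trainer might consume them (names assumed):

# costs, (grads_a, grads_v) = model._get_cost_and_grad()
# opt_a, opt_v = model.get_optimizer()
# train_op = tf.group(opt_a.apply_gradients(grads_a),
#                     opt_v.apply_gradients(grads_v), name='train_op')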
Example #18
def resnet(input_, DEPTH, option):
    ctx = get_current_tower_context()
    is_training = ctx.is_training

    mode = option.mode
    basicblock = preresnet_basicblock \
                    if mode == 'preact' else resnet_basicblock
    bottleneck = {
        'resnet': resnet_bottleneck,
        'preact': preresnet_bottleneck,
        'se': se_resnet_bottleneck
    }[mode]

    cfg = {
        18: ([2, 2, 2, 2], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck)
    }
    defs, block_func = cfg[DEPTH]
    group_func = preresnet_group if mode == 'preact' else resnet_group

    with argscope(Conv2D, use_bias=False, kernel_initializer= \
            tf.variance_scaling_initializer(scale=2.0, mode='fan_out')), \
            argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                                            data_format='channels_first'):

        l = Conv2D('conv0', input_, 64, 7, strides=2, activation=BNReLU)  # 112
        if option.attdrop[0]: l = ADL(0, l, option)

        l = MaxPooling('pool0', l, 3, strides=2, padding='SAME')  # 56
        if option.attdrop[1]: l = ADL(1, l, option)

        l = group_func('group0', l, block_func, 64, defs[0], 1, option)  # 56
        if option.attdrop[2]: l = ADL(2, l, option)

        l = group_func('group1', l, block_func, 128, defs[1], 2, option)  # 28
        if option.attdrop[3]: l = ADL(3, l, option)

        l = group_func('group2', l, block_func, 256, defs[2], 2, option)  # 14
        if option.attdrop[4]: l = ADL(4, l, option)

        l = group_func('group3', l, block_func, 512, defs[3],
                       option.laststride, option)  # 7
        if option.attdrop[5]: l = ADL(5, l, option)

        prelogits = GlobalAvgPooling('gap', l)
        logits = FullyConnected('linearnew', prelogits, option.classnum)

    return logits, l
Example #19
    def locked_dropout(self, x, keep_prob):
        """
        Variational (locked) dropout. We make sure
        the drop-out mask is the same at all time steps.
        """
        is_training = get_current_tower_context().is_training
        do_dropout = keep_prob is not None and keep_prob < 1.0 and is_training
        if not do_dropout:
            return x

        x_shape = x.get_shape().as_list()
        x_shape[self.t_dim] = 1
        mask = tf.random_uniform(x_shape, minval=0, maxval=1, dtype=tf.float32)
        mask = tf.floor(mask + keep_prob) / keep_prob
        return tf.multiply(mask, x)
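A hedged plain-TF illustration of the "locked" property above, assuming `t_dim = 1` and an input of shape [batch, time, hidden]: the mask is sampled once with the time axis collapsed to 1 and then broadcast, so a unit that is dropped stays dropped at every time step (shapes are illustrative):

import tensorflow as tf

keep_prob = 0.8
x = tf.zeros([32, 50, 256])                                                 # [batch, time, hidden]
mask = tf.floor(tf.random_uniform([32, 1, 256]) + keep_prob) / keep_prob    # time axis collapsed to 1
y = x * mask                                                                # broadcasts over the time dimension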
Example #20
    def _get_cost_and_grad(self):
        from tensorpack.tfutils.gradproc import FilterNoneGrad
        ctx = get_current_tower_context()
        assert ctx is not None and ctx.is_training, ctx

        # cost = self.get_cost()    # assume single cost
        loss_policy, loss_value = self._cost
        opt_a, opt_v = self.get_optimizer()
        grads_a = opt_a.compute_gradients(loss_policy,
                                          var_list=self._weights_actor,
                                          colocate_gradients_with_ops=True)
        grads_a = FilterNoneGrad().process(grads_a)
        grads_v = opt_v.compute_gradients(loss_value,
                                          var_list=self._weights_critic,
                                          colocate_gradients_with_ops=True)
        grads_v = FilterNoneGrad().process(grads_v)
        return self._cost, [grads_a, grads_v]
Example #21
    def build_graph(self, image, label, xa, ya, xb, yb):
        image = image_preprocess(
            image, bgr=True)  # image = (image - image_mean) / image_std
        label_onehot = tf.one_hot(label, 200)

        ctx = get_current_tower_context()
        isTrain = ctx.is_training

        cfg = {
            18: ([2, 2, 2, 2]),
            34: ([3, 4, 6, 3]),
        }
        defs = cfg[DEPTH]

        convmaps = Spec_Conv2D('conv0', image, 64, 7, stride=1, sn=args.sn)
        convmaps = batch_norm_resnet(convmaps, isTrain, 'bnfirst')
        convmaps = tf.nn.relu(convmaps, 'relufirst')
        #convmaps = MaxPooling('pool0', convmaps, 3, strides=2, padding='SAME') # 32x32
        convmaps = preresnet_group('group0', convmaps, 64, defs[0], 1, isTrain,
                                   args.sn)  # 32x32
        convmaps = preresnet_group('group1', convmaps, 128, defs[1], 2,
                                   isTrain, args.sn)  # 16x16
        convmaps = preresnet_group('group2', convmaps, 256, defs[2], 2,
                                   isTrain, args.sn)  # 8x8
        convmaps_target = preresnet_group('group3new', convmaps, 512, defs[3],
                                          1, isTrain, args.sn)
        convmaps_gap = tf.reduce_mean(convmaps_target, [1, 2], name='gap')
        logits, w = Spec_FullyConnected('linearnew',
                                        convmaps_gap,
                                        200,
                                        sn=args.sn)

        weights = tf.identity(w, name='linearweight')
        activation_map = tf.identity(convmaps_target, name='actmap')
        y_c = tf.reduce_sum(tf.multiply(logits, label_onehot), axis=1)
        target_conv_layer_grad = tf.identity(tf.gradients(
            y_c, convmaps_target)[0],
                                             name='grad')

        loss = compute_loss_and_error(logits, label)
        wd_cost = regularize_cost('.*/W',
                                  l2_regularizer(1e-4),
                                  name='l2_regularize_loss')
        add_moving_summary(loss, wd_cost)
        return tf.add_n([loss, wd_cost], name='cost')
Example #22
    def dropout_embedding_w(self, w, keep_prob):
        """
        Dropout for embedding matrix w.
        The idea is to ignore certain words completely at random
        """
        is_training = get_current_tower_context().is_training
        do_dropout = keep_prob is not None and keep_prob < 1.0 and is_training
        if not do_dropout:
            return w

        # [n_vocab, nhid]
        w_shape = w.get_shape().as_list()
        mask = tf.random_uniform(shape=[w_shape[0], 1],
                                 minval=0,
                                 maxval=1,
                                 dtype=tf.float32)
        mask = tf.floor(mask + keep_prob) / keep_prob
        return tf.multiply(mask, w)
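A hedged numeric illustration of the word-level dropout above: because the mask has shape [n_vocab, 1], an embedding row is either kept (rescaled by 1/keep_prob) or zeroed entirely, removing that word for the step. A toy numpy version:

import numpy as np

keep_prob = 0.5
w = np.ones((4, 3), dtype=np.float32)                                   # toy [n_vocab, nhid] embedding
mask = np.floor(np.random.uniform(size=(4, 1)) + keep_prob) / keep_prob
print(w * mask)   # whole rows come out as 2.0 (kept and rescaled) or 0.0 (word dropped)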
Example #23
    def build_graph(self, state, action, futurereward, action_prob):
        logits, value = self._get_NN_prediction(state)
        value = tf.squeeze(value, [1], name='pred_value')  # (B,)
        policy = tf.nn.softmax(logits, name='policy')
        is_training = get_current_tower_context().is_training
        if not is_training:
            return
        log_probs = tf.log(policy + 1e-6)

        log_pi_a_given_s = tf.reduce_sum(
            log_probs * tf.one_hot(action, self.num_actions), 1)
        advantage = tf.subtract(tf.stop_gradient(value),
                                futurereward,
                                name='advantage')

        pi_a_given_s = tf.reduce_sum(
            policy * tf.one_hot(action, self.num_actions), 1)  # (B,)
        importance = tf.stop_gradient(
            tf.clip_by_value(pi_a_given_s / (action_prob + 1e-8), 0, 10))

        policy_loss = tf.reduce_sum(log_pi_a_given_s * advantage * importance,
                                    name='policy_loss')
        xentropy_loss = tf.reduce_sum(policy * log_probs, name='xentropy_loss')
        value_loss = tf.nn.l2_loss(value - futurereward, name='value_loss')

        pred_reward = tf.reduce_mean(value, name='predict_reward')
        advantage = tf.sqrt(tf.reduce_mean(tf.square(advantage)),
                            name='rms_advantage')
        entropy_beta = tf.get_variable(
            'entropy_beta',
            shape=[],
            initializer=tf.constant_initializer(0.01),
            trainable=False)
        cost = tf.add_n(
            [policy_loss, xentropy_loss * entropy_beta, value_loss])
        cost = tf.truediv(cost,
                          tf.cast(tf.shape(futurereward)[0], tf.float32),
                          name='cost')
        summary.add_moving_summary(
            policy_loss, xentropy_loss, value_loss, pred_reward, advantage,
            cost, tf.reduce_mean(importance, name='importance'))
        return cost
Example #24
def get_bn_variables(n_out, use_scale, use_bias, beta_init, gamma_init):
    if use_bias:
        beta = tf.get_variable('beta', [n_out], initializer=beta_init)
    else:
        beta = tf.zeros([n_out], name='beta')
    if use_scale:
        gamma = tf.get_variable('gamma', [n_out], initializer=gamma_init)
    else:
        gamma = tf.ones([n_out], name='gamma')
    # x * gamma + beta

    moving_mean = tf.get_variable('mean/EMA', [n_out],
                                  initializer=tf.constant_initializer(),
                                  trainable=False)
    moving_var = tf.get_variable('variance/EMA', [n_out],
                                 initializer=tf.constant_initializer(1.0),
                                 trainable=False)

    if get_current_tower_context().is_main_training_tower:
        for v in [moving_mean, moving_var]:
            tf.add_to_collection(tf.GraphKeys.MODEL_VARIABLES, v)
    return beta, gamma, moving_mean, moving_var
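For reference, a hedged sketch of how the four variables created above are typically consumed in the batch-norm forward pass (the epsilon value is illustrative):

# Training path: normalize with the current batch statistics, then scale and shift.
# batch_mean, batch_var = tf.nn.moments(x, axes=[0, 1, 2])
# out = tf.nn.batch_normalization(x, batch_mean, batch_var, beta, gamma, 1e-5)
# Inference path: reuse the EMA statistics tracked by 'mean/EMA' and 'variance/EMA'.
# out = tf.nn.batch_normalization(x, moving_mean, moving_var, beta, gamma, 1e-5)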
Example #25
 def _basic_cell(self,
                 initializer=None,
                 hid_to_fs_params=None,
                 l_hallu_costs=None):
     is_training = get_current_tower_context().is_training
     if is_training:
         h_mask = self.cell_mask(self.keep_prob_h)
         x_mask = self.cell_mask(self.keep_prob_x)
     else:
         h_mask = x_mask = None
     cell = PetridishRNNCell(num_units=self.num_units,
                             layer_info_list=self.layer_info_list,
                             num_proj=self.num_proj,
                             hid_to_fs_params=hid_to_fs_params,
                             l_hallu_costs=l_hallu_costs,
                             initializer=initializer,
                             data_format=self.data_format,
                             compute_hallu_stats=self.compute_hallu_stats,
                             h_mask=h_mask,
                             x_mask=x_mask)
     self.cells.append(cell)
     return cell
Example #26
    def build_graph(self, image, label):
        """
        The default tower function.
        """
        image = self.image_preprocess(image)
        assert self.data_format == 'NCHW'
        image = tf.transpose(image, [0, 3, 1, 2])
        ctx = get_current_tower_context()

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            # BatchNorm always comes with trouble. We use the testing mode of it during attack.
            with freeze_collection([tf.GraphKeys.UPDATE_OPS
                                    ]), argscope(BatchNorm, training=False):
                image, target_label = self.attacker.attack(
                    image, label, self.get_logits)
                image = tf.stop_gradient(image, name='adv_training_sample')

            logits = self.get_logits(image)

        loss = ImageNetModel.compute_loss_and_error(
            logits, label, label_smoothing=self.label_smoothing)
        AdvImageNetModel.compute_attack_success(logits, target_label)
        if not ctx.is_training:
            return

        wd_loss = regularize_cost(self.weight_decay_pattern,
                                  tf.contrib.layers.l2_regularizer(
                                      self.weight_decay),
                                  name='l2_regularize_loss')
        add_moving_summary(loss, wd_loss)
        total_cost = tf.add_n([loss, wd_loss], name='cost')

        if self.loss_scale != 1.:
            logger.info("Scaling the total loss by {} ...".format(
                self.loss_scale))
            return total_cost * self.loss_scale
        else:
            return total_cost
Example #27
def regularize_cost_from_collection(name='regularize_cost'):
    """
    Get the cost from the regularizers in ``tf.GraphKeys.REGULARIZATION_LOSSES``.
    If in replicated mode, will only regularize variables created within the current tower.
    Args:
        name (str): the name of the returned tensor
    Returns:
        tf.Tensor: a scalar, the total regularization cost.
    """
    ctx = get_current_tower_context()
    if not ctx.is_training:
        # TODO Currently cannot build the wd_cost correctly at inference,
        # because ths vs_name used in inference can be '', therefore the
        # variable filter will fail
        return tf.constant(0, dtype=tf.float32, name='empty_' + name)

    # NOTE: this collection doesn't always grow with towers.
    # It only grows with actual variable creation, but not get_variable call.
    if ctx.has_own_variables:  # be careful of the first tower (name='')
        losses = ctx.get_collection_in_tower(
            tfv1.GraphKeys.REGULARIZATION_LOSSES)
    else:
        losses = tfv1.get_collection(tfv1.GraphKeys.REGULARIZATION_LOSSES)
    if len(losses) > 0:
        logger.info("regularize_cost_from_collection() found {} regularizers "
                    "in REGULARIZATION_LOSSES collection.".format(len(losses)))

        def maploss(l):
            assert l.dtype.is_floating, l
            if l.dtype != tf.float32:
                l = tf.cast(l, tf.float32)
            return l

        losses = [maploss(l) for l in losses]
        reg_loss = tf.add_n(losses, name=name)
        return reg_loss
    else:
        return tf.constant(0, dtype=tf.float32, name='empty_' + name)
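A typical way to consume the returned scalar inside a tower function, shown as a hedged sketch with assumed variable names:

# wd_cost = regularize_cost_from_collection(name='regularize_cost')
# total_cost = tf.add_n([loss, wd_cost], name='cost')
# add_moving_summary(loss, wd_cost)
# return total_cost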
Example #28
    def _get_NN_prediction(self, state):
        from tensorpack.tfutils import symbolic_functions
        ctx = get_current_tower_context()
        is_training = ctx.is_training
        l = state
        # l = tf.Print(l, [state], 'State = ')
        with tf.variable_scope('critic') as vs:

            from autodrive.model.selu import fc_selu
            for lidx in range(8):
                l = fc_selu(l, 200,
                            keep_prob=1.,  # we train only on sensor input, so key information must not be dropped
                            is_training=is_training, name='fc-{}'.format(lidx))
            # l = tf.layers.dense(l, 512, activation=tf.nn.relu, name='fc-dense')
            # for lidx, hidden_size in enumerate([300, 600]):
            #     l = tf.layers.dense(l, hidden_size, activation=tf.nn.relu, name='fc-%d'%lidx)
            value = tf.layers.dense(l, 1, name='fc-value',\
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
            if not hasattr(self, '_weights_critic'):
                self._weights_critic = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        with tf.variable_scope('actor') as vs:
            l = tf.stop_gradient(l)
            mu_steering = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-steering',\
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            mu_accel = tf.layers.dense(l, 1, activation=tf.nn.tanh, name='fc-mu-accel',\
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            mus = tf.concat([mu_steering, mu_accel], axis=-1)
            # mus = tf.layers.dense(l, 2, activation=tf.nn.tanh, name='fc-mus')
            # sigmas = tf.layers.dense(l, 2, activation=tf.nn.softplus, name='fc-sigmas')
            # sigmas = tf.clip_by_value(sigmas, -0.001, 0.5)
            sigma_steering_ = 0.5 * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-steering',\
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            sigma_accel_ = 1. * tf.layers.dense(l, 1, activation=tf.nn.sigmoid, name='fc-sigma-accel',\
            kernel_initializer=tf.truncated_normal_initializer(stddev=0.01))
            # sigma_beta_steering = symbolic_functions.get_scalar_var('sigma_beta_steering', 0.3, summary=True, trainable=False)
            # sigma_beta_accel = symbolic_functions.get_scalar_var('sigma_beta_accel', 0.3, summary=True, trainable=False)
            from tensorpack.tfutils.common import get_global_step_var
            sigma_beta_steering_exp = tf.train.exponential_decay(0.001, get_global_step_var(), 1000, 0.5, name='sigma/beta/steering/exp')
            sigma_beta_accel_exp = tf.train.exponential_decay(0.5, get_global_step_var(), 5000, 0.5, name='sigma/beta/accel/exp')
            # sigma_steering = tf.minimum(sigma_steering_ + sigma_beta_steering, 0.5)
            # sigma_accel = tf.minimum(sigma_accel_ + sigma_beta_accel, 0.2)
            # sigma_steering = sigma_steering_
            sigma_steering = (sigma_steering_ + sigma_beta_steering_exp)
            sigma_accel = (sigma_accel_ + sigma_beta_accel_exp) #* 0.1
            # sigma_steering = sigma_steering_
            # sigma_accel = sigma_accel_
            sigmas = tf.concat([sigma_steering, sigma_accel], axis=-1)
            #     sigma_steering = tf.clip_by_value(sigma_steering, 0.1, 0.5)

            #     sigma_accel = tf.clip_by_value(sigma_accel, 0.1, 0.5)

            # sigmas = sigmas_orig + 0.001
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigma_beta = tf.get_variable('sigma_beta', shape=[], dtype=tf.float32,
            #                              initializer=tf.constant_initializer(.5), trainable=False)

            # if is_training:
            #     pass
            #     # Without sigma_beta, convergence would be slow and unstable, likely because:
            #     #   1. large exploration early in training keeps the network out of local optima
            #     #   2. a sigma that is too small early on makes normal_dist's log_prob very large,
            #     #      so gradient updates blow up and the network is crippled from the start and hard to recover
            #
            # if is_training:
            #     sigmas += sigma_beta_steering
            # sigma_steering = tf.clip_by_value(sigma_steering, sigma_beta_steering, 0.5)
            # sigma_accel = tf.clip_by_value(sigma_accel, sigma_beta_accel, 0.5)
            # sigmas = tf.clip_by_value(sigmas, 0.1, 0.5)
            # sigmas_orig = sigmas
            # sigmas = sigmas + sigma_beta_steering
            # sigmas = tf.minimum(sigmas + 0.1, 100)
            # sigmas = tf.clip_by_value(sigmas, sigma_beta_steering, 1)
            # sigma_steering += sigma_beta_steering
            # sigma_accel += sigma_beta_accel

            # mus = tf.concat([mu_steering, mu_accel], axis=-1)

            from tensorflow.contrib.distributions import Normal
            dists = Normal(mus, sigmas+1e-3)
            actions = tf.squeeze(dists.sample([1]), [0])
            # clip to within one standard deviation
            # actions = tf.clip_by_value(actions, -1., 1.)
            if is_training:
                summary.add_moving_summary(tf.reduce_mean(mu_steering, name='mu/steering/mean'),
                                           tf.reduce_mean(mu_accel, name='mu/accel/mean'),
                                           tf.reduce_mean(sigma_steering, name='sigma/steering/mean'),
                                           tf.reduce_max(sigma_steering, name='sigma/steering/max'),
                                           tf.reduce_mean(sigma_accel, name='sigma/accel/mean'),
                                           tf.reduce_max(sigma_accel, name='sigma/accel/max'),
                                           sigma_beta_accel_exp,
                                           sigma_beta_steering_exp,
                                           )
            # actions = tf.Print(actions, [mus, sigmas, tf.concat([sigma_steering_, sigma_accel_], -1), actions],
            #                    'mu/sigma/sigma.orig/act=', summarize=4)
            if not hasattr(self, '_weights_actor'):
                self._weights_actor = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        return actions, value, dists
Example #29
 def training(self):
     return get_current_tower_context().is_training
Example #30
    def _build_graph(self, inputs):
        from tensorpack.tfutils.common import get_global_step_var
        state, action, futurereward, advantage = inputs
        is_training = get_current_tower_context().is_training
        policy, value, dists = self._get_NN_prediction(state)
        if not hasattr(self, '_weights_train'):
            self._weights_train = self._weights_critic + self._weights_actor
        self.value = tf.squeeze(value, [1], name='value')  # (B,)
        self.policy = tf.identity(policy, name='policy')

        with tf.variable_scope("Pred") as vs:
            __p, __v, _ = self._get_NN_prediction(state)
            __v = tf.squeeze(__v, [1], name='value')  # (B,)
            __p = tf.identity(__p, name='policy')
            if not hasattr(self, '_weights_pred'):
                self._weights_pred = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
                assert (len(self._weights_train) == len(self._weights_pred))
                assert (not hasattr(self, '_sync_op'))
                self._sync_op = tf.group(*[d.assign(s + tf.truncated_normal(tf.shape(s), stddev=0.02)) for d, s in zip(self._weights_pred, self._weights_train)])

        with tf.variable_scope('pre') as vs:
            pre_p,pre_v,pre_dists=self._get_NN_prediction(state)
            if not hasattr(self,'pre_weights'):
                self.pre_weights=tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=vs.name)
                self._td_sync_op = tf.group(*[d.assign(s) for d, s in zip(self.pre_weights, self._weights_train)])


        if not is_training:
            return

        # advantage = tf.subtract(tf.stop_gradient(self.value), futurereward, name='advantage')
        # advantage = tf.Print(advantage, [self.value, futurereward, action, advantage], 'value/reward/act/advantage=', summarize=4)
        log_probs = dists.log_prob(action)
        #add  ppo policy clip loss
        #add ratio  ,surr1, surr2
        pre_probs=pre_dists.log_prob(action)
        ratio=tf.exp(log_probs-pre_probs)
        prob_ratio = tf.reduce_mean(input_tensor=tf.concat(values=ratio, axis=1), axis=1)
        clip_param=tf.train.exponential_decay(CLIP_PARAMETER, get_global_step_var(), 10000, 0.98, name='clip_param')


        # surr1=prob_ratio*advantage
        surr1=ratio*tf.expand_dims(advantage, -1)
        surr2=tf.clip_by_value(ratio,1.0-clip_param,1.0+clip_param)*tf.expand_dims(advantage, -1)
        
        # surr2=tf.clip_by_value(prob_ratio,1.0-clip_param,1.0+clip_param)*advantage

        loss_policy=-tf.reduce_mean(tf.minimum(surr1,surr2))

        #add critic clip loss
        v_loss1=tf.square(value-futurereward)
        pre_value=pre_v+tf.clip_by_value(value-pre_v,-clip_param,clip_param)
        v_loss2=tf.square(pre_v-futurereward)
        # loss_value=0.5*tf.reduce_mean(tf.maximum(v_loss1,v_loss2))
        loss_value=0.5*tf.reduce_mean(v_loss1)
        

        entropy = dists.entropy()
        entropy_beta = tf.get_variable('entropy_beta', shape=[],
                                       initializer=tf.constant_initializer(0.01), trainable=False)
        exp_v = entropy_beta * entropy
        loss_entropy = tf.reduce_mean(-tf.reduce_sum(exp_v, axis=-1), name='loss/policy')
        loss_policy=loss_policy+loss_entropy
        

        # exp_v = tf.transpose(
        #     tf.multiply(tf.transpose(log_probs), advantage))
        # exp_v = tf.multiply(log_probs, advantage)
        # exp_v = log_probs * tf.expand_dims(advantage, -1)
        # entropy = dists.entropy()
        # entropy_beta = tf.get_variable('entropy_beta', shape=[],
        #                                initializer=tf.constant_initializer(0.01), trainable=False)
        # exp_v = entropy_beta * entropy + exp_v
        
        # loss_value = tf.reduce_mean(0.5 * tf.square(self.value - futurereward))

        # loss_entropy = tf.reduce_mean(tf.reduce_sum(entropy, axis=-1), name='xentropy_loss')


        from tensorflow.contrib.layers.python.layers.regularizers import apply_regularization, l2_regularizer
        loss_l2_regularizer = apply_regularization(l2_regularizer(1e-4), self._weights_critic)
        loss_l2_regularizer = tf.identity(loss_l2_regularizer, 'loss/l2reg')
        loss_value += loss_l2_regularizer
        loss_value = tf.identity(loss_value, name='loss/value')

        # self.cost = tf.add_n([loss_policy, loss_value * 0.1, loss_l2_regularizer])
        self._cost = [loss_policy,
                      loss_value
                      ]
        from autodrive.trainer.summary import addParamSummary
        addParamSummary([('.*', ['rms', 'absmax'])])
        pred_reward = tf.reduce_mean(self.value, name='predict_reward')
        advantage = symbf.rms(advantage, name='rms_advantage')
        summary.add_moving_summary(loss_policy, loss_value,
                                   loss_entropy,
                                   pred_reward, advantage,
                                   loss_l2_regularizer,
                                   tf.reduce_mean(self.policy[:, 0], name='action/steering/mean'),
                                   tf.reduce_mean(self.policy[:, 1], name='action/accel/mean'),
                                    )
    def _build_graph(self, input_vars):
        image, label = input_vars
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        ctx = get_current_tower_context()
        print("train or test tower context?", ctx.is_training)

        tf.summary.image("Input Image", image[0:20],
                         max_outputs=20)  #.astype("uint8"))

        def conv(name, l, channel, stride, kernel_size):
            #rand_seed = np.random.randint(2**32-1)
            #np.random.seed(None)
            conv2d_he = Conv2D(
                name,
                l,
                channel,
                kernel_size,
                stride=stride,
                nl=tf.identity,
                use_bias=False,
                W_init=tf.variance_scaling_initializer(dtype=tf.float32))
            #tf.random_normal_initializer(stddev=np.sqrt(2.0/9/channel)))#tf.contrib.layers.variance_scaling_initializer(factor=2.0, mode='FAN_AVG', uniform=False))
            #np.random.seed(rand_seed)
            return conv2d_he

        def batch_norm(scope, name, layer, decay, layer_num, norm_pattern,
                       training):
            with tf.variable_scope(scope) as s:
                if training:
                    layer = BatchNorm(
                        name,
                        layer) if layer_num % norm_pattern != 0 else layer
                else:
                    if decay is not None:
                        layer = BatchNorm(
                            name, layer, decay=decay, use_local_stat=False
                        ) if layer_num % norm_pattern != 0 else layer
                    else:
                        layer = BatchNorm(
                            name, layer, use_local_stat=False
                        ) if layer_num % norm_pattern != 0 else layer
            return layer

        def add_layer(name, l, kernel_size, growth_rate, drop_rate, training,
                      layer_num, drop_pattern, bn_momentum, skip_norm):
            shape = l.get_shape().as_list()
            in_channel = shape[3]
            with tf.variable_scope(name) as scope:
                # layer num mod 1 for bnorm every layer
                c = batch_norm(name, 'bn.{}'.format(layer_num), l, bn_momentum,
                               layer_num, skip_norm, training)  #epsilon=0.001
                c = tf.nn.relu(c)
                c = conv('conv1', c, growth_rate, 1, kernel_size)
                l = tf.concat([c, l], 3)
                if drop_pattern != 0 and layer_num % drop_pattern == 0:
                    spatial_drop = tf.shape(l)
                    # drop every layer mod drop_pattern, drop_pattern == 0 if no drop wanted
                    l = tf.cond(
                        tf.equal(tf.constant(training),
                                 tf.constant(True)), lambda: tf.nn.
                        dropout(l,
                                keep_prob=tf.constant(drop_rate),
                                noise_shape=
                                [spatial_drop[0], 1, 1, spatial_drop[3]],
                                name='dropblock'), lambda: l)
            return l

        def add_transition(name, l, drop_rate, training, drop_pattern,
                           transition_number, bn_momentum):
            shape = l.get_shape().as_list()
            in_channel = shape[3]
            with tf.variable_scope(name):
                # layer_num=42, norm_pattern=43: 42 % 43 != 0, so the
                # transition is always normalized.
                l = batch_norm(name, 'bntransit.{}'.format(transition_number),
                               l, bn_momentum, 42, 43, training)
                l = tf.nn.relu(l)
                l = Conv2D('conv1',
                           l,
                           in_channel,
                           1,
                           stride=1,
                           use_bias=False,
                           nl=tf.nn.relu)
                # Optional dropout at the transition (drop_rate is passed as
                # the keep probability); disabled when drop_pattern == 0.
                if training and drop_pattern != 0:
                    l = tf.nn.dropout(l,
                                      keep_prob=tf.constant(drop_rate),
                                      name='droptransition')
                l = AvgPooling('pool', l, 2)
            return l

        def dense_net(name):

            l = conv('conv0', image, self.filters_init, 1, self.kernel_size)

            with tf.variable_scope('block1') as scope:
                for i in range(self.N):
                    # block1 uses drop_pattern=0, i.e. no dropout in the first block
                    l = add_layer(name='dense_layer.{}'.format(i),
                                  l=l,
                                  kernel_size=self.kernel_size,
                                  growth_rate=self.growthRate,
                                  drop_rate=self.drop_rate,
                                  training=self.train_or_test,
                                  layer_num=i,
                                  drop_pattern=0,
                                  bn_momentum=self.bn_momentum,
                                  skip_norm=self.skip_norm)

                l = add_transition(name='transition1',
                                   l=l,
                                   drop_rate=self.drop_rate,
                                   training=self.train_or_test,
                                   drop_pattern=self.drop_pattern,
                                   transition_number=1,
                                   bn_momentum=self.bn_momentum)

            with tf.variable_scope('block2') as scope:
                for i in range(self.N):
                    l = add_layer('dense_layer.{}'.format(i), l,
                                  self.kernel_size, self.growthRate,
                                  self.drop_rate, self.train_or_test, i,
                                  self.drop_pattern, self.bn_momentum,
                                  self.skip_norm)
                l = add_transition('transition2', l, self.drop_rate,
                                   self.train_or_test, self.drop_pattern, 2,
                                   self.bn_momentum)

            with tf.variable_scope('block3') as scope:
                for i in range(self.N):
                    l = add_layer('dense_layer.{}'.format(i), l,
                                  self.kernel_size, self.growthRate,
                                  self.drop_rate, self.train_or_test, i,
                                  self.drop_pattern, self.bn_momentum,
                                  self.skip_norm)

            # layer_num=42, norm_pattern=43: the final BatchNorm is always applied
            l = batch_norm(name, 'bnlast', l, self.bn_momentum, 42, 42 + 1,
                           self.train_or_test)
            l = tf.nn.relu(l)
            l = GlobalAvgPooling('gap', l)
            logits = FullyConnected('linear', l, out_dim=2, nl=tf.identity)

            return logits

        def prediction_incorrect(logits,
                                 label,
                                 topk=1,
                                 name='incorrect_vector'):
            #with tf.name_scope('prediction_incorrect'):
            x = tf.logical_not(tf.nn.in_top_k(logits, label, topk))
            return tf.cast(x, tf.float32, name=name)

        logits = dense_net("dense_net")  #map probabilities to real domain

        prob = tf.nn.softmax(
            logits, name='output'
        )  #a generalization of the logistic function that "squashes" a K-dim vector z  of arbitrary real values to a K-dim vector sigma( z ) of real values in the range [0, 1] that add up to 1.

        # Balance the loss for class imbalance: each class is weighted by
        # (n_0 + n_1) / (2 * n_class), so the rarer class contributes more.
        factorbl = (self.class_0 + self.class_1) / (2 * self.class_0)
        factordl = (self.class_0 + self.class_1) / (2 * self.class_1)
        class_weights = tf.constant([factorbl, factordl])
        weights = tf.gather(class_weights, label)

        # Per-example weighted cross-entropy; the class weights compensate for
        # the unequal cost of false positives vs. false negatives.
        cost = tf.losses.sparse_softmax_cross_entropy(label,
                                                      logits,
                                                      weights=weights)
        cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # average over the batch
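        # Worked example with hypothetical counts: class_0 = 900, class_1 = 100
        # gives factorbl = 1000 / 1800 ~= 0.56 and factordl = 1000 / 200 = 5.0,
        # so an error on the rare class costs roughly 9x more than on the common one.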

        wrong = prediction_incorrect(logits, label)

        # monitor training error
        add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

        # weight decay on all W
        wd_reg = tf.constant(self.weight_decay_rate, dtype=tf.float32)
        wd_cost = tf.multiply(wd_reg,
                              regularize_cost('.*/W', tf.nn.l2_loss),
                              name='wd_cost')
        add_moving_summary(cost, wd_cost)

        add_param_summary(('.*/W', ['histogram']))  # monitor W

        self.cost = tf.add_n([cost, wd_cost], name='cost')
        return self.cost
Example #32
0
    def build_graph(self, x, image_target):
        with tf.name_scope("preprocess"):
            image_target = image_target / 255.

        def viz(name, images):
            with tf.name_scope(name):
                im = tf.concat(images, axis=2)
                #im = tf.transpose(im, [0, 2, 3, 1])
                if self._act_input == tf.tanh:
                    im = (im + 1.0) * 127.5
                else:
                    im = im * 255
                im = tf.clip_by_value(im, 0, 255)
                im = tf.round(im)
                im = tf.cast(im, tf.uint8, name="viz")
            return im

        # calculate gram_target
        _, gram_target = self._build_extractor(image_target, name="ext_target")
        # inference pre_image_output from pre_image_input and gram_target
        self.image_outputs = list()
        self.loss_per_stage = list()
        x_output = x
        with tf.variable_scope("syn"):
            # use data stats in both train and test phases
            with argscope(BatchNorm, training=True):
                for s in range(self._n_stage):
                    # get the first (s+1) coefs
                    coefs = OrderedDict()
                    for k in list(SynTexModelDesc.DEFAULT_COEFS.keys())[:s + 1]:
                        coefs[k] = SynTexModelDesc.DEFAULT_COEFS[k]
                    x_image, loss_input, _, x_output = \
                        self.build_stage(x_output, gram_target, coefs, name="stage%d" % s)
                    self.image_outputs.append(x_image)
                    self.loss_per_stage.append(
                        tf.reduce_mean(loss_input, name="loss%d" % s))
        self.collect_variables("syn")
        #
        image_output = self._act_input(x_output, name="output")
        loss_output, loss_per_layer_output, _ = \
            self._build_loss(image_output, gram_target, calc_grad=False)
        self.image_outputs.append(image_output)
        self.loss_per_stage.append(
            tf.reduce_mean(loss_output, name="loss_output"))
        self.loss_per_layer_output = OrderedDict()
        with tf.name_scope("loss_per_layer_output"):
            for layer in loss_per_layer_output:
                self.loss_per_layer_output[layer] = tf.reduce_mean(
                    loss_per_layer_output[layer], name=layer)
        # average losses from all stages
        weights = [1.]
        for _ in range(len(self.loss_per_stage) - 1):
            weights.append(weights[-1] * self._loss_scale)
        # skip the first loss as it is computed from noise
        self.loss = tf.add_n([weights[i] * loss \
            for i, loss in enumerate(reversed(self.loss_per_stage[1:]))], name="loss")
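        # In the sum above, weights[0] == 1 pairs with the final output loss
        # (the stage list is reversed), so earlier stages are down-weighted by
        # successive powers of self._loss_scale, e.g. 1.0, 0.5, 0.25 for a
        # scale of 0.5 over three summed losses.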
        # summary
        #with tf.device("/cpu:0"):
        stages_target = viz("stages-target",
                            self.image_outputs + [image_target])
        ctx = get_current_tower_context()
        if ctx is not None and ctx.is_main_training_tower:
            tf.summary.image("stages-target",
                             stages_target,
                             max_outputs=10,
                             collections=["image_summaries"])
            add_moving_summary(self.loss, *self.loss_per_stage,
                               *self.loss_per_layer_output.values())
Example #33
0
    def build_graph(self, seq, tseq):
        batch_size = self.bs_per_gpu
        dynamic_seq_len = tf.shape(seq)[1]
        labels = tf.reshape(tseq, [-1])
        DROPOUT = 0.5
        with argscope(
                    [
                        Conv2D, Deconv2D, GroupedConv2D, AvgPooling,
                        MaxPooling, BatchNorm, GlobalAvgPooling,
                        ResizeImages, SeparableConv2D
                    ],
                    data_format=self.data_format
                ), \
                argscope(
                    [Conv2D, Deconv2D, GroupedConv2D, SeparableConv2D],
                    activation=tf.identity,
                    use_bias=self.options.use_bias
                ), \
                argscope(
                    [BatchNorm],
                    center=False,
                    scale=False,
                    decay=self.options.batch_norm_decay,
                    epsilon=self.options.batch_norm_epsilon
                ), \
                argscope(
                    [candidate_gated_layer],
                    eps=self.options.candidate_gate_eps
                ):

            is_training = get_current_tower_context().is_training
            initializer = tf.random_uniform_initializer(
                -self.init_range, self.init_range)
            # B x seqlen x hidden
            seq, embedding_w = self._embed_input_if_int(
                seq, initializer=initializer)
            seq = self.locked_dropout(seq, self.keep_prob_i)
            hid_to_fs_params = _init_feature_select(
                self.layer_info_list, 'master', self.options.feat_sel_lambda)
            l_hallu_costs = []

            self.basic_cells = basic_cells = [
                self._basic_cell(initializer=initializer,
                                 hid_to_fs_params=hid_to_fs_params,
                                 l_hallu_costs=l_hallu_costs)
                for _ in range(self.num_lstms)
            ]
            cells = rnn.MultiRNNCell(basic_cells)

            self.state = tuple([
                basic_cells[k].get_state_var(
                    self.state_var_names[k], batch_size) \
                for k in range(self.num_lstms)
            ])
            self.last_state = tuple([
                basic_cells[k].get_state_var(
                    self.last_state_var_names[k] + '_last', batch_size) \
                for k in range(self.num_lstms)
            ])
            self._update_init_state_op = self.update_init_state()
            with tf.control_dependencies([self._update_init_state_op]):
                with tf.variable_scope('RNN', initializer=initializer):
                    outputs, last_state = tf.nn.dynamic_rnn(
                        cells,
                        seq,
                        initial_state=self.state,
                        parallel_iterations=self.max_len)
                    # for the update op
            self._update_last_state_op = self.update_last_state(
                tf.stop_gradient(last_state))
            with tf.control_dependencies([self._update_last_state_op]):
                seqout, sum_hallu_costs = basic_cells[-1].split_outputs(
                    outputs)
                seqout = self.locked_dropout(seqout, self.keep_prob)
                flat_seqout = tf.reshape(seqout, [-1, self.num_units])

            # compute logits and prediction log loss
            if self.lock_embedding:
                logits = self.linear_with_embedding_w(flat_seqout, embedding_w)
            else:
                logits = FullyConnected('linear',
                                        flat_seqout,
                                        self.vocab_size,
                                        activation=tf.identity)
            logloss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=labels)
            per_seq_logloss = tf.reduce_sum(tf.reshape(logloss,
                                                       [self.bs_per_gpu, -1]),
                                            axis=1,
                                            name="per_seq_sum_logloss")
            cost = tf.truediv(tf.reduce_sum(logloss),
                              tf.cast(self.bs_per_gpu, tf.float32),
                              name='avg_batch_cost')
            float_seq_len = tf.cast(dynamic_seq_len,
                                    tf.float32,
                                    name='seq_len')

            # # tensorpack Inferencers must consume tensors, so a test-time
            # # state update would have to be wrapped into a tensor, e.g.:
            # test_time_update = self.update_state(
            #     [per_seq_logloss], name='test_time_update')
            # with tf.control_dependencies([test_time_update]):
            #     self._inference_update_tensor = tf.multiply(
            #         cost, 1.0001, name=self._inference_update_tensor_name)

            perpl = tf.identity(tf.exp(cost / float_seq_len),
                                name='perplexity')
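            # cost above is the per-sequence log loss averaged over the batch,
            # so exp(cost / seq_len) is the per-token perplexity.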
            add_moving_summary(perpl, cost, float_seq_len)

            # regularization
            if self.rnn_l2_reg:
                cost += (self.rnn_l2_reg * tf.reduce_sum(seqout**2) /
                         tf.to_float(self.bs_per_gpu))
            if self.rnn_slowness_reg:
                assert self.t_dim == 1
                all_h_diff = tf.reduce_sum(
                    (seqout[:, 1:, :] - seqout[:, :-1, :])**2)
                cost += (self.rnn_slowness_reg * all_h_diff /
                         tf.to_float(self.bs_per_gpu))
            wd_w = self.options.regularize_const
            if self.params_to_regularize is not None and wd_w:
                wd_cost = wd_w * regularize_cost(self.params_to_regularize,
                                                 tf.nn.l2_loss)
                wd_cost = tf.identity(wd_cost, name='wd_cost')
                add_moving_summary(wd_cost)
                cost += wd_cost
            cost = tf.identity(cost, name='rnn_reg_cost')
            add_moving_summary(cost)

            # hallucination costs
            if l_hallu_costs:
                sum_hallu_costs = tf.identity(sum_hallu_costs,
                                              name='hallu_cost')
                add_moving_summary(sum_hallu_costs)
                cost += sum_hallu_costs
            # this computes some gradient norms
            self._build_hallu_stats_graph(cost)
            # scale the loss according to the sequence length
            self.cost = tf.identity(cost * float_seq_len /
                                    np.float32(self.max_len),
                                    name='cost')
            add_moving_summary(self.cost)
            return self.cost
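self.locked_dropout is not defined in this snippet; a minimal sketch of a locked (variational) dropout over a [batch, time, features] tensor, assuming the same mask is reused at every time step:

import tensorflow as tf

def locked_dropout(x, keep_prob, is_training=True):
    # One mask per (batch, feature), broadcast over the time dimension, so the
    # same units are dropped at every step; rescale to preserve expectations.
    if not is_training or keep_prob >= 1.0:
        return x
    shape = tf.shape(x)
    mask = tf.floor(keep_prob +
                    tf.random_uniform([shape[0], 1, shape[2]], dtype=x.dtype))
    return x * mask / keep_prob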
Example #34
0
def QuantizedWeight(name, x, n, nbit=2):
    """
    Quantize weight.
    Args:
        x (tf.Tensor): a 4D tensor.
            Must have known number of channels, but can have other unknown dimensions.
        name (str): operator's name.
        n (int or double): variance of weight initialization.
        nbit (int): number of bits of quantized weight. Defaults to 2.
    Returns:
        tf.Tensor with attribute `variables`.
    Variable Names:
    * ``basis``: basis of quantized weight.
    Note:
        About multi-GPU training: moving averages across GPUs are not aggregated.
        Batch statistics are computed by main training tower. This is consistent with most frameworks.
    """
    num_filters = x.get_shape().as_list()[-1]

    init_basis = []
    base = NORM_PPF_0_75 * ((2. / n)**0.5) / (2**(nbit - 1))
    for j in range(nbit):
        init_basis.append([(2**j) * base for i in range(num_filters)])
    init_basis = tf.constant_initializer(init_basis)

    bit_dims = [nbit, num_filters]
    num_levels = 2**nbit
    delta = EPS

    # initialize level multiplier
    init_level_multiplier = []
    for i in range(num_levels):
        level_multiplier_i = [0. for j in range(nbit)]
        level_number = i
        for j in range(nbit):
            binary_code = level_number % 2
            if binary_code == 0:
                binary_code = -1
            level_multiplier_i[j] = float(binary_code)
            level_number = level_number // 2
        init_level_multiplier.append(level_multiplier_i)

    # initialize threshold multiplier
    init_thrs_multiplier = []
    for i in range(1, num_levels):
        thrs_multiplier_i = [0. for j in range(num_levels)]
        thrs_multiplier_i[i - 1] = 0.5
        thrs_multiplier_i[i] = 0.5
        init_thrs_multiplier.append(thrs_multiplier_i)
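    # Worked example for nbit = 2: the level codes enumerate {-1, +1}^2, i.e.
    # (-1, -1), (+1, -1), (-1, +1), (+1, +1), so levels = level_codes x basis
    # yields 4 quantization values per filter, and each threshold row averages
    # two adjacent sorted levels: thr_i = 0.5 * (level_{i-1} + level_i).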

    with tf.variable_scope(name):
        basis = tf.get_variable('basis',
                                bit_dims,
                                tf.float32,
                                initializer=init_basis,
                                trainable=False)
        level_codes = tf.constant(init_level_multiplier)
        # For very large nbit this constant would exceed the 2GB tensor-proto limit.
        thrs_multiplier = tf.constant(init_thrs_multiplier)
        sum_multiplier = tf.constant(
            1., shape=[1, tf.reshape(x, [-1, num_filters]).get_shape()[0]])
        sum_multiplier_basis = tf.constant(1., shape=[1, nbit])

        # calculate levels and sort
        levels = tf.matmul(level_codes, basis)
        levels, sort_id = tf.nn.top_k(tf.transpose(levels, [1, 0]), num_levels)
        levels = tf.reverse(levels, [-1])
        sort_id = tf.reverse(sort_id, [-1])
        levels = tf.transpose(levels, [1, 0])
        sort_id = tf.transpose(sort_id, [1, 0])

        # calculate threshold
        thrs = tf.matmul(thrs_multiplier, levels)

        # calculate level codes per channel
        reshape_x = tf.reshape(x, [-1, num_filters])
        level_codes_channelwise_dims = tf.stack(
            [num_levels * num_filters, nbit])
        level_codes_channelwise = tf.fill(level_codes_channelwise_dims, 0.)
        for i in range(num_levels):
            eq = tf.equal(sort_id, i)
            level_codes_channelwise = tf.where(
                tf.reshape(eq, [-1]), level_codes_channelwise + level_codes[i],
                level_codes_channelwise)
        level_codes_channelwise = tf.reshape(level_codes_channelwise,
                                             [num_levels, num_filters, nbit])

        # calculate output y and its binary code
        y = tf.zeros_like(x) + levels[0]  # output
        zero_dims = tf.stack([tf.shape(reshape_x)[0] * num_filters, nbit])
        bits_y = tf.fill(zero_dims, -1.)
        zero_y = tf.zeros_like(x)
        zero_bits_y = tf.fill(zero_dims, 0.)
        zero_bits_y = tf.reshape(zero_bits_y, [-1, num_filters, nbit])
        for i in range(num_levels - 1):
            g = tf.greater(x, thrs[i])
            y = tf.where(g, zero_y + levels[i + 1], y)
            bits_y = tf.where(
                tf.reshape(g, [-1]),
                tf.reshape(zero_bits_y + level_codes_channelwise[i + 1],
                           [-1, nbit]), bits_y)
        bits_y = tf.reshape(bits_y, [-1, num_filters, nbit])

        ctx = get_current_tower_context()  # current tower context
        # training
        if ctx.is_main_training_tower:
            BT = tf.transpose(bits_y, [2, 0, 1])
            # calculate BTxB
            BTxB = []
            for i in range(nbit):
                for j in range(nbit):
                    BTxBij = tf.multiply(BT[i], BT[j])
                    BTxBij = tf.matmul(sum_multiplier, BTxBij)
                    if i == j:
                        mat_one = tf.ones([1, num_filters])
                        BTxBij = BTxBij + (delta * mat_one)  # + E
                    BTxB.append(BTxBij)
            BTxB = tf.reshape(tf.stack(values=BTxB), [nbit, nbit, num_filters])

            # calculate inverse of BTxB
            if nbit > 2:
                BTxB_transpose = tf.transpose(BTxB, [2, 0, 1])
                # 1) naive
                # BTxB_inv = tf.matrix_inverse(BTxB_transpose)
                # 2) try/except: fall back to a slightly regularized matrix
                try:
                    BTxB_inv = tf.matrix_inverse(BTxB_transpose)
                except Exception:
                    # add 1e-6 to the diagonal so the matrix stays invertible
                    BTxB_ttt = BTxB_transpose + 1e-6 * tf.eye(
                        nbit, batch_shape=[num_filters])
                    BTxB_inv = tf.matrix_inverse(BTxB_ttt)
                BTxB_inv = tf.transpose(BTxB_inv, [1, 2, 0])
            elif nbit == 2:
                det = tf.multiply(BTxB[0][0], BTxB[1][1]) - tf.multiply(
                    BTxB[0][1], BTxB[1][0])
                inv = []
                inv.append(BTxB[1][1] / det)
                inv.append(-BTxB[0][1] / det)
                inv.append(-BTxB[1][0] / det)
                inv.append(BTxB[0][0] / det)
                BTxB_inv = tf.reshape(tf.stack(values=inv),
                                      [nbit, nbit, num_filters])
            elif nbit == 1:
                BTxB_inv = tf.reciprocal(BTxB)

            # calculate BTxX
            BTxX = []
            for i in range(nbit):
                BTxXi0 = tf.multiply(BT[i], reshape_x)
                BTxXi0 = tf.matmul(sum_multiplier, BTxXi0)
                BTxX.append(BTxXi0)
            BTxX = tf.reshape(tf.stack(values=BTxX), [nbit, num_filters])
            BTxX = BTxX + (delta * basis)  # + basis

            # calculate new basis
            new_basis = []
            for i in range(nbit):
                new_basis_i = tf.multiply(BTxB_inv[i], BTxX)
                new_basis_i = tf.matmul(sum_multiplier_basis, new_basis_i)
                add_moving_summary(
                    tf.reduce_mean(new_basis_i, name='new_basis_bit' + str(i)))
                new_basis.append(new_basis_i)
            new_basis = tf.reshape(tf.stack(values=new_basis),
                                   [nbit, num_filters])

            # create moving averages op
            update_moving_basis = moving_averages.assign_moving_average(
                basis, new_basis, MOVING_AVERAGES_FACTOR)
            add_model_variable(basis)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_basis)

            # add_moving_summary(tf.identity(basis, name='basis'), tf.identity(new_basis, name='basis_new'))
            # add_moving_summary(tf.identity(basis, name='basis'))

        y = x + tf.stop_gradient(-x) + tf.stop_gradient(y)  # straight-through: forward = y, gradient = identity (y = x)
        y.variables = VariableHolder(basis=basis)
        return y
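The final assignment of y above is a straight-through estimator; a standalone sketch of the same trick, with quantize_fn as a hypothetical stand-in for the level lookup:

import tensorflow as tf

def straight_through(x, quantize_fn):
    # Forward pass returns quantize_fn(x); for the backward pass the whole
    # expression behaves like x, so gradients skip the quantization step.
    y = quantize_fn(x)
    return x + tf.stop_gradient(y - x)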
Example #35
0
def QuantizedActiv(x, nbit=2):
    """
    Quantize activation.
    Args:
        x (tf.Tensor): a 4D tensor.
        nbit (int): number of bits of quantized activation. Defaults to 2.
    Returns:
        tf.Tensor with attribute `variables`.
    Variable Names:
    * ``basis``: basis of quantized activation.
    Note:
        About multi-GPU training: the moving average of the basis is not
        aggregated across GPUs; it is updated only by the main training tower,
        consistent with most frameworks.
    """
    init_basis = [(NORM_PPF_0_75 * 2 / (2**nbit - 1)) * (2.**i)
                  for i in range(nbit)]
    init_basis = tf.constant_initializer(init_basis)

    bit_dims = [nbit, 1]
    num_levels = 2**nbit
    # initialize level multiplier
    init_level_multiplier = []
    for i in range(0, num_levels):
        level_multiplier_i = [0. for j in range(nbit)]
        level_number = i
        for j in range(nbit):
            level_multiplier_i[j] = float(level_number % 2)
            level_number = level_number // 2
        init_level_multiplier.append(level_multiplier_i)
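    # Unlike QuantizedWeight, the activation level codes use the non-negative
    # bits {0, 1} of the level index (activations are assumed non-negative),
    # so level 0 is exactly zero.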
    # initialize threshold multiplier
    init_thrs_multiplier = []
    for i in range(1, num_levels):
        thrs_multiplier_i = [0. for j in range(num_levels)]
        thrs_multiplier_i[i - 1] = 0.5
        thrs_multiplier_i[i] = 0.5
        init_thrs_multiplier.append(thrs_multiplier_i)

    with tf.variable_scope('ActivationQuantization'):
        basis = tf.get_variable('basis',
                                bit_dims,
                                tf.float32,
                                initializer=init_basis,
                                trainable=False)

        ctx = get_current_tower_context()  # current tower context
        # calculate levels and sort
        level_codes = tf.constant(init_level_multiplier)
        levels = tf.matmul(level_codes, basis)
        levels, sort_id = tf.nn.top_k(tf.transpose(levels, [1, 0]), num_levels)
        levels = tf.reverse(levels, [-1])
        sort_id = tf.reverse(sort_id, [-1])
        levels = tf.transpose(levels, [1, 0])
        sort_id = tf.transpose(sort_id, [1, 0])
        # calculate threshold
        thrs_multiplier = tf.constant(init_thrs_multiplier)
        thrs = tf.matmul(thrs_multiplier, levels)
        # calculate output y and its binary code
        y = tf.zeros_like(x)  # output
        reshape_x = tf.reshape(x, [-1])
        zero_dims = tf.stack([tf.shape(reshape_x)[0], nbit])
        bits_y = tf.fill(zero_dims, 0.)
        zero_y = tf.zeros_like(x)
        zero_bits_y = tf.fill(zero_dims, 0.)
        for i in range(num_levels - 1):
            g = tf.greater(x, thrs[i])
            y = tf.where(g, zero_y + levels[i + 1], y)
            bits_y = tf.where(tf.reshape(g, [-1]),
                              zero_bits_y + level_codes[sort_id[i + 1][0]],
                              bits_y)
        # training
        if ctx.is_main_training_tower:
            BT = tf.matrix_transpose(bits_y)
            # calculate BTxB
            BTxB = []
            for i in range(nbit):
                for j in range(nbit):
                    BTxBij = tf.multiply(BT[i], BT[j])
                    BTxBij = tf.reduce_sum(BTxBij)
                    # all dimensions are reduced, so BTxBij becomes a scalar
                    BTxB.append(BTxBij)
            BTxB = tf.reshape(tf.stack(values=BTxB), [nbit, nbit])
            # 1) naive
            # BTxB_inv = tf.matrix_inverse(BTxB)

            # 2) try/except -> doesn't work well because tf.matrix_inverse handles near-singular matrices poorly
            # try:
            #     BTxB_inv = tf.matrix_inverse(BTxB, adjoint=None, name=None)
            # except:
            #     BTxB_ttt = tf.add(BTxB, tf.math.scalar_mul(tf.identity((BTxB.shape)), 1e-4))
            #     BTxB_inv = tf.matrix_inverse(BTxB_ttt, adjoint=None, name=None)

            # calculate BTxX
            BTxX = []
            for i in range(nbit):
                BTxXi0 = tf.multiply(BT[i], reshape_x)
                BTxXi0 = tf.reduce_sum(BTxXi0)
                BTxX.append(BTxXi0)
            BTxX = tf.reshape(tf.stack(values=BTxX), [nbit, 1])

            # new_basis = tf.matmul(BTxB_inv, BTxX)  # would need an explicit inverse
            # 3) instead, solve the regularized least-squares system directly
            new_basis = tf.linalg.lstsq(BTxB,
                                        BTxX,
                                        fast=False,
                                        l2_regularizer=1e-5)

            # create moving averages op
            update_moving_basis = moving_averages.assign_moving_average(
                basis, new_basis, MOVING_AVERAGES_FACTOR)
            add_model_variable(basis)
            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, update_moving_basis)

            for i in range(nbit):
                tf.summary.scalar('basis%d' % i, new_basis[i][0])

        x_clip = tf.minimum(x, levels[num_levels - 1])  # clip to the largest quantization level
        y = x_clip + tf.stop_gradient(-x_clip) + tf.stop_gradient(
            y)  # straight-through: forward = y, gradient = identity on clip(x)
        y.variables = VariableHolder(basis=basis)
        return y
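A hedged usage sketch of the two quantizers; the weight tensor, fan-in and layer wiring below are illustrative assumptions, and both calls assume they run inside a tensorpack tower (e.g. a ModelDesc's build_graph), as in the snippets above:

import tensorflow as tf

# 3x3 conv kernel with 64 input and 128 output channels (hypothetical shapes).
W = tf.get_variable('W', [3, 3, 64, 128])
w_q = QuantizedWeight('quantized_W', W, n=3 * 3 * 64, nbit=2)  # n ~ fan-in
inp = tf.placeholder(tf.float32, [None, 32, 32, 64])
a_q = QuantizedActiv(tf.nn.relu(inp), nbit=2)  # quantize non-negative activations
out = tf.nn.conv2d(a_q, w_q, strides=[1, 1, 1, 1], padding='SAME')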