Code Example #1
def _build_res_block(mask,
                     config,
                     x_init,
                     ind_init,
                     bin_init,
                     n_repeat=N_REPEAT):
    """Buildds a computation graph for a single residual block."""
    ksize_list = [[1, 1, config.ksize[2], config.ksize[3]]]
    ksize_list += [[3, 3, config.ksize[3], config.ksize[3]]]
    ksize_list += [[1, 1, config.ksize[3], config.ksize[2]]]
    xs = []
    ys = []
    if config.is_sparse:
        with tf.control_dependencies([mask]):
            dt0 = cuda_timer_start_op("my_timer")
            block_params = calc_block_params_res_block(config.xsize,
                                                       config.bsize,
                                                       ksize_list,
                                                       config.strides,
                                                       config.padding)
            ind = convert_mask_to_indices_custom(mask, block_params,
                                                 config.tol, config.avgpool)
        for i in six.moves.xrange(n_repeat):
            x_ = tf.Variable(x_init)
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('sparse_{}'.format(i)):
                    y_ = _sparse_res_block_with_mask(x_, ksize_list,
                                                     block_params,
                                                     config.strides, ind_init,
                                                     bin_init)
                xs.append(x_)
                ys.append(y_)
    else:
        ind = None
        for i in six.moves.xrange(n_repeat):
            x_ = tf.Variable(tf.transpose(x_init, [0, 3, 1, 2]))  # NCHW
            with tf.control_dependencies([x_]):
                dt0 = cuda_timer_start_op("my_timer")
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('dense_{}'.format(i)):
                    y_ = res_block_bottleneck(x_,
                                              ksize_list,
                                              config.strides,
                                              True,
                                              data_format='NCHW',
                                              w_project=None,
                                              no_activation=False)
                xs.append(x_)
                ys.append(y_)
    with tf.control_dependencies(ys):
        dt = cuda_timer_end_op("my_timer")
        with tf.control_dependencies([dt]):
            y = tf.no_op()
    return y, ind, dt
Code Example #2
def _build_res_block(mask, config, x_init, ind_init, bin_init, n_repeat=N_REPEAT):
    """Buildds a computation graph for a single residual block."""
    ksize_list = [[1, 1, config.ksize[2], config.ksize[3]]]
    ksize_list += [[3, 3, config.ksize[3], config.ksize[3]]]
    ksize_list += [[1, 1, config.ksize[3], config.ksize[2]]]
    xs = []
    ys = []
    if config.is_sparse:
        # Pre-create xs so variable creation is excluded from the timed region;
        # independent variables are needed to keep TF from folding identical subgraphs.
        for i in six.moves.xrange(n_repeat):
            with tf.variable_scope('sparse_{}'.format(i)):
                xs.append(tf.Variable(x_init))
        with tf.control_dependencies(xs):
            dt0 = cuda_timer_start_op()
        with tf.control_dependencies([mask, dt0]):
            block_params = calc_block_params_res_block(config.xsize, config.bsize, ksize_list,
                                                       config.strides, config.padding)
            ind = convert_mask_to_indices_custom(mask, block_params, config.tol, config.avgpool)
        for i in six.moves.xrange(n_repeat):
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('sparse_{}'.format(i)):
                    y_ = _sparse_res_block_with_mask(xs[i], ksize_list, block_params, config.strides,
                                                     ind_init, bin_init)
                ys.append(y_)
    else:
        ind = None
        # Pre-create xs so variable creation is excluded from the timed region;
        # independent variables are needed to keep TF from folding identical subgraphs.
        for i in six.moves.xrange(n_repeat):
            with tf.variable_scope('dense_{}'.format(i)):
                xs.append(tf.Variable(tf.transpose(x_init, [0, 3, 1, 2])))    # NCHW
        with tf.control_dependencies(xs):
            dt0 = cuda_timer_start_op()
        for i in six.moves.xrange(n_repeat):
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('dense_{}'.format(i)):
                    y_ = res_block_bottleneck(
                        xs[i],
                        ksize_list,
                        config.strides,
                        True,
                        data_format='NCHW',
                        w_project=None,
                        no_activation=False)
                ys.append(y_)
    with tf.control_dependencies(ys + [dt0]):
        dt = cuda_timer_end_op(dt0)
        with tf.control_dependencies(ys + [dt]):
            y = tf.concat(ys, 0)
    return y, ind, dt
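
Example #2 moves variable creation ahead of the timer start op, so only the residual-block computation falls inside the timed region. Below is a minimal sketch of how the returned ops might be driven; the driver function, the session loop, and the assumption that `dt` evaluates to the elapsed time recorded between the CUDA timer ops are illustrative and not part of the original benchmark.

import tensorflow as tf


# Hypothetical driver for the graph built above (not from the original benchmark).
# `config`, `x_init`, `mask_val`, `ind_init` and `bin_init` are assumed to be
# prepared the same way the benchmark prepares them elsewhere.
def _run_res_block_benchmark(config, x_init, mask_val, ind_init, bin_init, n_iter=10):
    with tf.Graph().as_default():
        mask = tf.constant(mask_val)
        y, ind, dt = _build_res_block(mask, config, x_init, ind_init, bin_init)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            timings = []
            for _ in range(n_iter):
                # Fetching `y` forces all n_repeat blocks to run (via the control
                # dependencies); `dt` is assumed to carry the measured GPU time.
                _, elapsed = sess.run([y, dt])
                timings.append(elapsed)
            return timings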
Code Example #3
    def _test_resblock_gradients(self, xval, maskval, bsize, strides, padding, data_format='NHWC'):
        with tf.Graph().as_default() as g:
            x = tf.constant(xval)
            mask = tf.constant(maskval)
            ch_in = xval.shape[3]
            ch_out = xval.shape[3] // 4
            ksize_list = [[1, 1, ch_in, ch_out], [3, 3, ch_out, ch_out], [1, 1, ch_out, ch_in]]
            y = res_block_bottleneck(
                x,
                ksize_list,
                strides,
                is_training=True,
                data_format=data_format,
                w_project=None,
                no_activation=False)
            trainable_vars = tf.trainable_variables()
            print('')
            print('-' * 55)
            print('Dense Residual')
            print('{:30s} {:>10s} {:>10s}'.format('name', 'grad angle', 'abs err'))
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                yval = y.eval()
                err = compute_gradient_angle(x, xval.shape, y, yval.shape, x_init_value=xval)
                err2 = compute_gradient_abs_error(x, xval.shape, y, yval.shape, x_init_value=xval)
                print('{:30s} {:>10.3f} {:>10.3f}'.format('x', err, err2))

                for name in [
                        'sub3/conv3/Conv2D:0', 'sub3/relu3:0', 'sub3/bn3/FusedBatchNorm:0',
                        'sub2/conv2/Conv2D:0', 'sub2/relu2:0', 'sub2/bn2/FusedBatchNorm:0',
                        'sub1/conv1/Conv2D:0', 'sub1/relu1:0', 'sub1/bn1/FusedBatchNorm:0'
                ]:
                    act = g.get_tensor_by_name(name)
                    actval = act.eval()
                    err = compute_gradient_angle(
                        act, actval.shape, y, yval.shape, x_init_value=actval)
                    err2 = compute_gradient_abs_error(
                        act, actval.shape, y, yval.shape, x_init_value=actval)
                    print('{:30s} {:>10.3f} {:>10.3f}'.format(name, err, err2))

                # self.assertTrue(err < 0.001)
                for vv in trainable_vars:
                    vvval = vv.eval()
                    err = compute_gradient_angle(vv, vvval.shape, y, yval.shape, x_init_value=vvval)
                    err2 = compute_gradient_abs_error(
                        vv, vvval.shape, y, yval.shape, x_init_value=vvval)
                    print('{:30s} {:>10.3f} {:>10.3f}'.format(vv.name, err, err2))
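
Code Example #3 is a method of a tf.test.TestCase subclass (it relies on self.test_session()). Below is a minimal sketch of how it might be invoked; the class name, input shapes, and block size are assumptions chosen only to illustrate the call, not values from the original tests.

import numpy as np
import tensorflow as tf


# Hypothetical invocation of the helper above; it would live in the same
# tf.test.TestCase subclass that defines _test_resblock_gradients.
class ResBlockGradientTest(tf.test.TestCase):

    def test_dense_resblock_gradients(self):
        np.random.seed(0)
        # Random NHWC input; the helper uses ch_out = ch_in // 4 for the
        # bottleneck, so pick a channel count divisible by 4.
        xval = np.random.uniform(-1.0, 1.0, [1, 16, 16, 32]).astype(np.float32)
        # Binary spatial mask matching the input's batch/height/width.
        maskval = (np.random.uniform(0.0, 1.0, [1, 16, 16]) > 0.5).astype(np.float32)
        self._test_resblock_gradients(
            xval, maskval, bsize=[1, 4, 4, 1], strides=[1, 1, 1, 1], padding='SAME')


if __name__ == '__main__':
    tf.test.main()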