Example #1
def run_block_sparsity(sess, mask, config):
    block_params = calc_block_params(config.xsize, config.bsize, config.ksize, config.strides,
                                     config.padding)
    ind = convert_mask_to_indices_custom(mask, block_params, config.tol, config.avgpool)
    ind_val, bin_val = sess.run([ind.active_block_indices, ind.bin_counts])
    block_density = bin_val[0] / float(ind_val.shape[0])
    return 1 - block_density
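
Example #1 reads a handful of attributes off `config` (`xsize`, `bsize`, `ksize`, `strides`, `padding`, `tol`, `avgpool`) and returns the fraction of inactive blocks. A minimal driver sketch, assuming the SBNet helpers above are importable and that a plain namedtuple can stand in for the real config object:

from collections import namedtuple

import numpy as np
import tensorflow as tf

Config = namedtuple(
    'Config', ['xsize', 'bsize', 'ksize', 'strides', 'padding', 'tol', 'avgpool'])

config = Config(
    xsize=[1, 32, 32, 16],    # NHWC input size
    bsize=[16, 16],           # block size
    ksize=[3, 3, 16, 16],     # RSCK filter size
    strides=[1, 1, 1, 1],
    padding='SAME',
    tol=0.5,                  # activation threshold for a block
    avgpool=False)

# Random binary mask over the spatial dimensions.
mask = tf.constant((np.random.uniform(size=[1, 32, 32]) > 0.5).astype(np.float32))

with tf.Session() as sess:
    print('block sparsity: {:.3f}'.format(run_block_sparsity(sess, mask, config)))
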
Example #2
def gather_custom(x, mask, bsize, ksize, strides, padding):
    x_shape = [int(ss) for ss in x.get_shape()]
    block_params = calc_block_params(x_shape, bsize, ksize, strides, padding)
    indices = convert_mask_to_indices_custom(mask, block_params, 0.0)
    p = sbnet_module.sparse_gather(x,
                                   indices.bin_counts,
                                   indices.active_block_indices,
                                   bsize=block_params.bsize,
                                   boffset=block_params.boffset,
                                   bstride=block_params.bstrides)
    return p, indices
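
A usage sketch for `gather_custom`: extract the active blocks of a dense NHWC tensor given a binary mask. Shapes here are illustrative, and note the hardcoded tolerance of 0.0 means any nonzero pixel activates its block:

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.uniform(size=[1, 32, 32, 8]).astype(np.float32))
mask = tf.constant((np.random.uniform(size=[1, 32, 32]) > 0.9).astype(np.float32))

p, indices = gather_custom(
    x, mask, bsize=[16, 16], ksize=[3, 3, 8, 8], strides=[1, 1, 1, 1], padding='SAME')

with tf.Session() as sess:
    blocks, counts = sess.run([p, indices.bin_counts])
    # p presumably stacks one (padded) block per active block along axis 0.
    print(blocks.shape, counts[0])
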
Example #3
def _build_res_block(mask,
                     config,
                     x_init,
                     ind_init,
                     bin_init,
                     n_repeat=N_REPEAT):
    """Buildds a computation graph for a single residual block."""
    ksize_list = [[1, 1, config.ksize[2], config.ksize[3]]]
    ksize_list += [[3, 3, config.ksize[3], config.ksize[3]]]
    ksize_list += [[1, 1, config.ksize[3], config.ksize[2]]]
    xs = []
    ys = []
    if config.is_sparse:
        with tf.control_dependencies([mask]):
            dt0 = cuda_timer_start_op("my_timer")
            block_params = calc_block_params_res_block(config.xsize,
                                                       config.bsize,
                                                       ksize_list,
                                                       config.strides,
                                                       config.padding)
            ind = convert_mask_to_indices_custom(mask, block_params,
                                                 config.tol, config.avgpool)
        for _ in six.moves.xrange(n_repeat):
            x_ = tf.Variable(x_init)
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('sparse_{}'.format(_)):
                    y_ = _sparse_res_block_with_mask(x_, ksize_list,
                                                     block_params,
                                                     config.strides, ind_init,
                                                     bin_init)
                xs.append(x_)
                ys.append(y_)
    else:
        ind = None
        for _ in six.moves.xrange(n_repeat):
            x_ = tf.Variable(tf.transpose(x_init, [0, 3, 1, 2]))  # NCHW
            with tf.control_dependencies([x_]):
                dt0 = cuda_timer_start_op("my_timer")
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('dense_{}'.format(_)):
                    y_ = res_block_bottleneck(x_,
                                              ksize_list,
                                              config.strides,
                                              True,
                                              data_format='NCHW',
                                              w_project=None,
                                              no_activation=False)
                xs.append(x_)
                ys.append(y_)
    with tf.control_dependencies(ys):
        dt = cuda_timer_end_op("my_timer")
        with tf.control_dependencies([dt]):
            y = tf.no_op()
    return y, ind, dt
Example #4
def _build_res_block(mask, config, x_init, ind_init, bin_init, n_repeat=N_REPEAT):
    """Buildds a computation graph for a single residual block."""
    ksize_list = [[1, 1, config.ksize[2], config.ksize[3]]]
    ksize_list += [[3, 3, config.ksize[3], config.ksize[3]]]
    ksize_list += [[1, 1, config.ksize[3], config.ksize[2]]]
    xs = []
    ys = []
    if config.is_sparse:
        # Pre-create xs to exclude them from timing; independent variables are needed so TF does not fold identical subgraphs.
        for i in six.moves.xrange(n_repeat):
            with tf.variable_scope('sparse_{}'.format(i)):
                xs.append(tf.Variable(x_init))
        with tf.control_dependencies(xs):
            dt0 = cuda_timer_start_op()
        with tf.control_dependencies([mask, dt0]):
            block_params = calc_block_params_res_block(config.xsize, config.bsize, ksize_list,
                                                       config.strides, config.padding)
            ind = convert_mask_to_indices_custom(mask, block_params, config.tol, config.avgpool)
        for i in six.moves.xrange(n_repeat):
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('sparse_{}'.format(i)):
                    y_ = _sparse_res_block_with_mask(xs[i], ksize_list, block_params, config.strides,
                                                     ind_init, bin_init)
                ys.append(y_)
    else:
        ind = None
        # Pre-create xs to exclude them from timing; independent variables are needed so TF does not fold identical subgraphs.
        for i in six.moves.xrange(n_repeat):
            with tf.variable_scope('dense_{}'.format(i)):
                xs.append(tf.Variable(tf.transpose(x_init, [0, 3, 1, 2])))  # NCHW
        with tf.control_dependencies(xs):
            dt0 = cuda_timer_start_op()
        for i in six.moves.xrange(n_repeat):
            with tf.control_dependencies(ys + [dt0]):
                with tf.variable_scope('dense_{}'.format(i)):
                    y_ = res_block_bottleneck(
                        xs[i],
                        ksize_list,
                        config.strides,
                        True,
                        data_format='NCHW',
                        w_project=None,
                        no_activation=False)
                ys.append(y_)
    with tf.control_dependencies(ys + [dt0]):
        dt = cuda_timer_end_op(dt0)
        with tf.control_dependencies(ys + [dt]):
            y = tf.concat(ys, 0)
    return y, ind, dt
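
Example #4 refines Example #3: the input variables are created before the clock starts, and each repeat is chained on `ys + [dt0]` so the repeats run back to back inside the timed region. A reduced sketch of that dependency pattern, with `tf.timestamp` standing in for SBNet's custom `cuda_timer_*` ops (host-side timestamps are only a rough proxy for GPU time, which is presumably why the custom CUDA timer ops exist):

import tensorflow as tf

# Variables are created (and initialized) outside the timed region.
xs = [tf.Variable(tf.random_normal([256, 256])) for _ in range(4)]
with tf.control_dependencies(xs):
    t0 = tf.timestamp()
ys = []
for x in xs:
    with tf.control_dependencies(ys + [t0]):  # serialize the repeats after t0
        ys.append(tf.matmul(x, x))
with tf.control_dependencies(ys):
    elapsed = tf.timestamp() - t0
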
Example #5
def _build_conv(mask, config, x_init, ind_init, bin_init, n_repeat=N_REPEAT):
    """Builds a computation graph for a single convolution."""
    wnp = np.random.uniform(-1, 1, config.ksize)  # filter is RSCK
    w = tf.constant(wnp, dtype=tf.float32)
    # AP: Tensorflow doesn't support KCRS from my investigation
    #wt = tf.constant(np.transpose(wnp, [3, 2, 0, 1]), dtype=tf.float32) # transpose to KCRS
    xs = []
    ys = []
    if config.is_sparse:
        with tf.control_dependencies([mask]):
            dt0 = cuda_timer_start_op("my_timer")
            block_params = calc_block_params(config.xsize, config.bsize,
                                             config.ksize, config.strides,
                                             config.padding)
            ind = convert_mask_to_indices_custom(mask, block_params,
                                                 config.tol, config.avgpool)
        for _ in six.moves.xrange(n_repeat):
            x_ = tf.Variable(x_init)  # no need to transpose here since gather/scatter transpose
            with tf.control_dependencies(ys + [dt0]):
                y_ = _sparse_conv2d_custom_with_mask(x_, w, block_params,
                                                     config.strides, ind_init,
                                                     bin_init)
                xs.append(x_)
                ys.append(y_)
    else:
        ind = None
        for _ in six.moves.xrange(n_repeat):
            x_ = tf.Variable(tf.transpose(x_init, [0, 3, 1, 2]))  # NCHW
            with tf.control_dependencies([x_]):
                dt0 = cuda_timer_start_op("my_timer")
            with tf.control_dependencies(ys + [dt0]):
                y_ = tf.nn.conv2d(x_,
                                  w,
                                  config.strides,
                                  config.padding,
                                  data_format='NCHW')
                xs.append(x_)
                ys.append(y_)
    with tf.control_dependencies(ys):
        dt = cuda_timer_end_op("my_timer")
        with tf.control_dependencies([dt]):
            y = tf.no_op()
    return y, ind, dt
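
On the `# AP:` comment above: `tf.nn.conv2d` keeps filters in HWIO ("RSCK") layout regardless of `data_format`; only the activation layout changes, so no KCRS transpose of `w` is needed for the NCHW branch. A standalone illustration:

import numpy as np
import tensorflow as tf

x_nchw = tf.constant(np.random.uniform(size=[1, 8, 16, 16]).astype(np.float32))
w = tf.constant(np.random.uniform(size=[3, 3, 8, 4]).astype(np.float32))  # HWIO
y = tf.nn.conv2d(x_nchw, w, [1, 1, 1, 1], 'SAME', data_format='NCHW')  # -> [1, 4, 16, 16]
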
Example #6
    def _test_sparse_scatter(self, mask, x, w, out_shape, bsize, ksize,
                             strides, padding):
        with tf.Session() as sess:
            x = tf.constant(x)
            w = tf.constant(w)
            p, blk_indices = gather_tf(x, mask, bsize, ksize, strides, padding)
            block_params = calc_block_params([int(ss) for ss in x.get_shape()],
                                             bsize, ksize, strides, padding)
            ind_custom = convert_mask_to_indices_custom(
                mask, block_params, 0.0)
            p_custom = sbnet_module.sparse_gather(
                x,
                ind_custom.bin_counts,
                ind_custom.active_block_indices,
                bsize=block_params.bsize,
                bstride=block_params.bstrides,
                boffset=block_params.boffset)
            p_shape = [int(x.get_shape()[0]), block_params.bsize[0],
                       block_params.bsize[1], int(x.get_shape()[3])]
            q = tf.nn.conv2d(p, w, strides, 'VALID')
            q_custom = tf.nn.conv2d(p_custom, w, strides, 'VALID')
            y_tf = scatter_tf(q, blk_indices, out_shape)
            q_shape = calc_out_size_4d_np(p_shape, ksize, strides, 'VALID')
            bsize_out = [q_shape[1], q_shape[2]]
            boffset = [0, 0]
            y_custom = scatter_custom(q_custom, ind_custom, out_shape,
                                      bsize_out, boffset,
                                      block_params.bstrides)
            p1, p2, q_val, y1, y2, active, num = sess.run([
                p, p_custom, q, y_tf, y_custom,
                ind_custom.active_block_indices, ind_custom.bin_counts
            ])
            num = num[0]
            sortIdx = active[:num].argsort()
            p2 = p2[sortIdx]

            # Make sure p's are the same.
            np.testing.assert_array_equal(p1, p2)

            # Check y's are the same.
            np.testing.assert_array_equal(y1, y2)
Example #7
    def _test_sparse_scatter(self, mask, x, w, out_shape, bsize, ksize,
                             strides, padding):
        with tf.Session() as sess:
            x = tf.constant(x)
            w = tf.constant(w)
            p, blk_indices = gather_tf(x, mask, bsize, ksize, strides, padding)
            block_params = calc_block_params([int(ss) for ss in x.get_shape()],
                                             bsize, ksize, strides, padding)
            ind_custom = convert_mask_to_indices_custom(
                mask, block_params, 0.0)
            p_custom = sbnet_module.sparse_gather(
                x,
                ind_custom.bin_counts,
                ind_custom.active_block_indices,
                dynamic_bsize=tf.constant(block_params.bsize, tf.int32),
                dynamic_bstride=tf.constant(block_params.bstrides, tf.int32),
                dynamic_boffset=tf.constant(block_params.boffset, tf.int32))
            p_shape = [int(x.get_shape()[0]), block_params.bsize[0],
                       block_params.bsize[1], int(x.get_shape()[3])]
            q = tf.nn.conv2d(p, w, strides, 'VALID')
            q_custom = tf.nn.conv2d(p_custom, w, strides, 'VALID')
            y_tf = scatter_tf(q, blk_indices, out_shape)
            q_shape = calc_out_size_4d_np(p_shape, ksize, strides, 'VALID')
            bsize_out = [q_shape[1], q_shape[2]]
            boffset = [0, 0]
            y_custom = scatter_custom(q_custom, ind_custom, out_shape,
                                      bsize_out, boffset,
                                      block_params.bstrides)
            p1, p2, q_val, y1, y2, active, num = sess.run([
                p, p_custom, q, y_tf, y_custom,
                ind_custom.active_block_indices, ind_custom.bin_counts
            ])

            # Make sure p's are the same.
            l1 = tuple([tuple(x) for x in p1.reshape(-1, 3).tolist()])
            l2 = tuple([tuple(x) for x in p2.reshape(-1, 3).tolist()])
            np.testing.assert_array_equal(set(l1), set(l2))

            # Check y's are the same.
            np.testing.assert_array_equal(y1, y2)
Example #8
    def _test_sparse_conv2d_custom_with_mask(self,
                                             mask,
                                             bsize,
                                             ksize,
                                             strides,
                                             padding,
                                             y_exp,
                                             use_var=True,
                                             transpose=False):
        # Currently we don't care about VALID convolution.
        assert padding == 'SAME', 'We do not support VALID conv at the moment.'
        mask_ = tf.constant(mask)
        blk_params = calc_block_params(
            list(mask.shape) + [ksize[2]], bsize, ksize, strides, padding)
        ind = convert_mask_to_indices_custom(mask_, blk_params, 0.)
        xval = np.ones([1, mask.shape[1], mask.shape[2], 1], dtype=np.float32)
        x = tf.constant(xval)
        if use_var:
            x = tf.Variable(x)
        w = tf.constant(np.ones(ksize, dtype=np.float32))
        y = sparse_conv2d_custom(x,
                                 w,
                                 ind,
                                 blk_params,
                                 strides,
                                 use_var=use_var,
                                 transpose=transpose)
        # Manually paste the input tensor into the expected output.
        y_exp = (y_exp == 0).astype(
            np.float32) * xval[:, :y_exp.shape[1], :y_exp.shape[2], :] + y_exp
        with self.test_session() as sess:
            if use_var:
                sess.run(tf.variables_initializer([x]))
            y_act = y.eval()
            # print('===============')
            # print('Actual')
            # print(y_act.reshape([y_act.shape[1], y_act.shape[2]]))
            # print('Expected')
            # print(y_exp.reshape([y_exp.shape[1], y_exp.shape[2]]))
            # print(y_exp.shape)
            self.assertEqual(y_act.size, y_exp.size)
            np.testing.assert_array_equal(y_act.reshape(y_exp.shape), y_exp)
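
The "paste" step above reflects that `sparse_conv2d_custom` scatters block results into (a copy of) the input, so pixels outside active blocks keep their input value; `y_exp` therefore needs the input pasted wherever the expected output is zero. A standalone numpy illustration of that line:

import numpy as np

xval = np.ones([1, 4, 4, 1], dtype=np.float32)
y_exp = np.zeros([1, 4, 4, 1], dtype=np.float32)
y_exp[0, 1:3, 1:3, 0] = 9.0  # pretend this is the active-block result
y_exp = (y_exp == 0).astype(np.float32) * xval + y_exp
print(y_exp[0, :, :, 0])  # ones everywhere except the 2x2 block of 9s
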
Example #9
    def _test_reduce_mask(self, mask, bsize, ksize, strides, padding):
        with tf.Session():
            mask = tf.constant(mask)
            indices = convert_mask_to_indices(mask, bsize, ksize, strides,
                                              padding, 0.0)
            indices_val = indices.eval()

            x_shape = [1] + [int(ss) for ss in mask.get_shape()[1:]] + [1]
            block_params = calc_block_params(x_shape, bsize, ksize, strides,
                                             padding)
            indices_custom = convert_mask_to_indices_custom(
                mask, block_params, 0.0)

            activeBlockIndicesResult = indices_custom.active_block_indices.eval()
            binCountsResult = indices_custom.bin_counts.eval()
            clippedResults = activeBlockIndicesResult[:binCountsResult[0], :]
            clippedResults = to_tuples(clippedResults.tolist())
            refResults = to_tuples(indices_val.tolist())
            np.testing.assert_equal(set(clippedResults), set(refResults))
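
`to_tuples` is a small test helper not shown in these snippets; given how it is used (its output is fed to `set(...)` for an order-insensitive comparison), a plausible implementation just converts each index row into a hashable tuple:

def to_tuples(list_of_lists):
    return [tuple(row) for row in list_of_lists]
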
Example #10
    def _test_reduce_mask(self, mask, bsize, ksize, strides, padding):
        with tf.Session():
            mask = tf.constant(mask)
            indices = convert_mask_to_indices(mask, bsize, ksize, strides,
                                              padding, 0.0)
            x_shape = [1] + [int(ss) for ss in mask.get_shape()[1:]] + [1]
            block_params = calc_block_params(x_shape, bsize, ksize, strides,
                                             padding)
            indices_custom = convert_mask_to_indices_custom(
                mask, block_params, 0.0)

            activeBlockIndicesResult = indices_custom.active_block_indices.eval()
            binCountsResult = indices_custom.bin_counts.eval()
            activeBlockIndicesResult = activeBlockIndicesResult[:binCountsResult[0]]
            sortIdx = activeBlockIndicesResult.argsort()
            activeBlockIndicesResult = activeBlockIndicesResult[sortIdx]
            clippedResults = np.copy(activeBlockIndicesResult.view(np.uint16))
            clippedResults = clippedResults.reshape([-1, 4])[:, [2, 1, 0]]
            indices_val = indices.eval()
            np.testing.assert_array_equal(indices_val, clippedResults)
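
The `view(np.uint16)` step above decodes what appears to be a packed index format in this version of the op: each active block index is a 64-bit word whose low three 16-bit fields hold (w, h, n), so viewing as uint16, reshaping to four columns, and selecting columns [2, 1, 0] recovers (n, h, w) triples (the fourth field is unused high bits). A standalone numpy illustration, assuming little-endian layout:

import numpy as np

packed = np.array([(3 << 32) | (2 << 16) | 5], dtype=np.uint64)  # n=3, h=2, w=5
fields = packed.view(np.uint16).reshape([-1, 4])
print(fields[:, [2, 1, 0]])  # [[3 2 5]], i.e. (n, h, w)
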
Example #11
    def _test_sparse_conv2d_gradient(self, mask, bsize, ksize, strides, padding, transpose=False):
        # Currently we don't care about VALID convolution.
        assert padding == 'SAME', 'We do not support VALID conv at the moment.'
        use_var = False
        mask_ = tf.constant(mask)
        blk_params = calc_block_params(
            list(mask.shape) + [ksize[2]], bsize, ksize, strides, padding)
        ind = convert_mask_to_indices_custom(mask_, blk_params, 0.)
        ReduceMask = namedtuple('ReduceMask', ['active_block_indices', 'bin_counts'])
        ind.active_block_indices.set_shape([27, 3])
        ind.bin_counts.set_shape([1])
        ind_var = tf.Variable(ind.active_block_indices, trainable=False)
        bin_var = tf.Variable(ind.bin_counts, trainable=False)
        ind_fixed = ReduceMask(active_block_indices=ind_var, bin_counts=bin_var)
        rnd = np.random.RandomState(0)
        batch_size = 1
        xval = rnd.uniform(-0.1, 0.1, [mask.shape[0], mask.shape[1], mask.shape[2],
                                       ksize[2]]).astype(np.float32)
        x = tf.constant(xval)
        wval = rnd.uniform(-1, 1, ksize).astype(np.float32)
        w = tf.constant(wval)
        y = sparse_conv2d_custom(
            x, w, ind_fixed, blk_params, strides, use_var=use_var, transpose=transpose)
        print('')
        print('-' * 55)
        print('Sparse Conv Layer')
        print('{:30s} {:>10s} {:>10s}'.format('name', 'grad angle', 'abs err'))
        with self.test_session() as sess:
            sess.run(tf.global_variables_initializer())
            yval = y.eval()
            err = compute_gradient_angle(x, xval.shape, y, yval.shape, x_init_value=xval)
            err2 = compute_gradient_abs_error(x, xval.shape, y, yval.shape, x_init_value=xval)
            print('{:30s} {:>10.3f} {:>10.3f}'.format('x', err, err2))

            err = compute_gradient_angle(w, wval.shape, y, yval.shape, x_init_value=wval)
            err2 = compute_gradient_abs_error(w, wval.shape, y, yval.shape, x_init_value=wval)
            print('{:30s} {:>10.3f} {:>10.3f}'.format('w', err, err2))
Example #12
    def _test_sparse_resblock_gradients(self,
                                        xval,
                                        maskval,
                                        bsize,
                                        strides,
                                        padding,
                                        data_format='NHWC',
                                        dynamic_size=False):
        with tf.Graph().as_default() as g:
            x = tf.constant(xval)
            mask = tf.constant(maskval)
            ch_in = xval.shape[3]
            ch_out = xval.shape[3] // 4
            ksize_list = [[1, 1, ch_in, ch_out], [3, 3, ch_out, ch_out],
                          [1, 1, ch_out, ch_in]]
            if dynamic_size:
                blk_params = calc_block_params_res_block(
                    tf.shape(xval), bsize, ksize_list, strides, padding)
            else:
                blk_params = calc_block_params_res_block(
                    xval.shape, bsize, ksize_list, strides, padding)
            ind = convert_mask_to_indices_custom(mask, blk_params, 0.)
            ReduceMask = namedtuple('ReduceMask',
                                    ['active_block_indices', 'bin_counts'])
            ind.active_block_indices.set_shape([27, 3])
            ind.bin_counts.set_shape([1])
            ind_var = tf.Variable(ind.active_block_indices, trainable=False)
            bin_var = tf.Variable(ind.bin_counts, trainable=False)
            ind_fixed = ReduceMask(active_block_indices=ind_var,
                                   bin_counts=bin_var)
            tf_ind = convert_mask_to_indices_custom(mask, blk_params, 0.)
            with self.test_session() as sess:
                py_inds = sess.run([tf_ind])
            ind = lambda: 0  # function object used as a cheap container to hang attributes off
            ind.bin_counts = tf.constant(py_inds[0].bin_counts)
            ind.active_block_indices = tf.constant(
                py_inds[0].active_block_indices)

            y = sparse_res_block_bottleneck(x,
                                            ksize_list,
                                            ind_fixed,
                                            blk_params,
                                            strides,
                                            is_training=True,
                                            data_format=data_format,
                                            w_project=None,
                                            no_activation=False,
                                            use_var=False)
            trainable_vars = tf.trainable_variables()
            print('')
            print('-' * 55)
            print('Sparse Residual')
            print('{:30s} {:>10s} {:>10s}'.format('name', 'grad angle',
                                                  'abs err'))
            with self.test_session() as sess:
                sess.run(tf.global_variables_initializer())
                yval = y.eval()
                err = compute_gradient_angle(x,
                                             xval.shape,
                                             y,
                                             yval.shape,
                                             x_init_value=xval)
                err2 = compute_gradient_abs_error(x,
                                                  xval.shape,
                                                  y,
                                                  yval.shape,
                                                  x_init_value=xval)
                print('{:30s} {:>10.3f} {:>10.3f}'.format('x', err, err2))

                #'sub3/bn3/batchnorm/add_1:0',
                for name in [
                        'SparseScatter:0', 'SparseGather:0',
                        'sub3/bn3/FusedBatchNorm:0', 'sub3/conv3/Conv2D:0',
                        'sub3/relu3:0', 'sub2/conv2/Conv2D:0', 'sub2/relu2:0',
                        'sub2/bn2/FusedBatchNorm:0', 'sub1/conv1/Conv2D:0',
                        'sub1/relu1:0', 'sub1/bn1/FusedBatchNorm:0'
                ]:
                    act = g.get_tensor_by_name(name)
                    actval = act.eval()
                    err = compute_gradient_angle(act,
                                                 actval.shape,
                                                 y,
                                                 yval.shape,
                                                 x_init_value=actval)
                    err2 = compute_gradient_abs_error(act,
                                                      actval.shape,
                                                      y,
                                                      yval.shape,
                                                      x_init_value=actval)
                    print('{:30s} {:>10.3f} {:>10.3f}'.format(name, err, err2))

                for vv in trainable_vars:
                    vvval = vv.eval()
                    err = compute_gradient_angle(vv,
                                                 vvval.shape,
                                                 y,
                                                 yval.shape,
                                                 x_init_value=vvval)
                    err2 = compute_gradient_abs_error(vv,
                                                      vvval.shape,
                                                      y,
                                                      yval.shape,
                                                      x_init_value=vvval)
                    print('{:30s} {:>10.3f} {:>10.3f}'.format(
                        vv.name, err, err2))
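
`compute_gradient_angle` and `compute_gradient_abs_error` come from SBNet's test utilities and are not shown here. A plausible sketch of the angle metric, built on TF1's `tf.test.compute_gradient` (which must run under a default session, as in the tests above) and treating the theoretical and numeric Jacobians as flat vectors:

import numpy as np
import tensorflow as tf

def gradient_angle_deg(x, x_shape, y, y_shape, x_init_value=None):
    # Theoretical vs. numerically estimated Jacobians of y w.r.t. x.
    jac_t, jac_n = tf.test.compute_gradient(
        x, x_shape, y, y_shape, x_init_value=x_init_value)
    t, n = jac_t.ravel(), jac_n.ravel()
    cos = np.dot(t, n) / (np.linalg.norm(t) * np.linalg.norm(n) + 1e-12)
    return np.degrees(np.arccos(np.clip(cos, -1.0, 1.0)))
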