# Example #1 (score: 0)
def verifyValues(filter_in_sizes,
                 rho_filter=1,
                 dim=5,
                 scale_val=0.1,
                 bias_val=0.1,
                 num_trials=3):
    """Benchmark and verify the sparse channelwise biased L2 regularization op.

    Builds a random sparse filter tensor, runs the GPU op
    ``direct_sparse_channelwise_biased_l2_regularization`` on it, and compares
    the result against a dense NumPy reference computation.

    Args:
        filter_in_sizes: shape of the filter tensor; the last entry is the
            output-channel count. NOTE(review): the reference computation
            indexes ``d2[:, :, :, :, c]``, so a 5-D filter is assumed —
            confirm against callers.
        rho_filter: density of the random sparse filter.
        dim: dimensionality parameter forwarded to the sparse op.
        scale_val: scalar regularization scale.
        bias_val: scalar bias, or an iterable with one bias per out channel.
        num_trials: number of timed repetitions.

    Returns:
        0 on success, 1 if the sparse result deviates from the dense
        reference by more than 1e-3.
    """
    # collections.Iterable was removed in Python 3.10; use collections.abc.
    from collections.abc import Iterable

    out_channel_count = filter_in_sizes[-1]
    if isinstance(bias_val, Iterable):
        bias = np.array(bias_val, dtype=np.float32)
    else:
        # Broadcast the scalar bias to one entry per output channel.
        bias = np.array([bias_val] * out_channel_count, dtype=np.float32)
    scale = np.array(scale_val, dtype=np.float32)
    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Convert the filter to the direct sparse representation.
    with tf.device("/gpu:0"):
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t2sh)
    with tf.Session(config=config) as sess:
        pf = sess.run(convf)

    tf.reset_default_graph()

    ts = 0
    with tf.device("/gpu:0"):
        creg = sc_module.direct_sparse_channelwise_biased_l2_regularization(
            pf.out_indices, pf.out_values, pf.out_shape,
            pf.out_channel_mapping, scale, bias, dim)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(creg)  # warm-up run, excluded from timing
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(creg)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)
    # Dense reference: sum over channels of scale/2 * sum((w + bias_c)^2).
    reg_loss = 0
    for out_channel in range(out_channel_count):
        reg_loss += np.sum(
            np.power(d2[:, :, :, :, out_channel] + bias[out_channel],
                     2)) * scale / 2.

    print(sv3, reg_loss)
    if abs(sv3 - reg_loss) > 0.001:
        print("error")
        return 1

    return 0
# Example #2 (score: 0)
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type="K-RELU",
                 test_type=""):
    """Benchmark sparse vs. dense 3-D convolution on random sparse tensors.

    Creates a random sparse input and filter, converts them to the direct
    sparse representation, times ``sc_module.direct_sparse_conv_kd`` against
    the dense ``nn_ops.conv3d`` and prints the runtime ratio.

    Args:
        tensor_in_sizes: shape of the dense input tensor.
        filter_in_sizes: shape of the filter; last entry is the output
            channel count.
        stride: scalar stride or an iterable of spatial strides.
        rho_data: density of the random input tensor.
        rho_filter: density of the random filter tensor.
        padding: convolution padding mode ('SAME'/'VALID').
        dim: dimensionality parameter forwarded to the sparse ops.
        max_density: upper bound on output density for the sparse op.
        num_trials: number of timed repetitions.
        filter_type: activation applied inside the sparse conv (e.g. K-RELU).
        test_type: unused; kept for interface compatibility.

    Returns:
        None. NOTE(review): the function returns right after the timing
        comparison — all verification/backprop code below the bare ``return``
        is currently unreachable and is kept as-is (presumably disabled for
        benchmarking).
    """
    # collections.Iterable was removed in Python 3.10; use collections.abc.
    from collections.abc import Iterable
    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    out_sizes = np.copy(tensor_in_sizes)
    out_sizes[-1] = filter_in_sizes[-1]
    # Expected number of non-zero output entries given the density cap.
    out_entry_count = np.prod(out_sizes) * max_density
    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
    no_strides = [1, 1, 1, 1, 1]
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Convert data and filter to the direct sparse representation.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)

    tf.reset_default_graph()

    ts = 0
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
            out_entry_count, dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(approx_scskconv)  # warm-up run, excluded from timing
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(approx_scskconv)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)

    td = 0
    with tf.device("/gpu:0"):
        conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
        expected = sess.run(conv)  # warm-up run, excluded from timing
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(conv)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)
    return

    # ------------------------------------------------------------------
    # NOTE(review): everything below is UNREACHABLE (early return above).
    # Kept unchanged so it can be re-enabled by removing the return.
    # ------------------------------------------------------------------
    [bp_ind, sv3_bp_val,
     bp_sh] = sp.createRandomSparseTensor(1, [len(sv3.out_values)], 1, 9)
    d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                               sv3.out_block_channel_mapping[-1])
    out_backprop_val = constant_op.constant(d3_)

    # Dense filter-gradient reference.
    t_bp1 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_filter_v2(d1, filter_in_sizes,
                                                   out_backprop_val, strides,
                                                   padding)
        res_bp1 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp1 = t_bp1 + t2 - t1
    t_bp1 = t_bp1 / float(num_trials)
    print("time bp1: ", t_bp1)

    # Dense input-gradient reference.
    t_bp2 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = nn_ops.conv3d_backprop_input_v2(tensor_in_sizes, d2,
                                                  out_backprop_val, strides,
                                                  padding)
        res_bp2 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp2 = t_bp2 + t2 - t1
    t_bp2 = t_bp2 / float(num_trials)
    print("time bp2: ", t_bp2)

    # Sparse backprop produces both filter and input gradients in one op.
    t_bp3 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_conv_kd_backprop(
                pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
                pf.out_shape, pf.out_channel_mapping, sv3.out_indices,
                sv3.out_values, sv3.out_shape, sv3.out_block_channel_mapping,
                sv3_bp_val, strides, padding, dim, max_density)
        res_bp3 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp3 = t_bp3 + t2 - t1
    t_bp3 = t_bp3 / float(num_trials)
    print("time bp3: ", t_bp3)
    print("sparse ratio: ", t_bp3 / (t_bp2 + t_bp1))

    bp_sfg = sp.sparse1d_to_dense(pf.out_indices, res_bp3.filter_grads,
                                  pf.out_shape, pf.out_channel_mapping[-1])
    bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3.input_grads,
                                  pd.out_shape,
                                  pd.out_block_channel_mapping[-1])
    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])
    print("expected", expected)
    print("sv3", value3)
    print("out densities", sv3.out_channel_densities)

    has_error = False
    approx_cmp = expected.flatten()
    approx = value3.flatten()
    # NOTE(review): this counts every entry, not just non-zeros.
    non_zero_count = 0
    for i in range(len(approx_cmp)):
        non_zero_count = non_zero_count + 1
    print("entry count: ", non_zero_count)
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i in range(len(approx_cmp)):
        if approx_cmp[i] > 0 and abs(approx_cmp[i] - approx[i]) > 1e-3:
            if has_error == False:
                first_error = i
            has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            correct_cnt = correct_cnt + 1

    # Compare sparse input gradients against the dense reference.
    bp_sig_flat = bp_sig.flatten()
    res_bp2_flat = res_bp2.flatten()
    bp_i_error_cnt = 0
    bp_i_correct_cnt = 0
    for i in range(len(bp_sig_flat)):
        if bp_sig_flat[i] != 0:
            if bp_sig_flat[i] == res_bp2_flat[i]:
                bp_i_correct_cnt = bp_i_correct_cnt + 1
            else:
                bp_i_error_cnt = bp_i_error_cnt + 1

    # Compare sparse filter gradients against the dense reference.
    filter_flat = d2.flatten()
    bp_sfg_flat = bp_sfg.flatten()
    res_bp1_flat = res_bp1.flatten()
    bp_f_error_cnt = 0
    bp_f_correct_cnt = 0
    for i in range(len(filter_flat)):
        if filter_flat[i] != 0:
            if bp_sfg_flat[i] == res_bp1_flat[i]:
                bp_f_correct_cnt = bp_f_correct_cnt + 1
            else:
                bp_f_error_cnt = bp_f_error_cnt + 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("sparse input size: ", len(t1ind))
    print("total number of bpi corrects: ", bp_i_correct_cnt)
    print("sparse filter size: ", len(t2ind))
    print("total number of bpf corrects: ", bp_f_correct_cnt)
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    if bp_i_error_cnt > 0:
        print("total number of  bpi errors: ", bp_i_error_cnt)
    if bp_f_error_cnt > 0:
        print("total number of  bpf errors: ", bp_f_error_cnt)
    print("OK")
    return 0
# Example #3 (score: 0)
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type='K-RELU',
                 test_type='',
                 dense=True):
    """Benchmark the sparse 3-D convolution, optionally against a dense run.

    Args:
        tensor_in_sizes: shape of the dense input tensor.
        filter_in_sizes: shape of the filter; last entry is the output
            channel count.
        stride: scalar stride or an iterable of spatial strides.
        rho_data: density of the random input tensor.
        rho_filter: density of the random filter tensor.
        padding: convolution padding mode ('SAME'/'VALID').
        dim: dimensionality parameter forwarded to the sparse ops.
        max_density: upper bound on output density for the sparse op.
        num_trials: number of timed repetitions.
        filter_type: activation applied inside the sparse conv (e.g. K-RELU).
        test_type: unused; kept for interface compatibility.
        dense: if True, also run the dense reference convolution.

    Returns:
        ``[expected, sv3, ts, td]`` when ``dense`` is True (dense result,
        sparse result, sparse time, dense time). NOTE(review): when ``dense``
        is False the function falls off the end and returns None — confirm
        callers handle that.
    """
    # collections.Iterable was removed in Python 3.10; use collections.abc.
    from collections.abc import Iterable
    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    out_sizes = np.copy(tensor_in_sizes)
    out_sizes[-1] = filter_in_sizes[-1]
    # Expected number of non-zero output entries given the density cap.
    out_entry_count = np.prod(out_sizes) * max_density

    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
    no_strides = [1, 1, 1, 1, 1]
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Convert data and filter to the direct sparse representation.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)

    tf.reset_default_graph()

    ts = 0
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding,
            out_entry_count, dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(approx_scskconv)  # warm-up run, excluded from timing
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(approx_scskconv)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    time.sleep(1)

    if dense:
        td = 0
        with tf.device("/gpu:0"):
            conv = nn_ops.conv3d(d1, d2, strides, padding)
        with tf.Session(config=config) as sess:
            expected = sess.run(conv)  # warm-up run, excluded from timing
            t11 = time.time()
            for i in range(0, num_trials):
                sess.run(conv)
            t22 = time.time()
            td = abs(t22 - t11) / max(num_trials, 1)
            print("time dense gpu: ", td)
        tf.reset_default_graph()

        print("time ratio: ", ts / td)
        return [expected, sv3, ts, td]
def verifyValues(tensor_in_sizes,
                 rho_data=0.1,
                 dim=5,
                 num_trials=3,
                 test_type=""):
    """Round-trip test: dense -> direct sparse -> dense, plus both backprops.

    Times ``direct_dense_to_sparse`` and ``direct_sparse_to_dense``, checks
    the round-tripped tensor against the original, then runs both backprop
    ops and checks the back-propagated values.

    Args:
        tensor_in_sizes: shape of the dense input tensor.
        rho_data: density of the random input tensor.
        dim: dimensionality parameter forwarded to the sparse ops.
        num_trials: number of timed repetitions.
        test_type: unused; kept for interface compatibility.

    Returns:
        0 on success, 1 if the forward round trip has errors.
    """
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    # Initializes the input tensor with array containing incrementing
    # numbers from 1.
    print("input shape", tensor_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    # Reorder data and generate block index lookup table (dense -> sparse).
    td = 0
    with tf.device("/gpu:0"):
        dts = sc_module.direct_dense_to_sparse(d1, tensor_in_sizes, dim)
    with tf.Session(config=config) as sess:
        pd = sess.run(dts)  # warm-up run, excluded from timing
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(dts)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense to sparse gpu: ", td)
    tf.reset_default_graph()

    # The round trip should reproduce the original dense tensor.
    expected = d1

    td = 0
    with tf.device("/gpu:0"):
        s2d = sc_module.direct_sparse_to_dense(pd.out_indices, pd.out_values,
                                               pd.out_shape,
                                               pd.out_block_channel_mapping)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(s2d)  # warm-up run, excluded from timing
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(s2d)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time sparse to dense gpu: ", td)
    tf.reset_default_graph()

    # Random gradient signal for the backprop checks.
    [bp_ind, sv3_bp_val,
     bp_sh] = sp.createRandomSparseTensor(1, tensor_in_sizes, 1, 9)
    d3_ = sp.sparse1d_to_dense(pd.out_indices, sv3_bp_val, pd.out_shape,
                               pd.out_block_channel_mapping[-1])
    out_backprop_val = constant_op.constant(d3_)

    t_bp3 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_to_dense_backprop(
                pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, sv3, out_backprop_val)
        res_bp3 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp3 = t_bp3 + t2 - t1
    t_bp3 = t_bp3 / float(num_trials)
    print("time bp sparse to dense: ", t_bp3)

    t_bp4 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_dense_to_sparse_backprop(
                sv3, pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, res_bp3)
        res_bp4 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            # BUG FIX: was `t_bp4 = t_bp3 + t2 - t1`, which overwrote the
            # accumulator with the *other* timer each iteration instead of
            # accumulating this op's elapsed time.
            t_bp4 = t_bp4 + t2 - t1
    t_bp4 = t_bp4 / float(num_trials)
    print("time bp dense to sparse: ", t_bp4)

    bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3, pd.out_shape,
                                  pd.out_block_channel_mapping[-1])

    # Forward check: dense -> sparse -> dense must match the input.
    has_error = False
    approx_cmp = expected.flatten()
    approx = sv3.flatten()
    # NOTE(review): this counts every entry, not just non-zeros.
    non_zero_count = 0
    for i in range(len(approx_cmp)):
        non_zero_count = non_zero_count + 1
    print("entry count: ", non_zero_count)
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i in range(len(approx_cmp)):
        if abs(approx_cmp[i] - approx[i]) > 1e-3:
            if has_error == False:
                first_error = i
            has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            correct_cnt = correct_cnt + 1

    # Backprop check: gradients round-tripped through both backprop ops
    # must match the injected gradient signal.
    ebp = d3_.flatten()
    rbp = res_bp4.flatten()
    bperror_cnt = 0
    bpcorrect_cnt = 0
    for i in range(len(ebp)):
        if abs(ebp[i] - rbp[i]) > 1e-3:
            bperror_cnt = bperror_cnt + 1
        elif rbp[i] != 0:
            bpcorrect_cnt = bpcorrect_cnt + 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("total number of backprop corrects: ", bpcorrect_cnt)
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    if bperror_cnt > 0:
        print("total number of backprop errors: ", bperror_cnt)
    print("OK")
    return 0
# Example #5 (score: 0)
def verifyValues(tensor_in_sizes,
                 filter_in_sizes,
                 stride,
                 rho_data=0.1,
                 rho_filter=1,
                 padding='SAME',
                 dim=5,
                 max_density=0.1,
                 num_trials=3,
                 filter_type="K-RELU",
                 test_type=""):
    """Benchmark and verify a 3-layer sparse conv net against a dense one.

    Builds a stack of three sparse convolutions (with the activation chosen
    by ``filter_type``), times it against the equivalent dense
    conv3d(+relu) stack, and compares the final outputs.

    Args:
        tensor_in_sizes: shape of the dense input tensor.
        filter_in_sizes: shape of the first-layer filter; later layers use a
            square filter with in-channels = out-channels of layer one.
        stride: scalar stride or an iterable of spatial strides.
        rho_data: density of the random input tensor.
        rho_filter: density of the random filter tensors.
        padding: convolution padding mode ('SAME'/'VALID').
        dim: dimensionality parameter forwarded to the sparse ops.
        max_density: upper bound on output density for the sparse ops.
        num_trials: number of timed repetitions.
        filter_type: "K-RELU" applies a ReLU after every layer.
        test_type: unused; kept for interface compatibility.

    Returns:
        0 on success, 1 if the sparse output deviates from the dense one.
    """
    # collections.Iterable was removed in Python 3.10; use collections.abc.
    from collections.abc import Iterable
    if isinstance(stride, Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
    no_strides = [1, 1, 1, 1, 1]
    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes, -3, 3)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes, -3, 3)
    d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)

    # Layers 2 and 3 map out-channels to out-channels, so copy the filter
    # shape and set in-channels equal to out-channels.
    filter_in_sizes2 = filter_in_sizes[:]
    filter_in_sizes2[-2] = filter_in_sizes2[-1]
    [t3ind, t3val, t3sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes2, -3, 3)
    d3 = sp.sparse_to_dense(t3ind, t3val, t3sh)

    [t4ind, t4val, t4sh] = sp.createRandomSparseTensor(rho_filter,
                                                       filter_in_sizes2, -3, 3)
    d4 = sp.sparse_to_dense(t4ind, t4val, t4sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)
    print("filter shape", filter_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4

    # Convert data and all three filters to the direct sparse representation.
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
        convf = sc_module.direct_sparse_filter_conversion(
            t2ind, t2val, t2sh, t1sh)
        convf2 = sc_module.direct_sparse_filter_conversion(
            t3ind, t3val, t3sh, t3sh)
        convf3 = sc_module.direct_sparse_filter_conversion(
            t4ind, t4val, t4sh, t4sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
        pf = sess.run(convf)
        pf2 = sess.run(convf2)
        pf3 = sess.run(convf3)

    tf.reset_default_graph()

    # Sparse 3-layer network: each layer consumes the previous layer's
    # sparse outputs directly.
    ts = 0
    with tf.device("/gpu:0"):
        net = sc_module.direct_sparse_conv_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, pf.out_indices, pf.out_values,
            pf.out_shape, pf.out_channel_mapping, bias, strides, padding, dim,
            max_density, filter_type)
        net = sc_module.direct_sparse_conv_kd(
            net.out_indices, net.out_values, net.out_shape,
            net.out_block_channel_mapping, pf2.out_indices, pf2.out_values,
            pf2.out_shape, pf2.out_channel_mapping, bias, strides, padding,
            dim, max_density, filter_type)
        net = sc_module.direct_sparse_conv_kd(
            net.out_indices, net.out_values, net.out_shape,
            net.out_block_channel_mapping, pf3.out_indices, pf3.out_values,
            pf3.out_shape, pf3.out_channel_mapping, bias, strides, padding,
            dim, max_density, filter_type)
    with tf.Session(config=config) as sess:
        sv3 = sess.run(net)  # warm-up run, excluded from timing
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(net)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    # Dense reference network with the same layer structure.
    td = 0
    with tf.device("/gpu:0"):
        net = nn_ops.conv3d(d1, d2, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
        net = nn_ops.conv3d(net, d3, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
        net = nn_ops.conv3d(net, d4, strides, padding)
        if filter_type == "K-RELU":
            net = nn_ops.relu(net)
    with tf.Session(config=config) as sess:
        expected = sess.run(net)  # warm-up run, excluded from timing
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(net)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])
    has_error = False
    approx_cmp = expected.flatten()
    approx = value3.flatten()
    # NOTE(review): this counts every entry, not just non-zeros.
    non_zero_count = 0
    for i in range(len(approx_cmp)):
        non_zero_count = non_zero_count + 1
    print("entry count: ", non_zero_count)
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i in range(len(approx_cmp)):
        if abs(approx_cmp[i] - approx[i]) > 1e-3:
            if has_error == False:
                first_error = i
            has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            correct_cnt = correct_cnt + 1

    print("total number of non-zero corrects: ", correct_cnt)
    print("sparse input size: ", len(t1ind))
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
        return 1
    print("OK")
    return 0
def verifyValues(tensor_in_sizes,
                 stride,
                 rho_data=0.1,
                 padding='SAME',
                 dim=5,
                 max_density=1,
                 num_trials=3,
                 test_type=""):
    if isinstance(stride, collections.Iterable):
        strides = [1] + list(stride) + [1]
    else:
        strides = [1, stride, stride, stride, 1]

    no_strides = [1, 1, 1, 1, 1]

    [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data,
                                                       tensor_in_sizes)
    d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)

    print("strides: \n", strides)
    print("input shape", tensor_in_sizes)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7

    #reorder data and generate block index lookup table
    with tf.device("/gpu:0"):
        convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    with tf.Session(config=config) as sess:
        pd = sess.run(convd)
    tf.reset_default_graph()

    ts = 0
    with tf.device("/gpu:0"):
        approx_scskconv = sc_module.direct_sparse_max_pooling_kd(
            pd.out_indices, pd.out_values, pd.out_shape,
            pd.out_block_channel_mapping, strides, dim)
    with tf.Session(config=config) as sess:
        t6 = time.time()
        sv3 = sess.run(approx_scskconv)
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(approx_scskconv)
        t6 = time.time()
        ts = abs(t6 - t5) / max(num_trials, 1)
        print("time approx sparse: ", ts)
    tf.reset_default_graph()

    td = 0
    with tf.device("/gpu:0"):
        pooling = tf.nn.max_pool3d(d1, strides, strides, "SAME")
    with tf.Session(config=config) as sess:
        t22 = time.time()
        expected = sess.run(pooling)
        t11 = time.time()
        for i in range(0, num_trials):
            sess.run(pooling)
        t22 = time.time()
        td = abs(t22 - t11) / max(num_trials, 1)
        print("time dense gpu: ", td)
    tf.reset_default_graph()

    print("time ratio: ", ts / td)

    [bp_ind, sv3_bp_val,
     bp_sh] = sp.createRandomSparseTensor(1, [len(sv3.out_values)], 1, 9)
    d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                               sv3.out_block_channel_mapping[-1])
    out_backprop_val = constant_op.constant(d3_)

    t_bp1 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = gen_nn_ops._max_pool3d_grad(d1, expected, out_backprop_val,
                                              strides, strides, "SAME")
            #fbp = nn_ops.conv3d_backprop_filter_v2(d1, filter_in_sizes,  out_backprop_val, strides, padding)
        res_bp1 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp1 = t_bp1 + t2 - t1
    t_bp1 = t_bp1 / float(num_trials)
    print("time bp1: ", t_bp1)

    t_bp3 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_max_pooling_kd_backprop(
                pd.out_indices, pd.out_values, pd.out_shape,
                pd.out_block_channel_mapping, sv3.out_indices, sv3.out_values,
                sv3.out_shape, sv3.out_block_channel_mapping, sv3_bp_val,
                strides, dim)
        res_bp3 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp3 = t_bp3 + t2 - t1
    t_bp3 = t_bp3 / float(num_trials)
    print("time bp3: ", t_bp3)
    print("bp ratio: ", t_bp1 / t_bp3)
    return 0
    bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3, pd.out_shape,
                                  pd.out_block_channel_mapping[-1])
    #print("dense bp ", res_bp1)
    #print("sparse bp: ", bp_sig)

    tsu = 0
    with tf.device("/gpu:0"):
        unpooling = sc_module.direct_sparse_unpooling_kd(
            sv3.out_indices, sv3.out_values, sv3.out_shape,
            sv3.out_block_channel_mapping, pd.out_indices, pd.out_shape,
            pd.out_block_channel_mapping, strides, dim)
    with tf.Session(config=config) as sess:
        t6 = time.time()
        sv4 = sess.run(unpooling)
        t5 = time.time()
        for i in range(0, num_trials):
            sess.run(unpooling)
        t6 = time.time()
        tsu = abs(t6 - t5) / max(num_trials, 1)
        print("time sparse unpooling: ", tsu)
    tf.reset_default_graph()
    '''print("sparse bp", bp_sig)
  print("sv3 obcm", sv3.out_block_channel_mapping)
  print("len", len(sv3.out_indices))
  print("pd obcm", pd.out_block_channel_mapping)
  print("len", len(pd.out_indices))
  '''
    t_bp4 = 0
    with tf.Session(config=config) as sess:
        with tf.device("/gpu:0"):
            fbp = sc_module.direct_sparse_unpooling_kd_backprop(
                sv3.out_indices, sv3.out_values, sv3.out_shape,
                sv3.out_block_channel_mapping, pd.out_indices, pd.out_values,
                pd.out_shape, pd.out_block_channel_mapping, res_bp3, strides,
                dim)
        res_bp4 = sess.run(fbp)
        for i in range(num_trials):
            t1 = time.time()
            sess.run(fbp)
            t2 = time.time()
            t_bp4 = t_bp4 + t2 - t1
    t_bp4 = t_bp4 / float(num_trials)
    print("time bp3: ", t_bp4)

    bp_unpool = sp.sparse1d_to_dense(sv3.out_indices, res_bp4, sv3.out_shape,
                                     sv3.out_block_channel_mapping[-1])
    #print("bp unpool", bp_unpool)

    value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                  sv3.out_shape,
                                  sv3.out_block_channel_mapping[-1])
    #print("result sparse ", value3)
    has_error = False
    approx_cmp = expected.flatten()
    approx = value3.flatten()
    non_zero_count = 0
    for i in range(len(approx_cmp)):
        #if approx[i] == 0:
        #approx_cmp[i] = 0
        #else:
        non_zero_count = non_zero_count + 1
    print("entry count: ", non_zero_count)
    error_cnt = 0
    first_error = 0
    correct_cnt = 0
    for i in range(len(approx_cmp)):
        if abs(approx_cmp[i] - approx[i]) > 1e-3:
            #print("error: %d != %d " % (approx_cmp[i], approx[i]))
            #print("at id ", i)
            if has_error == False:
                first_error = i
            has_error = True
            error_cnt = error_cnt + 1
        elif approx[i] != 0:
            # Non-zero entry matched the reference -> count as correct.
            correct_cnt = correct_cnt + 1

    # --- Backprop-input comparison ---------------------------------------
    # Compare the flattened backprop signal against the reference gradient,
    # but only at positions where the (sparse) approximation is non-zero.
    bp_sig_flat = bp_sig.flatten()
    # NOTE(review): flattens res_bp1 into a variable named res_bp2_flat —
    # looks like a naming slip; confirm res_bp1 is the intended tensor.
    res_bp2_flat = res_bp1.flatten()
    bp_i_error_cnt = 0
    bp_i_correct_cnt = 0
    # NOTE(review): loop bound is len(approx_cmp) but indexes approx,
    # bp_sig_flat and res_bp2_flat — assumes all four have equal length;
    # verify against the setup code above this chunk.
    for i in range(len(approx_cmp)):
        if approx[i] != 0:
            if bp_sig_flat[i] == res_bp2_flat[i]:
                bp_i_correct_cnt = bp_i_correct_cnt + 1
            else:
                bp_i_error_cnt = bp_i_error_cnt + 1

    # --- Unpooling value check -------------------------------------------
    # Each pooled value (p_flat) must be <= the corresponding unpooled
    # value (sv4); violations are counted as unpooling errors.
    p_flat = pd.out_values.flatten()
    up_flat = sv4.flatten()
    up_i_error_cnt = 0
    up_i_correct_cnt = 0
    for i in range(len(p_flat)):
        if p_flat[i] <= up_flat[i]:
            up_i_correct_cnt = up_i_correct_cnt + 1
        else:
            up_i_error_cnt = up_i_error_cnt + 1
    # --- Dense reference check of the pooling result (5-D tensors only) --
    # For every pooled cell, recompute the max over its strides-sized
    # window in the dense tensor d1 and compare with value3 at that cell.
    if dim == 5:
        up_bp_cor = 0
        up_bp_err = 0
        # Tensor layout assumed (batch, x, y, z, channel) based on the
        # indexing below — TODO confirm against the tensor construction.
        for batch in range(0, tensor_in_sizes[0]):
            for channel in range(0, tensor_in_sizes[4]):
                # Pooled output extent per spatial dim = ceil(in / stride).
                for x in range(
                        0,
                        int(ceil(
                            float(tensor_in_sizes[1]) / float(strides[1])))):
                    for y in range(
                            0,
                            int(
                                ceil(
                                    float(tensor_in_sizes[2]) /
                                    float(strides[2])))):
                        for z in range(
                                0,
                                int(
                                    ceil(
                                        float(tensor_in_sizes[3]) /
                                        float(strides[3])))):
                            id_in = (batch, x, y, z, channel)
                            inval = value3.item(id_in)
                            # Sentinel: "no in-bounds element seen yet".
                            max_out_val = -100000000000
                            # Scan the strides[1..3]-sized window that this
                            # pooled cell covers, skipping out-of-bounds
                            # positions at the tensor edge.
                            for dx in range(0, strides[1]):
                                xout = x * strides[1] + dx
                                if xout >= d1.shape[1]:
                                    continue
                                for dy in range(0, strides[2]):
                                    yout = y * strides[2] + dy
                                    if yout >= d1.shape[2]:
                                        continue
                                    for dz in range(0, strides[3]):
                                        zout = z * strides[3] + dz
                                        if zout >= d1.shape[3]:
                                            continue
                                        id_out = (batch, xout, yout, zout,
                                                  channel)
                                        out_val = d1.item(id_out)
                                        max_out_val = max(max_out_val, out_val)
                            # Correct when the window was empty (sentinel
                            # untouched) or the recomputed max matches.
                            if max_out_val == -100000000000 or max_out_val == inval:
                                up_bp_cor = up_bp_cor + 1
                            else:
                                up_bp_err = up_bp_err + 1

        print("total number of pooling corrects: ", up_bp_cor)
        print("total number of pooling errors: ", up_bp_err)

    # --- Dense reference check of the unpooling backprop (5-D only) ------
    # The gradient of unpooling w.r.t. a pooled cell should equal the SUM
    # of the backprop signal (bp_sig) over that cell's window; compare
    # against bp_unpool element-wise.
    if dim == 5:
        up_bp_cor = 0
        up_bp_err = 0
        # tmp collects the expected sums (same shape as bp_unpool) so the
        # two tensors can be printed side by side when debugging.
        tmp = np.copy(bp_unpool)
        for batch in range(0, tensor_in_sizes[0]):
            for channel in range(0, tensor_in_sizes[4]):
                for x in range(
                        0,
                        int(ceil(
                            float(tensor_in_sizes[1]) / float(strides[1])))):
                    for y in range(
                            0,
                            int(
                                ceil(
                                    float(tensor_in_sizes[2]) /
                                    float(strides[2])))):
                        for z in range(
                                0,
                                int(
                                    ceil(
                                        float(tensor_in_sizes[3]) /
                                        float(strides[3])))):
                            id_in = (batch, x, y, z, channel)
                            inval = bp_unpool.item(id_in)
                            sum_out_val = 0
                            # Accumulate bp_sig over the window this pooled
                            # cell maps to, skipping out-of-bounds indices.
                            for dx in range(0, strides[1]):
                                xout = x * strides[1] + dx
                                if xout >= bp_sig.shape[1]:
                                    continue
                                for dy in range(0, strides[2]):
                                    yout = y * strides[2] + dy
                                    if yout >= bp_sig.shape[2]:
                                        continue
                                    for dz in range(0, strides[3]):
                                        zout = z * strides[3] + dz
                                        if zout >= bp_sig.shape[3]:
                                            continue
                                        id_out = (batch, xout, yout, zout,
                                                  channel)
                                        out_val = bp_sig.item(id_out)
                                        sum_out_val = sum_out_val + out_val
                            # NOTE(review): exact float equality — window
                            # sums that differ only by rounding will count
                            # as errors; confirm that is intended.
                            if sum_out_val == inval:
                                up_bp_cor = up_bp_cor + 1
                            else:
                                up_bp_err = up_bp_err + 1
                            tmp[id_in] = sum_out_val
        #print("pbup: ", bp_unpool)
        #print("epbup: ", tmp)
        print("total number of unpooling bp corrects: ", up_bp_cor)
        print("total number of unpooling bp errors: ", up_bp_err)

    # --- Summary ---------------------------------------------------------
    print("total number of non-zero corrects: ", correct_cnt)
    print("total number of bpi corrects: ", bp_i_correct_cnt)
    print("total number of unpooling corrects: ", up_i_correct_cnt)
    if has_error:
        print("total number of errors: ", error_cnt)
        print("first error: ", first_error)
    if bp_i_error_cnt > 0:
        print("total number of  bpi errors: ", bp_i_error_cnt)
    # NOTE(review): only unpooling-value errors (up_i_error_cnt) produce a
    # failing return code; forward/bpi/pooling errors are print-only.
    # Confirm this is intentional and not a missing `return 1`.
    if up_i_error_cnt > 0:
        print("total number of  up errors: ", up_i_error_cnt)
        return 1
    print("OK")
    return 0