def verifyValues(filter_in_sizes, rho_filter=1, dim=5, scale_val=0.1, bias_val=0.1, num_trials=3):
  """Check direct_sparse_channelwise_biased_l2_regularization against NumPy.

  Builds a random sparse filter, converts it into the custom block-sparse
  layout on the GPU, runs the channelwise biased L2 regularization op, and
  compares the result with sum((d + bias)^2) * scale / 2 accumulated over
  the output channels of the dense copy.

  Args:
    filter_in_sizes: filter shape; the last entry is the output channel count.
    rho_filter: density of the random sparse filter.
    dim: dimensionality argument forwarded to the regularization op.
    scale_val: scalar L2 scale factor.
    bias_val: scalar bias, or an iterable with one bias per output channel.
    num_trials: number of timed repetitions for the benchmark loop.

  Returns:
    0 on success, 1 if the sparse result deviates from the dense reference
    by more than 1e-3.
  """
  # collections.Iterable was removed in Python 3.10; prefer collections.abc,
  # falling back for very old interpreters.
  try:
    from collections.abc import Iterable
  except ImportError:
    from collections import Iterable
  out_channel_count = filter_in_sizes[-1]
  if isinstance(bias_val, Iterable):
    bias = np.array(bias_val, dtype=np.float32)
  else:
    # Broadcast the scalar bias to every output channel.
    bias = np.array([bias_val] * out_channel_count, dtype=np.float32)
  scale = np.array(scale_val, dtype=np.float32)
  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  # Convert the filter into the custom block-sparse layout.
  with tf.device("/gpu:0"):
    convf = sc_module.direct_sparse_filter_conversion(t2ind, t2val, t2sh, t2sh)
  with tf.Session(config=config) as sess:
    pf = sess.run(convf)
  tf.reset_default_graph()
  ts = 0
  with tf.device("/gpu:0"):
    creg = sc_module.direct_sparse_channelwise_biased_l2_regularization(
        pf.out_indices, pf.out_values, pf.out_shape, pf.out_channel_mapping,
        scale, bias, dim)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(creg)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(creg)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()
  time.sleep(1)
  # Dense reference: per-channel biased L2 loss.
  reg_loss = 0
  for out_channel in range(out_channel_count):
    reg_loss += np.sum(
        np.power(d2[:, :, :, :, out_channel] + bias[out_channel], 2)) * scale / 2.
  print(sv3, reg_loss)
  if abs(sv3 - reg_loss) > 0.001:
    print("error")
    return 1
  return 0
def verifyValues(tensor_in_sizes, rho_data=0.1, dim=5, num_trials=3):
  """Smoke-test direct_sparse_concat on two random sparse tensors.

  Creates two random sparse tensors of the same shape, converts both into
  the custom block-sparse layout, concatenates them with the GPU op, and
  prints the inputs, the result, and the average runtime. The output is
  only printed, not verified.

  Args:
    tensor_in_sizes: shape of both random input tensors.
    rho_data: density of the random inputs.
    dim: unused here; kept for signature parity with the other tests.
    num_trials: number of timed repetitions for the benchmark loop.

  Returns:
    0 always.
  """
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes)
  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  # Reorder data and generate the block index lookup tables.
  with tf.device("/gpu:0"):
    con1 = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    con2 = sc_module.direct_sparse_data_conversion(t2ind, t2val, t2sh)
  with tf.Session(config=config) as sess:
    pd1 = sess.run(con1)
    pd2 = sess.run(con2)
  tf.reset_default_graph()
  ts = 0
  with tf.device("/gpu:0"):
    concat = sc_module.direct_sparse_concat(
        pd1.out_indices, pd1.out_values, pd1.out_shape,
        pd1.out_block_channel_mapping, pd2.out_indices, pd2.out_values,
        pd2.out_shape, pd2.out_block_channel_mapping)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(concat)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(concat)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time sparse concat: ", ts)
  tf.reset_default_graph()
  print("pd1", pd1)
  print("")
  print("pd2", pd2)
  print("")
  print("sv3", sv3)
  return 0
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1, rho_filter=1, padding='SAME', dim=5, max_density=0.1, num_trials=3, filter_type="K-RELU", test_type=""):
  """Benchmark direct_sparse_conv_kd against dense conv3d and (in currently
  dead code) verify its forward and backward results.

  NOTE(review): an unconditional bare `return` right after the dense timing
  makes the entire backprop/verification section below it unreachable dead
  code — presumably a debugging leftover; confirm before relying on the
  checks it would perform.

  Args:
    tensor_in_sizes: shape of the random sparse input.
    filter_in_sizes: shape of the random sparse filter.
    stride: scalar stride or per-spatial-dimension iterable.
    rho_data, rho_filter: densities of input and filter.
    padding: conv padding mode.
    dim: dimensionality argument for the sparse ops.
    max_density: fraction of output entries the approximate op may keep.
    num_trials: timed repetitions per benchmark.
    filter_type: activation mode forwarded to the sparse op.
    test_type: unused here.

  Returns:
    None (early return); the unreachable tail would return 0/1.
  """
  # Accept either a per-dimension stride iterable or one scalar stride.
  if isinstance(stride, collections.Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]
  out_sizes = np.copy(tensor_in_sizes)
  out_sizes[-1] = filter_in_sizes[-1]
  # Upper bound on output entries the approximate sparse conv may keep.
  out_entry_count = np.prod(out_sizes) * max_density
  bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
  no_strides = [1, 1, 1, 1, 1]
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes, -3, 3)
  s1 = tf.SparseTensor(indices=t1ind, values=t1val, dense_shape=t1sh)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)
  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes)
  s2 = tf.SparseTensor(indices=t2ind, values=t2val, dense_shape=t2sh)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)
  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  print("filter shape", filter_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  # Convert data and filter into the custom block-sparse layout.
  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    convf = sc_module.direct_sparse_filter_conversion(
        t2ind, t2val, t2sh, t1sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
    pf = sess.run(convf)
  tf.reset_default_graph()
  ts = 0
  with tf.device("/gpu:0"):
    approx_scskconv = sc_module.direct_sparse_conv_kd(
        pd.out_indices, pd.out_values, pd.out_shape, pd.out_block_channel_mapping,
        pf.out_indices, pf.out_values, pf.out_shape, pf.out_channel_mapping,
        bias, strides, padding, out_entry_count, dim, max_density, filter_type)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(approx_scskconv)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(approx_scskconv)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()
  time.sleep(1)
  # Dense reference timing.
  td = 0
  with tf.device("/gpu:0"):
    conv = nn_ops.conv3d(d1, d2, strides, padding)
  with tf.Session(config=config) as sess:
    t22 = time.time()
    expected = sess.run(conv)
    t11 = time.time()
    for i in range(0, num_trials):
      sess.run(conv)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time dense gpu: ", td)
  tf.reset_default_graph()
  print("time ratio: ", ts / td)
  # NOTE(review): early return — everything below is unreachable dead code.
  return
  # Random gradient for the backward passes; one value per sparse output.
  [bp_ind, sv3_bp_val, bp_sh] = sp.createRandomSparseTensor(1, [len(sv3.out_values)], 1, 9)
  d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                             sv3.out_block_channel_mapping[-1])
  out_backprop_val = constant_op.constant(d3_)
  # Dense filter-gradient reference.
  t_bp1 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = nn_ops.conv3d_backprop_filter_v2(d1, filter_in_sizes,
                                             out_backprop_val, strides, padding)
    res_bp1 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp1 = t_bp1 + t2 - t1
  t_bp1 = t_bp1 / float(num_trials)
  print("time bp1: ", t_bp1)
  # Dense input-gradient reference.
  t_bp2 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = nn_ops.conv3d_backprop_input_v2(tensor_in_sizes, d2,
                                            out_backprop_val, strides, padding)
    res_bp2 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp2 = t_bp2 + t2 - t1
  t_bp2 = t_bp2 / float(num_trials)
  print("time bp2: ", t_bp2)
  # Sparse combined backprop (computes filter and input gradients together).
  t_bp3 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = sc_module.direct_sparse_conv_kd_backprop(
          pd.out_indices, pd.out_values, pd.out_shape, pd.out_block_channel_mapping,
          pf.out_indices, pf.out_values, pf.out_shape, pf.out_channel_mapping,
          sv3.out_indices, sv3.out_values, sv3.out_shape,
          sv3.out_block_channel_mapping, sv3_bp_val, strides, padding, dim,
          max_density)
    res_bp3 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp3 = t_bp3 + t2 - t1
  t_bp3 = t_bp3 / float(num_trials)
  print("time bp3: ", t_bp3)
  print("sparse ratio: ", t_bp3 / (t_bp2 + t_bp1))
  # Densify the sparse gradients and the sparse forward result for comparison.
  bp_sfg = sp.sparse1d_to_dense(pf.out_indices, res_bp3.filter_grads,
                                pf.out_shape, pf.out_channel_mapping[-1])
  bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3.input_grads,
                                pd.out_shape, pd.out_block_channel_mapping[-1])
  value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values,
                                sv3.out_shape, sv3.out_block_channel_mapping[-1])
  print("expected", expected)
  print("sv3", value3)
  print("out densities", sv3.out_channel_densities)
  has_error = False
  approx_cmp = expected.flatten()
  approx = value3.flatten()
  # Counts every entry (loop body is unconditional), i.e. len(approx_cmp).
  non_zero_count = 0
  for i in range(len(approx_cmp)):
    non_zero_count = non_zero_count + 1
  print("entry count: ", non_zero_count)
  # Forward comparison: only positive reference entries are checked because
  # the approximate op may drop values (max_density < 1).
  error_cnt = 0
  first_error = 0
  correct_cnt = 0
  for i in range(len(approx_cmp)):
    if approx_cmp[i] > 0 and abs(approx_cmp[i] - approx[i]) > 1e-3:
      if has_error == False:
        first_error = i
      has_error = True
      error_cnt = error_cnt + 1
    elif approx[i] != 0:
      correct_cnt = correct_cnt + 1
  # Input-gradient comparison against the dense reference.
  bp_sig_flat = bp_sig.flatten()
  res_bp2_flat = res_bp2.flatten()
  bp_i_error_cnt = 0
  bp_i_correct_cnt = 0
  for i in range(len(bp_sig_flat)):
    if bp_sig_flat[i] != 0:
      if bp_sig_flat[i] == res_bp2_flat[i]:
        bp_i_correct_cnt = bp_i_correct_cnt + 1
      else:
        bp_i_error_cnt = bp_i_error_cnt + 1
  # Filter-gradient comparison, restricted to non-zero filter taps.
  filter_flat = d2.flatten()
  bp_sfg_flat = bp_sfg.flatten()
  res_bp1_flat = res_bp1.flatten()
  bp_f_error_cnt = 0
  bp_f_correct_cnt = 0
  for i in range(len(filter_flat)):
    if filter_flat[i] != 0:
      if bp_sfg_flat[i] == res_bp1_flat[i]:
        bp_f_correct_cnt = bp_f_correct_cnt + 1
      else:
        bp_f_error_cnt = bp_f_error_cnt + 1
  print("total number of non-zero corrects: ", correct_cnt)
  print("sparse input size: ", len(t1ind))
  print("total number of bpi corrects: ", bp_i_correct_cnt)
  print("sparse filter size: ", len(t2ind))
  print("total number of bpf corrects: ", bp_f_correct_cnt)
  if has_error:
    print("total number of errors: ", error_cnt)
    print("first error: ", first_error)
    return 1
  if bp_i_error_cnt > 0:
    print("total number of bpi errors: ", bp_i_error_cnt)
  if bp_f_error_cnt > 0:
    print("total number of bpf errors: ", bp_f_error_cnt)
  print("OK")
  return 0
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1, rho_filter=1, padding='SAME', dim=5, max_density=0.1, num_trials=3, filter_type='K-RELU', test_type='', dense=True):
  """Benchmark direct_sparse_conv_kd, optionally against dense conv3d.

  Builds a random sparse input and filter, converts both into the custom
  block-sparse layout, times the approximate sparse convolution, and — when
  `dense` is True — also times the dense conv3d reference.

  Args:
    tensor_in_sizes: shape of the random sparse input.
    filter_in_sizes: shape of the random sparse filter.
    stride: scalar stride or per-spatial-dimension iterable.
    rho_data, rho_filter: densities of input and filter.
    padding: conv padding mode.
    dim: dimensionality argument for the sparse ops.
    max_density: fraction of output entries the approximate op may keep.
    num_trials: timed repetitions per benchmark.
    filter_type: activation mode forwarded to the sparse op.
    test_type: unused here.
    dense: when True, also run and time the dense reference.

  Returns:
    [expected, sv3, ts, td] — dense result (None when dense=False), sparse
    result, and the average sparse/dense runtimes (td is 0 when dense=False).
  """
  if isinstance(stride, collections.Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]
  out_sizes = np.copy(tensor_in_sizes)
  out_sizes[-1] = filter_in_sizes[-1]
  # Upper bound on output entries the approximate sparse conv may keep.
  out_entry_count = np.prod(out_sizes) * max_density
  bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes, -3, 3)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)
  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)
  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  print("filter shape", filter_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  # Convert data and filter into the custom block-sparse layout.
  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    convf = sc_module.direct_sparse_filter_conversion(
        t2ind, t2val, t2sh, t1sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
    pf = sess.run(convf)
  tf.reset_default_graph()
  ts = 0
  with tf.device("/gpu:0"):
    approx_scskconv = sc_module.direct_sparse_conv_kd(
        pd.out_indices, pd.out_values, pd.out_shape, pd.out_block_channel_mapping,
        pf.out_indices, pf.out_values, pf.out_shape, pf.out_channel_mapping,
        bias, strides, padding, out_entry_count, dim, max_density, filter_type)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(approx_scskconv)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(approx_scskconv)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()
  time.sleep(1)
  # BUG FIX: `expected` and `td` were previously bound only inside the
  # `if dense:` branch, so dense=False raised NameError at the return.
  expected = None
  td = 0
  if dense:
    with tf.device("/gpu:0"):
      conv = nn_ops.conv3d(d1, d2, strides, padding)
    with tf.Session(config=config) as sess:
      t22 = time.time()
      expected = sess.run(conv)
      t11 = time.time()
      for i in range(0, num_trials):
        sess.run(conv)
      t22 = time.time()
      td = abs(t22 - t11) / max(num_trials, 1)
      print("time dense gpu: ", td)
    tf.reset_default_graph()
    print("time ratio: ", ts / td)
  return [expected, sv3, ts, td]
def verifyValues(tensor_in_sizes, rho_data=0.1, dim=5, num_trials=3, test_type=""):
  """Round-trip test for direct_dense_to_sparse / direct_sparse_to_dense.

  Converts a random dense tensor to the custom sparse layout and back,
  checks the round trip reproduces the input, and exercises both backprop
  ops, comparing the dense-to-sparse gradient with the injected gradient.

  Args:
    tensor_in_sizes: shape of the random input tensor.
    rho_data: density of the random input.
    dim: dimensionality argument for the conversion ops.
    num_trials: timed repetitions per benchmark.
    test_type: unused here.

  Returns:
    0 on success, 1 if the forward round trip mismatches.
  """
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)
  print("input shape", tensor_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  # Dense -> sparse conversion (also builds the block index lookup table).
  td = 0
  with tf.device("/gpu:0"):
    dts = sc_module.direct_dense_to_sparse(d1, tensor_in_sizes, dim)
  with tf.Session(config=config) as sess:
    t22 = time.time()
    pd = sess.run(dts)
    t11 = time.time()
    for i in range(0, num_trials):
      sess.run(dts)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time dense to sparse gpu: ", td)
  tf.reset_default_graph()
  # Sparse -> dense conversion; the round trip should reproduce d1.
  expected = d1
  td = 0
  with tf.device("/gpu:0"):
    s2d = sc_module.direct_sparse_to_dense(
        pd.out_indices, pd.out_values, pd.out_shape, pd.out_block_channel_mapping)
  with tf.Session(config=config) as sess:
    t22 = time.time()
    sv3 = sess.run(s2d)
    t11 = time.time()
    for i in range(0, num_trials):
      sess.run(s2d)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time sparse to dense gpu: ", td)
  tf.reset_default_graph()
  # Random gradient for the backward passes.
  [bp_ind, sv3_bp_val, bp_sh] = sp.createRandomSparseTensor(1, tensor_in_sizes, 1, 9)
  d3_ = sp.sparse1d_to_dense(pd.out_indices, sv3_bp_val, pd.out_shape,
                             pd.out_block_channel_mapping[-1])
  out_backprop_val = constant_op.constant(d3_)
  # Backprop through sparse -> dense.
  t_bp3 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = sc_module.direct_sparse_to_dense_backprop(
          pd.out_indices, pd.out_values, pd.out_shape,
          pd.out_block_channel_mapping, sv3, out_backprop_val)
    res_bp3 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp3 = t_bp3 + t2 - t1
  t_bp3 = t_bp3 / float(num_trials)
  print("time bp sparse to dense: ", t_bp3)
  # Backprop through dense -> sparse.
  t_bp4 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = sc_module.direct_dense_to_sparse_backprop(
          sv3, pd.out_indices, pd.out_values, pd.out_shape,
          pd.out_block_channel_mapping, res_bp3)
    res_bp4 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      # BUG FIX: the accumulator previously read t_bp3
      # (`t_bp4 = t_bp3 + t2 - t1`), so only the last iteration plus the
      # *other* op's average was reported instead of this op's total.
      t_bp4 = t_bp4 + t2 - t1
  t_bp4 = t_bp4 / float(num_trials)
  print("time bp dense to sparse: ", t_bp4)
  bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3, pd.out_shape,
                                pd.out_block_channel_mapping[-1])
  # Forward round-trip comparison.
  has_error = False
  approx_cmp = expected.flatten()
  approx = sv3.flatten()
  print("entry count: ", len(approx_cmp))
  error_cnt = 0
  first_error = 0
  correct_cnt = 0
  for i in range(len(approx_cmp)):
    if abs(approx_cmp[i] - approx[i]) > 1e-3:
      if not has_error:
        first_error = i
      has_error = True
      error_cnt += 1
    elif approx[i] != 0:
      correct_cnt += 1
  # Backprop comparison: the dense-to-sparse gradient should reproduce the
  # injected dense gradient d3_.
  ebp = d3_.flatten()
  rbp = res_bp4.flatten()
  bperror_cnt = 0
  bpcorrect_cnt = 0
  for i in range(len(ebp)):
    if abs(ebp[i] - rbp[i]) > 1e-3:
      bperror_cnt += 1
    elif rbp[i] != 0:
      bpcorrect_cnt += 1
  print("total number of non-zero corrects: ", correct_cnt)
  print("total number of backprop corrects: ", bpcorrect_cnt)
  if has_error:
    print("total number of errors: ", error_cnt)
    print("first error: ", first_error)
    return 1
  if bperror_cnt > 0:
    print("total number of backprop errors: ", bperror_cnt)
  print("OK")
  return 0
def verifyValues(tensor_in_sizes, filter_in_sizes, stride, rho_data=0.1, rho_filter=1, padding='SAME', dim=5, max_density=0.1, num_trials=3, filter_type="K-RELU", test_type=""):
  """Compare a three-layer sparse conv network against its dense equivalent.

  Builds one random sparse input and three random sparse filters (the
  second and third filters use the first layer's output channel count as
  their input channel count), chains three direct_sparse_conv_kd ops, and
  checks the result of three dense conv3d(+ReLU) layers against the sparse
  result entry by entry.

  Args:
    tensor_in_sizes: shape of the random sparse input.
    filter_in_sizes: shape of the first-layer filter.
    stride: scalar stride or per-spatial-dimension iterable.
    rho_data, rho_filter: densities of input and filters.
    padding: conv padding mode.
    dim: dimensionality argument for the sparse ops.
    max_density: density bound forwarded to the sparse ops.
    num_trials: timed repetitions per benchmark.
    filter_type: "K-RELU" applies ReLU after every layer (both pipelines).
    test_type: unused here.

  Returns:
    0 if every entry matches within 1e-3, 1 otherwise.
  """
  if isinstance(stride, collections.Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]
  bias = np.zeros([filter_in_sizes[-1]], dtype=np.float32)
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes, -3, 3)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)
  [t2ind, t2val, t2sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes, -3, 3)
  d2 = sp.sparse_to_dense(t2ind, t2val, t2sh)
  # Layers 2 and 3 consume the output channels of layer 1.
  filter_in_sizes2 = filter_in_sizes[:]
  filter_in_sizes2[-2] = filter_in_sizes2[-1]
  [t3ind, t3val, t3sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes2, -3, 3)
  d3 = sp.sparse_to_dense(t3ind, t3val, t3sh)
  [t4ind, t4val, t4sh] = sp.createRandomSparseTensor(rho_filter, filter_in_sizes2, -3, 3)
  d4 = sp.sparse_to_dense(t4ind, t4val, t4sh)
  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  print("filter shape", filter_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.4
  # Convert data and all three filters into the custom block-sparse layout.
  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
    convf = sc_module.direct_sparse_filter_conversion(
        t2ind, t2val, t2sh, t1sh)
    convf2 = sc_module.direct_sparse_filter_conversion(
        t3ind, t3val, t3sh, t3sh)
    convf3 = sc_module.direct_sparse_filter_conversion(
        t4ind, t4val, t4sh, t4sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
    pf = sess.run(convf)
    pf2 = sess.run(convf2)
    pf3 = sess.run(convf3)
  tf.reset_default_graph()
  # Sparse pipeline: three chained sparse convolutions.
  ts = 0
  with tf.device("/gpu:0"):
    net = sc_module.direct_sparse_conv_kd(
        pd.out_indices, pd.out_values, pd.out_shape, pd.out_block_channel_mapping,
        pf.out_indices, pf.out_values, pf.out_shape, pf.out_channel_mapping,
        bias, strides, padding, dim, max_density, filter_type)
    net = sc_module.direct_sparse_conv_kd(
        net.out_indices, net.out_values, net.out_shape, net.out_block_channel_mapping,
        pf2.out_indices, pf2.out_values, pf2.out_shape, pf2.out_channel_mapping,
        bias, strides, padding, dim, max_density, filter_type)
    net = sc_module.direct_sparse_conv_kd(
        net.out_indices, net.out_values, net.out_shape, net.out_block_channel_mapping,
        pf3.out_indices, pf3.out_values, pf3.out_shape, pf3.out_channel_mapping,
        bias, strides, padding, dim, max_density, filter_type)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(net)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(net)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()
  # Dense reference pipeline with matching activations.
  td = 0
  with tf.device("/gpu:0"):
    net = nn_ops.conv3d(d1, d2, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
    net = nn_ops.conv3d(net, d3, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
    net = nn_ops.conv3d(net, d4, strides, padding)
    if filter_type == "K-RELU":
      net = nn_ops.relu(net)
  with tf.Session(config=config) as sess:
    t22 = time.time()
    expected = sess.run(net)
    t11 = time.time()
    for i in range(0, num_trials):
      sess.run(net)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time dense gpu: ", td)
  tf.reset_default_graph()
  # Densify the sparse result and compare entry by entry.
  value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values, sv3.out_shape,
                                sv3.out_block_channel_mapping[-1])
  has_error = False
  approx_cmp = expected.flatten()
  approx = value3.flatten()
  print("entry count: ", len(approx_cmp))
  error_cnt = 0
  first_error = 0
  correct_cnt = 0
  for i in range(len(approx_cmp)):
    if abs(approx_cmp[i] - approx[i]) > 1e-3:
      if not has_error:
        first_error = i
      has_error = True
      error_cnt += 1
    elif approx[i] != 0:
      correct_cnt += 1
  print("total number of non-zero corrects: ", correct_cnt)
  print("sparse input size: ", len(t1ind))
  if has_error:
    print("total number of errors: ", error_cnt)
    print("first error: ", first_error)
    return 1
  print("OK")
  return 0
def verifyValues(tensor_in_sizes, stride, rho_data=0.1, padding='SAME', dim=5, max_density=1, num_trials=3, test_type=""):
  """Benchmark direct_sparse_max_pooling_kd against dense max_pool3d and (in
  currently dead code) verify pooling, unpooling, and their backprops.

  NOTE(review): an unconditional `return 0` right after the backprop timing
  comparison makes the entire unpooling/verification section below it
  unreachable dead code — presumably a debugging leftover; confirm before
  relying on the checks it would perform.

  Args:
    tensor_in_sizes: shape of the random sparse input.
    stride: scalar stride or per-spatial-dimension iterable; also used as
      the pooling window size.
    rho_data: density of the random input.
    padding: unused in the reachable code (max_pool3d hard-codes "SAME").
    dim: dimensionality argument for the sparse ops.
    max_density: unused in the reachable code.
    num_trials: timed repetitions per benchmark.
    test_type: unused here.

  Returns:
    0 (early return); the unreachable tail would return 0/1.
  """
  # Accept either a per-dimension stride iterable or one scalar stride.
  if isinstance(stride, collections.Iterable):
    strides = [1] + list(stride) + [1]
  else:
    strides = [1, stride, stride, stride, 1]
  no_strides = [1, 1, 1, 1, 1]
  [t1ind, t1val, t1sh] = sp.createRandomSparseTensor(rho_data, tensor_in_sizes)
  d1 = sp.sparse_to_dense(t1ind, t1val, t1sh)
  print("strides: \n", strides)
  print("input shape", tensor_in_sizes)
  config = tf.ConfigProto()
  config.gpu_options.per_process_gpu_memory_fraction = 0.7
  #reorder data and generate block index lookup table
  with tf.device("/gpu:0"):
    convd = sc_module.direct_sparse_data_conversion(t1ind, t1val, t1sh)
  with tf.Session(config=config) as sess:
    pd = sess.run(convd)
  tf.reset_default_graph()
  ts = 0
  with tf.device("/gpu:0"):
    approx_scskconv = sc_module.direct_sparse_max_pooling_kd(
        pd.out_indices, pd.out_values, pd.out_shape,
        pd.out_block_channel_mapping, strides, dim)
  with tf.Session(config=config) as sess:
    # First run warms up; the loop afterwards is the timed portion.
    t6 = time.time()
    sv3 = sess.run(approx_scskconv)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(approx_scskconv)
    t6 = time.time()
    ts = abs(t6 - t5) / max(num_trials, 1)
    print("time approx sparse: ", ts)
  tf.reset_default_graph()
  # Dense reference timing (window == stride, hard-coded "SAME").
  td = 0
  with tf.device("/gpu:0"):
    pooling = tf.nn.max_pool3d(d1, strides, strides, "SAME")
  with tf.Session(config=config) as sess:
    t22 = time.time()
    expected = sess.run(pooling)
    t11 = time.time()
    for i in range(0, num_trials):
      sess.run(pooling)
    t22 = time.time()
    td = abs(t22 - t11) / max(num_trials, 1)
    print("time dense gpu: ", td)
  tf.reset_default_graph()
  print("time ratio: ", ts / td)
  # Random gradient for the backward passes; one value per sparse output.
  [bp_ind, sv3_bp_val, bp_sh] = sp.createRandomSparseTensor(1, [len(sv3.out_values)], 1, 9)
  d3_ = sp.sparse1d_to_dense(sv3.out_indices, sv3_bp_val, sv3.out_shape,
                             sv3.out_block_channel_mapping[-1])
  out_backprop_val = constant_op.constant(d3_)
  # Dense max-pool gradient reference.
  t_bp1 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = gen_nn_ops._max_pool3d_grad(d1, expected, out_backprop_val,
                                        strides, strides, "SAME")
      #fbp = nn_ops.conv3d_backprop_filter_v2(d1, filter_in_sizes, out_backprop_val, strides, padding)
    res_bp1 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp1 = t_bp1 + t2 - t1
  t_bp1 = t_bp1 / float(num_trials)
  print("time bp1: ", t_bp1)
  # Sparse max-pool backprop.
  t_bp3 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = sc_module.direct_sparse_max_pooling_kd_backprop(
          pd.out_indices, pd.out_values, pd.out_shape,
          pd.out_block_channel_mapping, sv3.out_indices, sv3.out_values,
          sv3.out_shape, sv3.out_block_channel_mapping, sv3_bp_val, strides, dim)
    res_bp3 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp3 = t_bp3 + t2 - t1
  t_bp3 = t_bp3 / float(num_trials)
  print("time bp3: ", t_bp3)
  print("bp ratio: ", t_bp1 / t_bp3)
  # NOTE(review): early return — everything below is unreachable dead code.
  return 0
  bp_sig = sp.sparse1d_to_dense(pd.out_indices, res_bp3, pd.out_shape,
                                pd.out_block_channel_mapping[-1])
  #print("dense bp ", res_bp1)
  #print("sparse bp: ", bp_sig)
  # Sparse unpooling of the pooled result back to the input resolution.
  tsu = 0
  with tf.device("/gpu:0"):
    unpooling = sc_module.direct_sparse_unpooling_kd(
        sv3.out_indices, sv3.out_values, sv3.out_shape,
        sv3.out_block_channel_mapping, pd.out_indices, pd.out_shape,
        pd.out_block_channel_mapping, strides, dim)
  with tf.Session(config=config) as sess:
    t6 = time.time()
    sv4 = sess.run(unpooling)
    t5 = time.time()
    for i in range(0, num_trials):
      sess.run(unpooling)
    t6 = time.time()
    tsu = abs(t6 - t5) / max(num_trials, 1)
    print("time sparse unpooling: ", tsu)
  tf.reset_default_graph()
  '''print("sparse bp", bp_sig)
  print("sv3 obcm", sv3.out_block_channel_mapping)
  print("len", len(sv3.out_indices))
  print("pd obcm", pd.out_block_channel_mapping)
  print("len", len(pd.out_indices))
  '''
  # Sparse unpooling backprop.
  t_bp4 = 0
  with tf.Session(config=config) as sess:
    with tf.device("/gpu:0"):
      fbp = sc_module.direct_sparse_unpooling_kd_backprop(
          sv3.out_indices, sv3.out_values, sv3.out_shape,
          sv3.out_block_channel_mapping, pd.out_indices, pd.out_values,
          pd.out_shape, pd.out_block_channel_mapping, res_bp3, strides, dim)
    res_bp4 = sess.run(fbp)
    for i in range(num_trials):
      t1 = time.time()
      sess.run(fbp)
      t2 = time.time()
      t_bp4 = t_bp4 + t2 - t1
  t_bp4 = t_bp4 / float(num_trials)
  print("time bp3: ", t_bp4)
  bp_unpool = sp.sparse1d_to_dense(sv3.out_indices, res_bp4, sv3.out_shape,
                                   sv3.out_block_channel_mapping[-1])
  #print("bp unpool", bp_unpool)
  value3 = sp.sparse1d_to_dense(sv3.out_indices, sv3.out_values, sv3.out_shape,
                                sv3.out_block_channel_mapping[-1])
  #print("result sparse ", value3)
  # Forward comparison against the dense pooling result.
  has_error = False
  approx_cmp = expected.flatten()
  approx = value3.flatten()
  # Counts every entry (loop body is unconditional), i.e. len(approx_cmp).
  non_zero_count = 0
  for i in range(len(approx_cmp)):
    #if approx[i] == 0:
    #approx_cmp[i] = 0
    #else:
    non_zero_count = non_zero_count + 1
  print("entry count: ", non_zero_count)
  error_cnt = 0
  first_error = 0
  correct_cnt = 0
  for i in range(len(approx_cmp)):
    if abs(approx_cmp[i] - approx[i]) > 1e-3:
      #print("error: %d != %d " % (approx_cmp[i], approx[i]))
      #print("at id ", i)
      if has_error == False:
        first_error = i
      has_error = True
      error_cnt = error_cnt + 1
    elif approx[i] != 0:
      correct_cnt = correct_cnt + 1
  # Backprop comparison against the dense max-pool gradient.
  bp_sig_flat = bp_sig.flatten()
  res_bp2_flat = res_bp1.flatten()
  bp_i_error_cnt = 0
  bp_i_correct_cnt = 0
  for i in range(len(approx_cmp)):
    if approx[i] != 0:
      if bp_sig_flat[i] == res_bp2_flat[i]:
        bp_i_correct_cnt = bp_i_correct_cnt + 1
      else:
        bp_i_error_cnt = bp_i_error_cnt + 1
  # Unpooled values must not exceed the pooled maxima they came from.
  p_flat = pd.out_values.flatten()
  up_flat = sv4.flatten()
  up_i_error_cnt = 0
  up_i_correct_cnt = 0
  for i in range(len(p_flat)):
    if p_flat[i] <= up_flat[i]:
      up_i_correct_cnt = up_i_correct_cnt + 1
    else:
      up_i_error_cnt = up_i_error_cnt + 1
  # Exhaustive 5-D re-check of the pooling result: each pooled entry must
  # equal the max over its stride window in the dense input (or the window
  # must be empty/out of range).
  if dim == 5:
    up_bp_cor = 0
    up_bp_err = 0
    for batch in range(0, tensor_in_sizes[0]):
      for channel in range(0, tensor_in_sizes[4]):
        for x in range(
            0, int(ceil(float(tensor_in_sizes[1]) / float(strides[1])))):
          for y in range(
              0, int(ceil(float(tensor_in_sizes[2]) / float(strides[2])))):
            for z in range(
                0, int(ceil(float(tensor_in_sizes[3]) / float(strides[3])))):
              id_in = (batch, x, y, z, channel)
              inval = value3.item(id_in)
              # Sentinel lower bound for the window maximum.
              max_out_val = -100000000000
              for dx in range(0, strides[1]):
                xout = x * strides[1] + dx
                if xout >= d1.shape[1]:
                  continue
                for dy in range(0, strides[2]):
                  yout = y * strides[2] + dy
                  if yout >= d1.shape[2]:
                    continue
                  for dz in range(0, strides[3]):
                    zout = z * strides[3] + dz
                    if zout >= d1.shape[3]:
                      continue
                    id_out = (batch, xout, yout, zout, channel)
                    out_val = d1.item(id_out)
                    max_out_val = max(max_out_val, out_val)
              if max_out_val == -100000000000 or max_out_val == inval:
                up_bp_cor = up_bp_cor + 1
              else:
                up_bp_err = up_bp_err + 1
    print("total number of pooling corrects: ", up_bp_cor)
    print("total number of pooling errors: ", up_bp_err)
  # Exhaustive 5-D re-check of the unpooling backprop: each entry must equal
  # the sum of the gradient values over its stride window.
  if dim == 5:
    up_bp_cor = 0
    up_bp_err = 0
    tmp = np.copy(bp_unpool)
    for batch in range(0, tensor_in_sizes[0]):
      for channel in range(0, tensor_in_sizes[4]):
        for x in range(
            0, int(ceil(float(tensor_in_sizes[1]) / float(strides[1])))):
          for y in range(
              0, int(ceil(float(tensor_in_sizes[2]) / float(strides[2])))):
            for z in range(
                0, int(ceil(float(tensor_in_sizes[3]) / float(strides[3])))):
              id_in = (batch, x, y, z, channel)
              inval = bp_unpool.item(id_in)
              sum_out_val = 0
              for dx in range(0, strides[1]):
                xout = x * strides[1] + dx
                if xout >= bp_sig.shape[1]:
                  continue
                for dy in range(0, strides[2]):
                  yout = y * strides[2] + dy
                  if yout >= bp_sig.shape[2]:
                    continue
                  for dz in range(0, strides[3]):
                    zout = z * strides[3] + dz
                    if zout >= bp_sig.shape[3]:
                      continue
                    id_out = (batch, xout, yout, zout, channel)
                    out_val = bp_sig.item(id_out)
                    sum_out_val = sum_out_val + out_val
              if sum_out_val == inval:
                up_bp_cor = up_bp_cor + 1
              else:
                up_bp_err = up_bp_err + 1
              tmp[id_in] = sum_out_val
    #print("pbup: ", bp_unpool)
    #print("epbup: ", tmp)
    print("total number of unpooling bp corrects: ", up_bp_cor)
    print("total number of unpooling bp errors: ", up_bp_err)
  print("total number of non-zero corrects: ", correct_cnt)
  print("total number of bpi corrects: ", bp_i_correct_cnt)
  print("total number of unpooling corrects: ", up_i_correct_cnt)
  if has_error:
    print("total number of errors: ", error_cnt)
    print("first error: ", first_error)
  if bp_i_error_cnt > 0:
    print("total number of bpi errors: ", bp_i_error_cnt)
  if up_i_error_cnt > 0:
    # NOTE(review): grouping reconstructed from mangled source — the
    # failure exit may have covered the other error branches too; verify.
    print("total number of up errors: ", up_i_error_cnt)
    return 1
  print("OK")
  return 0