def eval_runtime(design):
    """
    @brief time the forward pass of the CUDA density overflow op for the 'by-node' and 'threadmap' algorithms
    @param design design name substituted into ../../../../benchmarks/ispd2005/density/%s_density.pklz
    """
    benchmark_path = "../../../../benchmarks/ispd2005/density/%s_density.pklz" % (design)
    with gzip.open(benchmark_path, "rb") as archive:
        (node_size_x, node_size_y, bin_center_x, bin_center_y, target_density,
         xl, yl, xh, yh, bin_size_x, bin_size_y, num_movable_nodes,
         num_terminals, num_filler_nodes) = pickle.load(archive)

    # random float64 vector of length 2 * len(node_size_x); every entry is
    # drawn from uniform_(xl, xh)
    # NOTE(review): presumably x coordinates followed by y coordinates - confirm
    random_pos = torch.empty(len(node_size_x) * 2,
                             dtype=torch.float64).uniform_(xl, xh)
    pos_var = Variable(random_pos, requires_grad=True).cuda()

    def make_op(algorithm):
        # each op gets its own freshly uploaded copies of the host arrays
        return density_overflow.DensityOverflow(
            torch.from_numpy(node_size_x).cuda(),
            torch.from_numpy(node_size_y).cuda(),
            torch.from_numpy(bin_center_x).cuda(),
            torch.from_numpy(bin_center_y).cuda(),
            target_density=target_density,
            xl=xl,
            yl=yl,
            xh=xh,
            yh=yh,
            bin_size_x=bin_size_x,
            bin_size_y=bin_size_y,
            num_movable_nodes=num_movable_nodes,
            num_terminals=num_terminals,
            num_filler_nodes=num_filler_nodes,
            algorithm=algorithm)

    custom_cuda_by_node = make_op('by-node')
    custom_cuda_thread_map = make_op('threadmap')

    # wait for the uploads above to finish before the first timer starts
    torch.cuda.synchronize()
    iters = 10
    timed_ops = (
        (custom_cuda_by_node, "custom_cuda_by_node takes %.3f ms"),
        (custom_cuda_thread_map, "custom_cuda_thread_map takes %.3f ms"),
    )
    for op, report in timed_ops:
        start = time.time()
        for _ in range(iters):
            # output is not inspected; only the elapsed time matters
            last_output = op.forward(pos_var)
        # wait for the queued GPU work before reading the clock
        torch.cuda.synchronize()
        print(report % ((time.time() - start) / iters * 1000))
def build_density_overflow(self, params, placedb, data_collections, device):
    """
    @brief compute density overflow
    @param params parameters
    @param placedb placement database
    @param data_collections a collection of all data and variables required for constructing the ops
    @param device cpu or cuda (not referenced in this method body)
    @return the constructed density_overflow.DensityOverflow op
    """
    return density_overflow.DensityOverflow(
        data_collections.node_size_x,
        # second positional argument is the per-node y size; passing
        # node_size_x here would evaluate every node as a square of its width
        data_collections.node_size_y,
        data_collections.bin_center_x,
        data_collections.bin_center_y,
        target_density=params.target_density,
        xl=placedb.xl,
        yl=placedb.yl,
        xh=placedb.xh,
        yh=placedb.yh,
        bin_size_x=placedb.bin_size_x,
        bin_size_y=placedb.bin_size_y,
        num_movable_nodes=placedb.num_movable_nodes,
        num_terminals=placedb.num_terminals,
        # filler nodes are excluded: the count is fixed at zero here
        num_filler_nodes=0,
        algorithm='by-node',
        num_threads=params.num_threads)
def eval_runtime(design):
    """
    @brief report the average forward time of the CUDA density overflow op
    @param design path to a gzip-compressed pickle holding the 14 fields unpacked below
    """
    with gzip.open(design, "rb") as archive:
        payload = pickle.load(archive)
    (node_size_x, node_size_y, bin_center_x, bin_center_y, target_density,
     xl, yl, xh, yh, bin_size_x, bin_size_y, num_movable_nodes,
     num_terminals, num_filler_nodes) = payload

    # random float64 vector of length 2 * len(node_size_x); every entry is
    # drawn from uniform_(xl, xh)
    # NOTE(review): presumably x coordinates followed by y coordinates - confirm
    random_pos = torch.empty(len(node_size_x) * 2,
                             dtype=torch.float64).uniform_(xl, xh)
    pos_var = Variable(random_pos, requires_grad=True).cuda()

    # positional arguments of the op, uploaded in the order it expects them
    host_arrays = (node_size_x, node_size_y, bin_center_x, bin_center_y)
    device_arrays = [
        torch.from_numpy(host_array).cuda() for host_array in host_arrays
    ]
    op_settings = dict(
        target_density=target_density,
        xl=xl,
        yl=yl,
        xh=xh,
        yh=yh,
        bin_size_x=bin_size_x,
        bin_size_y=bin_size_y,
        num_movable_nodes=num_movable_nodes,
        num_terminals=num_terminals,
        num_filler_nodes=num_filler_nodes,
    )
    custom_cuda = density_overflow.DensityOverflow(*device_arrays,
                                                   **op_settings)

    # wait for the uploads above to finish before the timer starts
    torch.cuda.synchronize()
    iters = 10
    start = time.time()
    for _ in range(iters):
        # output is not inspected; only the elapsed time matters
        last_output = custom_cuda.forward(pos_var)
    # wait for the queued GPU work before reading the clock
    torch.cuda.synchronize()
    elapsed_ms = (time.time() - start) / iters * 1000
    print("custom_cuda takes %.3f ms" % (elapsed_ms))
def test_densityOverflowRandom(self):
    """
    @brief check that the CUDA density overflow ops match the CPU op on a two-node example

    The CPU op is built without an explicit algorithm; when a CUDA device is
    present, the 'by-node' and 'threadmap' CUDA ops are each compared against
    the CPU result and max density with np.testing.assert_allclose.
    """
    dtype = np.float32
    xx = np.array([1.0, 2.0]).astype(dtype)
    yy = np.array([3.0, 1.5]).astype(dtype)
    node_size_x = np.array([0.5, 1.0]).astype(dtype)
    node_size_y = np.array([1.0, 1.0]).astype(dtype)

    # layout region [1, 5] x [1, 5] with 2 x 2 bins of size 2
    xl = 1.0
    yl = 1.0
    xh = 5.0
    yh = 5.0
    bin_size_x = 2.0
    bin_size_y = 2.0
    target_density = 0.1
    num_bins_x = int(np.ceil((xh - xl) / bin_size_x))
    num_bins_y = int(np.ceil((yh - yl) / bin_size_y))
    num_movable_nodes = len(xx)
    num_terminals = 0
    num_filler_nodes = 0

    # bin centers are the midpoints of each bin's low and high edges
    bin_center_x = np.zeros(num_bins_x, dtype=dtype)
    for id_x in range(num_bins_x):
        bin_center_x[id_x] = (bin_xl(id_x, xl, bin_size_x) +
                              bin_xh(id_x, xl, xh, bin_size_x)) / 2

    bin_center_y = np.zeros(num_bins_y, dtype=dtype)
    for id_y in range(num_bins_y):
        bin_center_y[id_y] = (bin_yl(id_y, yl, bin_size_y) +
                              bin_yh(id_y, yl, yh, bin_size_y)) / 2

    # keyword arguments shared by the CPU op and both CUDA ops
    op_kwargs = dict(
        target_density=target_density,
        xl=xl,
        yl=yl,
        xh=xh,
        yh=yh,
        bin_size_x=bin_size_x,
        bin_size_y=bin_size_y,
        num_movable_nodes=num_movable_nodes,
        num_terminals=num_terminals,
        num_filler_nodes=num_filler_nodes,
    )
    host_arrays = (node_size_x, node_size_y, bin_center_x, bin_center_y)

    # test cpu
    custom = density_overflow.DensityOverflow(
        *[torch.from_numpy(host_array) for host_array in host_arrays],
        **op_kwargs)

    pos = Variable(torch.from_numpy(np.concatenate([xx, yy])))
    result, max_density = custom.forward(pos)
    print("custom_result = ", result)
    print("custom_max_density = ", max_density)

    # test cuda: every algorithm must reproduce the CPU reference
    if torch.cuda.device_count():
        for algorithm in ('by-node', 'threadmap'):
            custom_cuda = density_overflow.DensityOverflow(
                *[torch.from_numpy(host_array).cuda()
                  for host_array in host_arrays],
                algorithm=algorithm,
                **op_kwargs)

            # fresh position tensor per algorithm so the runs stay independent
            pos = Variable(torch.from_numpy(np.concatenate([xx, yy]))).cuda()
            result_cuda, max_density_cuda = custom_cuda.forward(pos)
            print("%s custom_result = " % algorithm, result_cuda.data.cpu())
            print("%s custom_max_density_cuda = " % algorithm,
                  max_density_cuda.data.cpu())
            np.testing.assert_allclose(result, result_cuda.data.cpu())
            np.testing.assert_allclose(max_density,
                                       max_density_cuda.data.cpu())