def build_logsumexp_wl(self, params, placedb, data_collections, pin_pos_op):
    """
    @brief build the op to compute log-sum-exp wirelength
    @param params parameters
    @param placedb placement database
    @param data_collections a collection of data and variables required for constructing ops
    @param pin_pos_op the op to compute pin locations according to cell locations
    """
    gamma = 10 * self.base_gamma(params, placedb)
    print("[I] gamma = %g" % (gamma))

    wirelength_for_pin_op = logsumexp_wirelength.LogSumExpWirelength(
        flat_netpin=data_collections.flat_net2pin_map,
        netpin_start=data_collections.flat_net2pin_start_map,
        pin2net_map=data_collections.pin2net_map,
        net_mask=data_collections.net_mask_ignore_large_degrees,
        gamma=torch.tensor(gamma,
                           dtype=data_collections.pos[0].dtype,
                           device=data_collections.pos[0].device),
        algorithm='atomic')

    # wirelength for position
    def build_wirelength_op(pos):
        pin_pos = pin_pos_op(pos)
        return wirelength_for_pin_op(pin_pos)

    # update gamma
    base_gamma = self.base_gamma(params, placedb)

    def build_update_gamma_op(iteration, overflow):
        self.update_gamma(iteration, overflow, base_gamma)
        #print("[I] update gamma to %g" % (wirelength_for_pin_op.gamma.data))

    return build_wirelength_op, build_update_gamma_op
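# What the op approximates, as a minimal self-contained sketch (the function and variable
# names below are illustrative, not part of the LogSumExpWirelength API): per net and per
# coordinate, gamma * (logsumexp(x / gamma) + logsumexp(-x / gamma)) is a smooth,
# differentiable upper bound on max(x) - min(x), i.e. the HPWL span of that net.
import torch

def lse_span_1d(x, gamma):
    # smooth surrogate for max(x) - min(x); tighter as gamma shrinks
    return gamma * (torch.logsumexp(x / gamma, dim=0) +
                    torch.logsumexp(-x / gamma, dim=0))

# Example: three pins spanning 15 units.
# lse_span_1d(torch.tensor([0.0, 10.0, 15.0]), torch.tensor(0.5))  -> ~15.0 (tight)
# lse_span_1d(torch.tensor([0.0, 10.0, 15.0]), torch.tensor(50.0)) -> ~110.6 (smooth but loose)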
def build_logsumexp_wl(self, params, placedb, data_collections, pin_pos_op):
    """
    @brief build the op to compute log-sum-exp wirelength
    @param params parameters
    @param placedb placement database
    @param data_collections a collection of data and variables required for constructing ops
    @param pin_pos_op the op to compute pin locations according to cell locations
    """
    wirelength_for_pin_op = logsumexp_wirelength.LogSumExpWirelength(
        flat_netpin=data_collections.flat_net2pin_map,
        netpin_start=data_collections.flat_net2pin_start_map,
        pin2net_map=data_collections.pin2net_map,
        net_weights=data_collections.net_weights,
        net_mask=data_collections.net_mask_ignore_large_degrees,
        pin_mask=data_collections.pin_mask_ignore_fixed_macros,
        gamma=self.gamma,
        algorithm='merged',
        num_threads=params.num_threads)

    # wirelength for position
    def build_wirelength_op(pos):
        return wirelength_for_pin_op(pin_pos_op(pos))

    # update gamma
    base_gamma = self.base_gamma(params, placedb)

    def build_update_gamma_op(iteration, overflow):
        self.update_gamma(iteration, overflow, base_gamma)
        #logging.debug("update gamma to %g" % (wirelength_for_pin_op.gamma.data))

    return build_wirelength_op, build_update_gamma_op
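# Design note, illustrated with a minimal runnable sketch: gamma is handed to the op as a
# tensor (self.gamma above) rather than a Python float, so build_update_gamma_op can anneal
# it in place and every later call of build_wirelength_op sees the new value without
# rebuilding the op. The names below (toy_gamma, toy_wirelength_op) are illustrative only;
# the real annealing formula lives in self.update_gamma and is not shown in this excerpt.
import torch

toy_gamma = torch.tensor(10.0)
x = torch.tensor([0.0, 10.0, 15.0], requires_grad=True)

def toy_wirelength_op(x):
    # captures toy_gamma by reference, like the closure over wirelength_for_pin_op above
    return toy_gamma * (torch.logsumexp(x / toy_gamma, 0) +
                        torch.logsumexp(-x / toy_gamma, 0))

print(toy_wirelength_op(x))   # smooth wirelength under the initial gamma
toy_gamma.data.fill_(1.0)     # in-place annealing step, as update_gamma presumably does
print(toy_wirelength_op(x))   # the same closure now evaluates with the tighter gamma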
def test_logsumexp_wirelength_random(self):
    pin_pos = np.array(
        [[0.0, 0.0], [1.0, 2.0], [1.5, 0.2], [0.5, 3.1], [0.6, 1.1]],
        dtype=np.float32) * 10
    # nets have different degrees, so the outer array must be an object array
    net2pin_map = np.array([np.array([0, 4]), np.array([1, 2, 3])], dtype=object)
    pin2net_map = np.zeros(len(pin_pos), dtype=np.int32)
    for net_id, pins in enumerate(net2pin_map):
        for pin in pins:
            pin2net_map[pin] = net_id

    pin_x = pin_pos[:, 0]
    pin_y = pin_pos[:, 1]
    gamma = torch.tensor(0.5, dtype=torch.float32)
    ignore_net_degree = 4

    # net mask: ignore nets with degree >= ignore_net_degree
    net_mask = np.ones(len(net2pin_map), dtype=np.uint8)
    for i in range(len(net2pin_map)):
        if len(net2pin_map[i]) >= ignore_net_degree:
            net_mask[i] = 0

    # construct flat_net2pin_map and flat_net2pin_start_map
    # flat netpin map, length of #pins
    flat_net2pin_map = np.zeros(len(pin_pos), dtype=np.int32)
    # starting index in netpin map for each net, length of #nets+1, the last entry is #pins
    flat_net2pin_start_map = np.zeros(len(net2pin_map) + 1, dtype=np.int32)
    count = 0
    for i in range(len(net2pin_map)):
        flat_net2pin_map[count:count + len(net2pin_map[i])] = net2pin_map[i]
        flat_net2pin_start_map[i] = count
        count += len(net2pin_map[i])
    flat_net2pin_start_map[len(net2pin_map)] = len(pin_pos)

    print("flat_net2pin_map = ", flat_net2pin_map)
    print("flat_net2pin_start_map = ", flat_net2pin_start_map)

    print(np.transpose(pin_pos))
    pin_pos_var = Variable(torch.from_numpy(np.transpose(pin_pos)).reshape([-1]),
                           requires_grad=True)
    #pin_pos_var = torch.nn.Parameter(torch.from_numpy(np.transpose(pin_pos)).reshape([-1]))
    print(pin_pos_var)

    golden = build_wirelength(pin_pos_var[:pin_pos_var.numel() // 2],
                              pin_pos_var[pin_pos_var.numel() // 2:],
                              pin2net_map, net2pin_map, gamma, ignore_net_degree)
    print("golden_value = ", golden.data)
    golden.backward()
    golden_grad = pin_pos_var.grad.clone()
    print("golden_grad = ", golden_grad.data)

    # test cpu
    # clone is very important, because the custom op cannot deep copy the data
    pin_pos_var.grad.zero_()
    custom = logsumexp_wirelength.LogSumExpWirelength(
        torch.from_numpy(flat_net2pin_map),
        torch.from_numpy(flat_net2pin_start_map),
        torch.from_numpy(pin2net_map),
        torch.from_numpy(net_mask),
        gamma,  # already a tensor; re-wrapping it with torch.tensor() would only copy it
        algorithm='sparse')
    result = custom.forward(pin_pos_var)
    print("custom = ", result)
    result.backward()
    grad = pin_pos_var.grad.clone()
    print("custom_grad = ", grad)

    np.testing.assert_allclose(result.data.numpy(), golden.data.detach().numpy())
    np.testing.assert_allclose(grad.data.numpy(), golden_grad.data.numpy())

    # test gpu
    if torch.cuda.device_count():
        pin_pos_var.grad.zero_()
        custom_cuda = logsumexp_wirelength.LogSumExpWirelength(
            Variable(torch.from_numpy(flat_net2pin_map)).cuda(),
            Variable(torch.from_numpy(flat_net2pin_start_map)).cuda(),
            torch.from_numpy(pin2net_map).cuda(),
            torch.from_numpy(net_mask).cuda(),
            gamma.cuda(),
            algorithm='sparse')
        result_cuda = custom_cuda.forward(pin_pos_var.cuda())
        print("custom_cuda_result = ", result_cuda.data.cpu())
        result_cuda.backward()
        grad_cuda = pin_pos_var.grad.clone()
        print("custom_grad_cuda = ", grad_cuda.data.cpu())

        np.testing.assert_allclose(result_cuda.data.cpu().numpy(),
                                   golden.data.detach().numpy())
        np.testing.assert_allclose(grad_cuda.data.cpu().numpy(),
                                   grad.data.numpy(),
                                   rtol=1e-7, atol=1e-12)

    # test gpu atomic
    if torch.cuda.device_count():
        pin_pos_var.grad.zero_()
        custom_cuda = logsumexp_wirelength.LogSumExpWirelength(
            Variable(torch.from_numpy(flat_net2pin_map)).cuda(),
            Variable(torch.from_numpy(flat_net2pin_start_map)).cuda(),
            torch.from_numpy(pin2net_map).cuda(),
            torch.from_numpy(net_mask).cuda(),
            gamma.cuda(),
            algorithm='atomic')
        result_cuda = custom_cuda.forward(pin_pos_var.cuda())
        print("custom_cuda_result atomic = ", result_cuda.data.cpu())
        result_cuda.backward()
        grad_cuda = pin_pos_var.grad.clone()
        print("custom_grad_cuda atomic = ", grad_cuda.data.cpu())

        np.testing.assert_allclose(result_cuda.data.cpu().numpy(),
                                   golden.data.detach().numpy())
        np.testing.assert_allclose(grad_cuda.data.cpu().numpy(),
                                   grad.data.numpy(),
                                   rtol=1e-7, atol=1e-15)
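# The golden value above comes from a build_wirelength helper that is not shown in this
# excerpt. A plausible pure-PyTorch reference with the same call signature is sketched
# below; it is an assumption about the helper's behavior (per-net log-sum-exp in x and y,
# skipping nets at or above ignore_net_degree), not the repository's actual implementation.
import torch

def build_wirelength_reference(pin_x, pin_y, pin2net_map, net2pin_map, gamma,
                               ignore_net_degree):
    # pin2net_map is accepted to mirror the call site but is not needed in this sketch
    wl = torch.zeros_like(gamma)
    for pins in net2pin_map:
        if len(pins) >= ignore_net_degree:
            continue  # masked out, consistent with net_mask in the test
        idx = torch.as_tensor(pins, dtype=torch.long)
        for coords in (pin_x, pin_y):
            c = coords[idx]
            wl = wl + gamma * (torch.logsumexp(c / gamma, 0) +
                               torch.logsumexp(-c / gamma, 0))
    return wl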