def forward(
        pos,
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
        buf,
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,  # same dimensions as density map, with padding regions to be 1
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
        deterministic_flag,
        sorted_node_map,
        num_threads):
    """Build the bin density map for ``pos`` and overwrite the padding bins.

    The flattened ``pos`` and the geometry arguments are forwarded to
    ``electric_potential_cuda.density_map`` when ``pos`` is a CUDA tensor and
    to ``electric_potential_cpp.density_map`` otherwise.  Only the CUDA call
    receives ``deterministic_flag`` and ``sorted_node_map``; only the CPU
    call receives ``buf`` and ``num_threads``.  ``padding_mask`` is not
    passed to either backend; it is only used for the fill below.

    Returns:
        The backend output viewed as ``[num_bins_x, num_bins_y]``.  When
        ``padding > 0`` the entries selected by ``padding_mask`` are set, in
        place, to ``target_density * bin_size_x * bin_size_y``.
    """
    # Arguments both backends take, split around the CPU-only ``buf`` slot.
    head_args = (
        pos.view(pos.numel()),
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
    )
    tail_args = (
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
    )

    if pos.is_cuda:
        raw_map = electric_potential_cuda.density_map(
            *head_args, *tail_args, deterministic_flag, sorted_node_map)
    else:
        raw_map = electric_potential_cpp.density_map(
            *head_args, buf, *tail_args, num_threads)

    bins = raw_map.view([num_bins_x, num_bins_y])

    # set padding density
    if padding > 0:
        padding_fill = target_density * bin_size_x * bin_size_y
        bins.masked_fill_(padding_mask, padding_fill)

    return bins
def forward(
        ctx,
        pos,
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
        buf,
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,  # same dimensions as density map, with padding regions to be 1
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
        sorted_node_map,
        exact_expkM=None,  # exp(-j*pi*k/M)
        exact_expkN=None,  # exp(-j*pi*k/N)
        inv_wu2_plus_wv2=None,  # 1.0/(wu^2 + wv^2)
        wu_by_wu2_plus_wv2_half=None,  # wu/(wu^2 + wv^2)/2
        wv_by_wu2_plus_wv2_half=None,  # wv/(wu^2 + wv^2)/2
        dct2=None,
        idct2=None,
        idct_idxst=None,
        idxst_idct=None,
        fast_mode=True,  # fast mode will discard some computation
        num_threads=8
):
    """Compute the density map, derive the field maps and return the energy.

    Steps performed:

    1. Call ``electric_potential_cuda.density_map`` (CUDA ``pos``) or
       ``electric_potential_cpp.density_map`` (otherwise) on the flattened
       ``pos``.  The CUDA call additionally takes ``sorted_node_map``; the
       CPU call additionally takes ``buf`` and ``num_threads``.
    2. Store the geometry arguments, ``pos``, ``sorted_node_map`` and
       ``num_threads`` as attributes on ``ctx``.
    3. Scale the density map in place by ``1 / (bin_size_x * bin_size_y)``
       and transform it with ``dct2.forward``.
    4. Store ``ctx.field_map_x`` and ``ctx.field_map_y``, obtained from
       ``idxst_idct.forward`` and ``idct_idxst.forward`` respectively.
    5. Return a zero tensor of shape ``(1,)`` when ``fast_mode`` is truthy,
       otherwise ``sum(idct2.forward(auv * inv_wu2_plus_wv2) * density_map)``.

    The three frequency-weight tensors are all rebuilt when
    ``inv_wu2_plus_wv2`` is ``None``.  ``exact_expkM`` and ``exact_expkN`` are
    accepted but not referenced by this implementation.  ``dct2``,
    ``idxst_idct`` and ``idct_idxst`` (and ``idct2`` when ``fast_mode`` is
    falsy) must be objects exposing ``.forward``; their ``None`` defaults are
    not usable.

    The elapsed wall-clock time is emitted through ``logger.debug`` after a
    CUDA synchronize (CUDA ``pos`` only).
    """
    start_time = time.time()

    # Arguments both backends take, split around the CPU-only ``buf`` slot.
    head_args = (
        pos.view(pos.numel()),
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
    )
    tail_args = (
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
    )
    if pos.is_cuda:
        output = electric_potential_cuda.density_map(
            *head_args, *tail_args, sorted_node_map)
    else:
        output = electric_potential_cpp.density_map(
            *head_args, buf, *tail_args, num_threads)

    # Stash state on ctx (presumably read by the matching backward pass;
    # confirm against the enclosing class).
    ctx_state = (
        ("node_size_x_clamped", node_size_x_clamped),
        ("node_size_y_clamped", node_size_y_clamped),
        ("offset_x", offset_x),
        ("offset_y", offset_y),
        ("ratio", ratio),
        ("bin_center_x", bin_center_x),
        ("bin_center_y", bin_center_y),
        ("target_density", target_density),
        ("xl", xl),
        ("yl", yl),
        ("xh", xh),
        ("yh", yh),
        ("bin_size_x", bin_size_x),
        ("bin_size_y", bin_size_y),
        ("num_movable_nodes", num_movable_nodes),
        ("num_filler_nodes", num_filler_nodes),
        ("padding", padding),
        ("num_bins_x", num_bins_x),
        ("num_bins_y", num_bins_y),
        ("num_movable_impacted_bins_x", num_movable_impacted_bins_x),
        ("num_movable_impacted_bins_y", num_movable_impacted_bins_y),
        ("num_filler_impacted_bins_x", num_filler_impacted_bins_x),
        ("num_filler_impacted_bins_y", num_filler_impacted_bins_y),
        ("pos", pos),
        ("sorted_node_map", sorted_node_map),
        ("num_threads", num_threads),
    )
    for attr_name, attr_value in ctx_state:
        setattr(ctx, attr_name, attr_value)

    density_map = output.view([ctx.num_bins_x, ctx.num_bins_y])

    # for DCT
    M = num_bins_x
    N = num_bins_y

    # wu and wv
    if inv_wu2_plus_wv2 is None:
        map_dtype = density_map.dtype
        map_device = density_map.device
        wu = torch.arange(M, dtype=map_dtype, device=map_device)
        wu = wu.mul(2 * np.pi / M).view([M, 1])
        wv = torch.arange(N, dtype=map_dtype, device=map_device)
        wv = wv.mul(2 * np.pi / N).view([1, N])

        freq_sq = wu.pow(2) + wv.pow(2)
        # avoid zero-division, it will be zeroed out
        freq_sq[0, 0] = 1.0
        inv_wu2_plus_wv2 = 1.0 / freq_sq
        inv_wu2_plus_wv2[0, 0] = 0.0

        wu_by_wu2_plus_wv2_half = wu.mul(inv_wu2_plus_wv2).mul_(0.5)
        wv_by_wu2_plus_wv2_half = wv.mul(inv_wu2_plus_wv2).mul_(0.5)

    # compute auv (the in-place scaling also affects ``output``, which
    # ``density_map`` is a view of)
    inv_bin_area = 1.0 / (ctx.bin_size_x * ctx.bin_size_y)
    density_map.mul_(inv_bin_area)
    auv = dct2.forward(density_map)

    # compute field xi
    weighted_by_wu = auv.mul(wu_by_wu2_plus_wv2_half)
    weighted_by_wv = auv.mul(wv_by_wu2_plus_wv2_half)
    ctx.field_map_x = idxst_idct.forward(weighted_by_wu)
    ctx.field_map_y = idct_idxst.forward(weighted_by_wv)

    # energy = \sum q*phi
    # The original author notes that evaluating it takes around 80% of the
    # computation time, hence it is skipped in fast mode.
    if not fast_mode:
        # compute potential phi: auv / (wu**2 + wv**2)
        scaled_auv = auv.mul(inv_wu2_plus_wv2)
        potential_map = idct2.forward(scaled_auv)
        # compute energy
        energy = potential_map.mul(density_map).sum()
    else:
        # dummy for invoking backward propagation
        energy = torch.zeros(1, dtype=pos.dtype, device=pos.device)

    if pos.is_cuda:
        torch.cuda.synchronize()
    elapsed_ms = (time.time() - start_time) * 1000
    logger.debug("density forward %.3f ms" % (elapsed_ms))
    return energy
def forward(
        ctx,
        pos,
        node_size_x,
        node_size_y,
        bin_center_x,
        bin_center_y,
        initial_density_map,
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,  # same dimensions as density map, with padding regions to be 1
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
        perm_M=None,  # permutation
        perm_N=None,  # permutation
        expk_M=None,  # 2*exp(j*pi*k/M)
        expk_N=None,  # 2*exp(j*pi*k/N)
        inv_wu2_plus_wv2_2X=None,  # 2.0/(wu^2 + wv^2)
        wu_by_wu2_plus_wv2_2X=None,  # 2*wu/(wu^2 + wv^2)
        wv_by_wu2_plus_wv2_2X=None,  # 2*wv/(wu^2 + wv^2)
        fast_mode=True  # fast mode will discard some computation
):
    """Compute the density map, derive the field maps and return the energy.

    Steps performed:

    1. Call ``density_map`` on ``electric_potential_cuda`` (CUDA ``pos``) or
       ``electric_potential_cpp`` (otherwise) with the flattened ``pos`` and
       the geometry arguments; both backends receive the same argument list.
    2. Store the geometry arguments and ``pos`` as attributes on ``ctx``.
    3. Scale the density map in place by ``1 / (bin_size_x * bin_size_y)``,
       transform it with ``dct.dct2`` and halve row 0 and column 0 of the
       result (element ``[0, 0]`` is therefore quartered).
    4. Store ``ctx.field_map_x`` (``dct.idsct2``) and ``ctx.field_map_y``
       (``dct.idcst2``).
    5. Return a zero tensor of shape ``(1,)`` when ``fast_mode`` is truthy;
       otherwise return the sum of the element-wise product of
       ``dct.idcct2(auv * inv_wu2_plus_wv2_2X * 2, ...)`` and the density map.

    ``perm_*``/``expk_*`` are all rebuilt when ``expk_M`` is ``None``; the
    three frequency-weight tensors are all rebuilt when
    ``inv_wu2_plus_wv2_2X`` is ``None``.

    Unlike the previous revision, ``torch.cuda.synchronize()`` is only called
    when ``pos`` lives on a CUDA device, so the CPU path no longer requires a
    CUDA-enabled PyTorch build.
    """
    # Both backends take exactly the same arguments; pick the module once.
    backend = electric_potential_cuda if pos.is_cuda else electric_potential_cpp
    output = backend.density_map(
        pos.view(pos.numel()),
        node_size_x,
        node_size_y,
        bin_center_x,
        bin_center_y,
        initial_density_map,
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y)

    # Stash state on ctx (presumably read by the matching backward pass;
    # confirm against the enclosing class).
    ctx.node_size_x = node_size_x
    ctx.node_size_y = node_size_y
    ctx.bin_center_x = bin_center_x
    ctx.bin_center_y = bin_center_y
    ctx.target_density = target_density
    ctx.xl = xl
    ctx.yl = yl
    ctx.xh = xh
    ctx.yh = yh
    ctx.bin_size_x = bin_size_x
    ctx.bin_size_y = bin_size_y
    ctx.num_movable_nodes = num_movable_nodes
    ctx.num_filler_nodes = num_filler_nodes
    ctx.padding = padding
    ctx.num_bins_x = num_bins_x
    ctx.num_bins_y = num_bins_y
    ctx.num_movable_impacted_bins_x = num_movable_impacted_bins_x
    ctx.num_movable_impacted_bins_y = num_movable_impacted_bins_y
    ctx.num_filler_impacted_bins_x = num_filler_impacted_bins_x
    ctx.num_filler_impacted_bins_y = num_filler_impacted_bins_y
    ctx.pos = pos

    density_map = output.view([ctx.num_bins_x, ctx.num_bins_y])

    # for DCT
    M = num_bins_x
    N = num_bins_y
    if expk_M is None:
        # NOTE(review): perm_M / perm_N are not used further down in this
        # function; kept to preserve the original call sequence.
        perm_M = discrete_spectral_transform.get_perm(
            M, dtype=torch.int64, device=density_map.device)
        perm_N = discrete_spectral_transform.get_perm(
            N, dtype=torch.int64, device=density_map.device)
        expk_M = discrete_spectral_transform.get_expk(
            M, dtype=density_map.dtype, device=density_map.device)
        expk_N = discrete_spectral_transform.get_expk(
            N, dtype=density_map.dtype, device=density_map.device)

    # wu and wv
    if inv_wu2_plus_wv2_2X is None:
        wu = torch.arange(
            M, dtype=density_map.dtype,
            device=density_map.device).mul(2 * np.pi / M).view([M, 1])
        wv = torch.arange(
            N, dtype=density_map.dtype,
            device=density_map.device).mul(2 * np.pi / N).view([1, N])
        wu2_plus_wv2 = wu.pow(2) + wv.pow(2)
        # avoid zero-division, it will be zeroed out
        wu2_plus_wv2[0, 0] = 1.0
        inv_wu2_plus_wv2_2X = 2.0 / wu2_plus_wv2
        inv_wu2_plus_wv2_2X[0, 0] = 0.0
        wu_by_wu2_plus_wv2_2X = wu.mul(inv_wu2_plus_wv2_2X)
        wv_by_wu2_plus_wv2_2X = wv.mul(inv_wu2_plus_wv2_2X)

    # compute auv (the in-place scaling also affects ``output``, which
    # ``density_map`` is a view of)
    density_map.mul_(1.0 / (ctx.bin_size_x * ctx.bin_size_y))
    auv = dct.dct2(density_map, expk0=expk_M, expk1=expk_N)
    auv[0, :].mul_(0.5)
    auv[:, 0].mul_(0.5)

    # compute field xi
    auv_by_wu2_plus_wv2_wu = auv.mul(wu_by_wu2_plus_wv2_2X)
    auv_by_wu2_plus_wv2_wv = auv.mul(wv_by_wu2_plus_wv2_2X)
    ctx.field_map_x = dct.idsct2(auv_by_wu2_plus_wv2_wu, expk_M, expk_N)
    ctx.field_map_y = dct.idcst2(auv_by_wu2_plus_wv2_wv, expk_M, expk_N)

    # energy = \sum q*phi
    # The original author notes that evaluating it takes around 80% of the
    # computation time, hence it is skipped in fast mode.
    if fast_mode:
        # dummy for invoking backward propagation
        energy = torch.zeros(1, dtype=pos.dtype, device=pos.device)
    else:
        # compute potential phi: auv / (wu**2 + wv**2)
        auv_by_wu2_plus_wv2 = auv.mul(inv_wu2_plus_wv2_2X).mul_(2)
        potential_map = dct.idcct2(auv_by_wu2_plus_wv2, expk_M, expk_N)
        # compute energy (potential_map is overwritten in place)
        energy = potential_map.mul_(density_map).sum()

    # Only synchronize when the work actually ran on a CUDA device; an
    # unconditional call fails on PyTorch builds without CUDA support.
    if pos.is_cuda:
        torch.cuda.synchronize()
    return energy
def forward(
        ctx,
        pos,
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
        buf,
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,  # same dimensions as density map, with padding regions to be 1
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
        sorted_node_map,
        num_threads):
    """Return the density overflow cost and the maximum bin density.

    The flattened ``pos`` and the geometry arguments are forwarded to
    ``electric_potential_cuda.density_map`` when ``pos`` is a CUDA tensor and
    to ``electric_potential_cpp.density_map`` otherwise.  Only the CUDA call
    receives ``sorted_node_map``; only the CPU call receives ``buf`` and
    ``num_threads``.  ``ctx`` is accepted but not used by this function.

    Returns:
        A 2-tuple ``(density_cost, max_density)`` where, with
        ``bin_area = bin_size_x * bin_size_y`` and the backend output viewed
        as ``[num_bins_x, num_bins_y]``:

        * ``density_cost`` is the sum over all bins of
          ``max(density_map - target_density * bin_area, 0)``;
        * ``max_density`` is ``density_map.max() / bin_area``.
    """
    # Arguments both backends take, split around the CPU-only ``buf`` slot.
    head_args = (
        pos.view(pos.numel()),
        node_size_x_clamped,
        node_size_y_clamped,
        offset_x,
        offset_y,
        ratio,
        bin_center_x,
        bin_center_y,
        initial_density_map,
    )
    tail_args = (
        target_density,
        xl,
        yl,
        xh,
        yh,
        bin_size_x,
        bin_size_y,
        num_movable_nodes,
        num_filler_nodes,
        padding,
        padding_mask,
        num_bins_x,
        num_bins_y,
        num_movable_impacted_bins_x,
        num_movable_impacted_bins_y,
        num_filler_impacted_bins_x,
        num_filler_impacted_bins_y,
    )

    if pos.is_cuda:
        raw_map = electric_potential_cuda.density_map(
            *head_args, *tail_args, sorted_node_map)
    else:
        raw_map = electric_potential_cpp.density_map(
            *head_args, buf, *tail_args, num_threads)

    bin_area = bin_size_x * bin_size_y
    density_map = raw_map.view([num_bins_x, num_bins_y])

    # Overflow is the amount by which each bin exceeds its target capacity;
    # the subtraction creates a new tensor, so clamping in place leaves
    # ``density_map`` untouched.
    overflow = density_map - target_density * bin_area
    overflow.clamp_(min=0.0)
    density_cost = overflow.sum()

    max_density = density_map.max() / bin_area
    return density_cost, max_density