def forward(self, input): assert isinstance(input, spconv.SparseConvTensor) features = input.features device = features.device indices = input.indices spatial_shape = input.spatial_shape batch_size = input.batch_size if not self.subm: out_spatial_shape = ops.get_conv_output_size( spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation) else: out_spatial_shape = spatial_shape out_tensor = input.shadow_copy() if input.benchmark: if self.name is None: raise ValueError( "you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)" ) if self.name not in input.benchmark_record: input.benchmark_record[self.name] = { "type": "SparseMaxPool", "indice_gen_time": [], "time": [], "num_points": [], "num_out_points": [], "params": { "kernel_size": self.kernel_size, "stride": self.stride, "padding": self.padding, "dilation": self.dilation, "channels": features.shape[1], } } if input.benchmark: torch.cuda.synchronize() t = time.time() out_padding = [0] * self.ndim indice_dict = input.indice_dict.copy() profile_ctx = nullcontext() if input._timer is not None and self._sparse_unique_name: profile_ctx = input._timer.namespace(self._sparse_unique_name) with profile_ctx: if self.algo == ConvAlgo.Native: outids, indice_pairs, indice_pairs_num = ops.get_indice_pairs( indices, batch_size, spatial_shape, ConvAlgo.Native, self.kernel_size, self.stride, self.padding, self.dilation, out_padding, False) if input.benchmark: torch.cuda.synchronize() interval = time.time() - t out_tensor.benchmark_record[ self.name]["indice_gen_time"].append(interval) t = time.time() if self.indice_key is not None: datas = input.find_indice_pair(self.indice_key) if datas is None: indice_data = IndiceData(outids, indices, indice_pairs, indice_pairs_num, spatial_shape, out_spatial_shape, is_subm=False, algo=self.algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation) indice_dict[self.indice_key] = indice_data else: raise ValueError( f"indice key {self.indice_key} exists") out_features = Fsp.indice_maxpool(features, indice_pairs.to(device), indice_pairs_num.to(device), outids.shape[0]) else: with input._timer.namespace("gen_pairs"): res = ops.get_indice_pairs_implicit_gemm( indices, batch_size, spatial_shape, self.algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation, out_padding=out_padding, subm=self.subm, is_train=(not self.subm) or self.training, alloc=input.thrust_allocator, timer=input._timer) outids = res[0] num_inds_per_loc = res[1] pair_fwd = res[2] pair_bwd = res[3] pair_mask_fwd_splits = res[4] pair_mask_bwd_splits = res[5] mask_argsort_fwd_splits = res[6] mask_argsort_bwd_splits = res[7] masks = res[8] if self.indice_key is not None: indice_data = ImplicitGemmIndiceData( outids, indices, pair_fwd, pair_bwd, pair_mask_fwd_splits=pair_mask_fwd_splits, pair_mask_bwd_splits=pair_mask_bwd_splits, mask_argsort_fwd_splits=mask_argsort_fwd_splits, mask_argsort_bwd_splits=mask_argsort_bwd_splits, masks=masks, is_subm=self.subm, spatial_shape=spatial_shape, out_spatial_shape=out_spatial_shape, algo=self.algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation) msg = f"your indice key {self.indice_key} already exists in this sparse tensor." assert self.indice_key not in indice_dict, msg indice_dict[self.indice_key] = indice_data out_features = Fsp.indice_maxpool_implicit_gemm( features, pair_fwd, pair_bwd, outids.shape[0]) if input.benchmark: torch.cuda.synchronize() interval = time.time() - t out_tensor.benchmark_record[self.name]["time"].append(interval) out_tensor.benchmark_record[self.name]["num_points"].append( features.shape[0]) out_tensor.benchmark_record[self.name]["num_out_points"].append( out_features.shape[0]) out_tensor = out_tensor.replace_feature(out_features) out_tensor.indices = outids out_tensor.indice_dict = indice_dict out_tensor.spatial_shape = out_spatial_shape return out_tensor
def record(self, name: str, stream: int = 0): if self.enable: return self._record(name, stream) else: return nullcontext()
def forward(self, input: SparseConvTensor): assert isinstance(input, SparseConvTensor) assert input.features.shape[ 1] == self.in_channels, "channel size mismatch" features = input.features device = features.device indices = input.indices spatial_shape = input.spatial_shape batch_size = input.batch_size if not self.subm: if self.transposed: out_spatial_shape = ops.get_deconv_output_size( spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation, self.output_padding) else: out_spatial_shape = ops.get_conv_output_size( spatial_shape, self.kernel_size, self.stride, self.padding, self.dilation) else: out_spatial_shape = spatial_shape # print(self._sparse_unique_name, spatial_shape, out_spatial_shape) # input.update_grid(out_spatial_shape) # t = time.time() out_tensor = input.shadow_copy() if input.benchmark: if self.name is None: raise ValueError( "you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)" ) if self.name not in input.benchmark_record: input.benchmark_record[self.name] = { "type": "SparseConvolution", "indice_gen_time": [], "time": [], "num_points": [], "num_out_points": [], "params": { "kernel_size": self.kernel_size, "stride": self.stride, "padding": self.padding, "dilation": self.dilation, "output_padding": self.output_padding, "subm": self.subm, "transposed": self.transposed, "input_channels": self.in_channels, "out_channels": self.out_channels, } } if self.conv1x1: if FILTER_HWIO: features = torch.mm( input.features, self.weight.view(self.out_channels, self.in_channels).T) else: features = torch.mm( input.features, self.weight.view(self.in_channels, self.out_channels)) if self.bias is not None: features += self.bias out_tensor = out_tensor.replace_feature(features) # padding may change spatial shape of conv 1x1. out_tensor.spatial_shape = out_spatial_shape return out_tensor indice_dict = input.indice_dict.copy() algo = self.algo if self.indice_key is not None: datas = input.find_indice_pair(self.indice_key) if datas is not None: msg = "due to limitation of pytorch, you must provide same algo to layers share same indice key." assert algo == datas.algo, msg # algo = datas.algo profile_ctx = nullcontext() if input._timer is not None and self._sparse_unique_name: profile_ctx = input._timer.namespace(self._sparse_unique_name) with profile_ctx: if algo == ConvAlgo.Native: datas = input.find_indice_pair(self.indice_key) if datas is not None: assert isinstance(datas, IndiceData) if self.inverse: assert datas is not None and self.indice_key is not None assert datas.is_subm is False, "inverse conv can only be used with standard conv and pool ops." outids = datas.indices indice_pairs = datas.indice_pairs indice_pair_num = datas.indice_pair_num out_spatial_shape = datas.spatial_shape assert datas.ksize == self.kernel_size, "inverse conv must have same kernel size as its couple conv" else: if self.indice_key is not None and datas is not None: outids = datas.out_indices indice_pairs = datas.indice_pairs indice_pair_num = datas.indice_pair_num assert self.subm, "only support reuse subm indices" self._check_subm_reuse_valid(input, spatial_shape, datas) else: if input.benchmark: torch.cuda.synchronize() t = time.time() try: outids, indice_pairs, indice_pair_num = ops.get_indice_pairs( indices, batch_size, spatial_shape, algo, self.kernel_size, self.stride, self.padding, self.dilation, self.output_padding, self.subm, self.transposed) except Exception as e: msg = "[Exception|native_pair]" msg += f"indices={indices.shape},bs={batch_size},ss={spatial_shape}," msg += f"algo={algo},ksize={self.kernel_size},stride={self.stride}," msg += f"padding={self.padding},dilation={self.dilation},subm={self.subm}," msg += f"transpose={self.transposed}" print(msg, file=sys.stderr) spconv_save_debug_data(indices) raise e if input.benchmark: torch.cuda.synchronize() interval = time.time() - t out_tensor.benchmark_record[ self.name]["indice_gen_time"].append(interval) indice_data = IndiceData(outids, indices, indice_pairs, indice_pair_num, spatial_shape, out_spatial_shape, is_subm=self.subm, algo=algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation) if self.indice_key is not None: msg = f"your indice key {self.indice_key} already exists in this sparse tensor." assert self.indice_key not in indice_dict, msg indice_dict[self.indice_key] = indice_data if input.benchmark: torch.cuda.synchronize() t = time.time() indice_pairs_calc = indice_pairs if indice_pairs.device != features.device: indice_pairs_calc = indice_pairs.to(features.device) if self.subm: out_features = Fsp.indice_subm_conv( features, self.weight, indice_pairs_calc, indice_pair_num, outids.shape[0], algo, input._timer) else: if self.inverse: out_features = Fsp.indice_inverse_conv( features, self.weight, indice_pairs_calc, indice_pair_num, outids.shape[0], algo) else: out_features = Fsp.indice_conv(features, self.weight, indice_pairs_calc, indice_pair_num, outids.shape[0], algo, input._timer) else: datas = input.find_indice_pair(self.indice_key) if datas is not None: assert isinstance(datas, ImplicitGemmIndiceData) if self.inverse: assert datas is not None and self.indice_key is not None assert datas.is_subm is False, "inverse conv can only be used with standard conv and pool ops." outids = datas.indices pair_fwd = datas.pair_bwd pair_bwd = datas.pair_fwd pair_mask_fwd_splits = datas.pair_mask_bwd_splits pair_mask_bwd_splits = datas.pair_mask_fwd_splits mask_argsort_fwd_splits = datas.mask_argsort_bwd_splits mask_argsort_bwd_splits = datas.mask_argsort_fwd_splits masks = datas.masks out_spatial_shape = datas.spatial_shape assert datas.ksize == self.kernel_size, "inverse conv must have same kernel size as its couple conv" else: if self.indice_key is not None and datas is not None: outids = datas.out_indices pair_fwd = datas.pair_fwd pair_bwd = datas.pair_bwd pair_mask_fwd_splits = datas.pair_mask_fwd_splits pair_mask_bwd_splits = datas.pair_mask_bwd_splits mask_argsort_fwd_splits = datas.mask_argsort_fwd_splits mask_argsort_bwd_splits = datas.mask_argsort_bwd_splits masks = datas.masks assert self.subm, "only support reuse subm indices" self._check_subm_reuse_valid(input, spatial_shape, datas) else: with input._timer.namespace("gen_pairs"): # we need to gen bwd indices for regular conv # because it may be inversed. try: res = ops.get_indice_pairs_implicit_gemm( indices, batch_size, spatial_shape, algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation, out_padding=self.output_padding, subm=self.subm, transpose=self.transposed, is_train=(not self.subm) or self.training, alloc=input.thrust_allocator, timer=input._timer) except Exception as e: msg = "[Exception|implicit_gemm_pair]" msg += f"indices={indices.shape},bs={batch_size},ss={spatial_shape}," msg += f"algo={algo},ksize={self.kernel_size},stride={self.stride}," msg += f"padding={self.padding},dilation={self.dilation},subm={self.subm}," msg += f"transpose={self.transposed}" print(msg, file=sys.stderr) spconv_save_debug_data(indices) raise e outids = res[0] num_inds_per_loc = res[1] pair_fwd = res[2] pair_bwd = res[3] pair_mask_fwd_splits = res[4] pair_mask_bwd_splits = res[5] mask_argsort_fwd_splits = res[6] mask_argsort_bwd_splits = res[7] masks = res[8] if self.indice_key is not None: indice_data = ImplicitGemmIndiceData( outids, indices, pair_fwd, pair_bwd, pair_mask_fwd_splits=pair_mask_fwd_splits, pair_mask_bwd_splits=pair_mask_bwd_splits, mask_argsort_fwd_splits=mask_argsort_fwd_splits, mask_argsort_bwd_splits=mask_argsort_bwd_splits, masks=masks, is_subm=self.subm, spatial_shape=spatial_shape, out_spatial_shape=out_spatial_shape, algo=algo, ksize=self.kernel_size, stride=self.stride, padding=self.padding, dilation=self.dilation) msg = f"your indice key {self.indice_key} already exists in this sparse tensor." assert self.indice_key not in indice_dict, msg indice_dict[self.indice_key] = indice_data if input.benchmark: torch.cuda.synchronize() t = time.time() num_activate_out = outids.shape[0] out_features = Fsp.implicit_gemm( features, self.weight, pair_fwd, pair_bwd, pair_mask_fwd_splits, pair_mask_bwd_splits, mask_argsort_fwd_splits, mask_argsort_bwd_splits, num_activate_out, masks, self.training, self.subm, input._timer, self.fp32_accum) if self.bias is not None: out_features += self.bias if input.benchmark: torch.cuda.synchronize() interval = time.time() - t out_tensor.benchmark_record[self.name]["time"].append(interval) out_tensor.benchmark_record[self.name]["num_points"].append( features.shape[0]) out_tensor.benchmark_record[self.name]["num_out_points"].append( out_features.shape[0]) out_tensor = out_tensor.replace_feature(out_features) out_tensor.indices = outids out_tensor.indice_dict = indice_dict out_tensor.spatial_shape = out_spatial_shape return out_tensor
def namespace(self, name: str): if self.enable: return self._namespace(name) else: return nullcontext()