def resize_pt(img, size, cuda, cast_back_uint=True, method=None):
    """Resize a uint8 numpy image with the TF1.x-compatible bilinear resampler.

    Args:
        img: ``np.ndarray`` with dtype ``np.uint8``. Two leading singleton
            axes are prepended before resampling, so this is presumably a
            2-D (H, W) single-channel image -- TODO confirm with callers.
        size: Target size, forwarded unchanged to
            ``interpolate_bilinear_2d_like_tensorflow1x``.
        cuda: When truthy, the interpolation runs on the CUDA device.
        cast_back_uint: When true (default), the float result is cast back to
            ``np.uint8`` via ``astype`` (truncation, no rounding or clipping).
        method: Forwarded unchanged to the interpolation helper.

    Returns:
        The resized image as a numpy array (uint8 if ``cast_back_uint`` else
        float32), without the two leading axes added internally.
    """
    assert type(img) is np.ndarray and img.dtype == np.uint8, \
        'Expecting image as np.ndarray with dtype=np.uint8'
    img_in = torch.from_numpy(img).float().unsqueeze(0).unsqueeze(0)
    if cuda:
        img_in = img_in.cuda()
    img_out = interpolate_bilinear_2d_like_tensorflow1x(
        img_in, size, align_corners=False, method=method)
    # Drop only the two leading axes added above. A bare ``squeeze()`` would
    # also collapse a legitimate output dimension of size 1 (e.g. size=(1, W)).
    img_out = img_out.squeeze(0).squeeze(0).cpu().numpy()
    if cast_back_uint:
        img_out = img_out.astype(np.uint8)
    return img_out
def estimate_implementation_exactness(self, cuda):
    """Compare several forward implementations of the first Inception conv.

    Builds the 'inception-v3-compat' feature extractor, takes its
    ``Conv2d_1a_3x3.conv`` layer, and feeds it one CIFAR-10 training sample
    (chosen with a fixed seed) resized to 299x299 and normalized as
    ``(x - 128) / 128``. The layer is evaluated through ``forward_tf``,
    ``forward_pt``, ``forward_pt_manualchw`` and ``forward_pt_manualhwc``;
    the outputs and their pairwise absolute errors are written via
    ``self.save`` and summary statistics are printed to stderr.

    Args:
        cuda: When truthy, the model and input are placed on the CUDA device
            and artifact names use the ``gpu`` suffix (``cpu`` otherwise).

    Returns:
        Maximum absolute pixelwise error between the TF and builtin PyTorch
        outputs, divided by the maximum absolute TF output (floored at 1e-9).
    """
    extractor = create_feature_extractor('inception-v3-compat', ['2048'], cuda=cuda)
    conv_pt = extractor.Conv2d_1a_3x3.conv
    batch_size = 1
    # NOTE: an earlier experiment truncated the conv to its first 16 filters;
    # per the original remark, anything less makes the backends diverge and
    # causes much different results. That truncation is currently disabled.

    dataset = prepare_input_from_id('cifar10-train', datasets_root=tempfile.gettempdir())
    rng = np.random.RandomState(2020)
    picked = rng.choice(len(dataset), batch_size, replace=False)
    x_pt = torch.cat([dataset[idx].unsqueeze(0) for idx in picked], dim=0)
    if cuda:
        x_pt = x_pt.cuda()
    x_pt = interpolate_bilinear_2d_like_tensorflow1x(
        x_pt.float(), size=(299, 299), align_corners=False)
    x_pt = (x_pt - 128) / 128

    # Same evaluation order as before: TF first, then the PyTorch variants.
    out_tf = self.forward_tf(conv_pt, x_pt)
    out_pt_builtin = self.forward_pt(conv_pt, x_pt)
    out_pt_manualchw = self.forward_pt_manualchw(conv_pt, x_pt)
    out_pt_manualhwc = self.forward_pt_manualhwc(conv_pt, x_pt)

    def abs_diff(lhs, rhs):
        return (lhs - rhs).abs()

    err_abs_tf_pt_builtin = abs_diff(out_tf, out_pt_builtin)
    err_abs_tf_pt_manualchw = abs_diff(out_tf, out_pt_manualchw)
    err_abs_tf_pt_manualhwc = abs_diff(out_tf, out_pt_manualhwc)
    err_abs_pt_builtin_manualchw = abs_diff(out_pt_builtin, out_pt_manualchw)
    err_abs_pt_builtin_manualhwc = abs_diff(out_pt_builtin, out_pt_manualhwc)

    suffix = f'convolution_{"gpu" if cuda else "cpu"}'
    artifacts = (
        (out_tf, 'conv_tf'),
        (out_pt_builtin, 'conv_pt_builtin'),
        (err_abs_tf_pt_builtin, 'err_abs_tf_pt_builtin'),
        (err_abs_tf_pt_manualchw, 'err_abs_tf_pt_manualchw'),
        (err_abs_tf_pt_manualhwc, 'err_abs_tf_pt_manualhwc'),
        (err_abs_pt_builtin_manualchw, 'err_abs_pt_builtin_manualchw'),
        (err_abs_pt_builtin_manualhwc, 'err_abs_pt_builtin_manualhwc'),
    )
    for tensor, tag in artifacts:
        self.save(tensor, f'{suffix}_{tag}.png')

    # Error at the bottom-right output pixel of the first channel.
    flipping_pixel_err_abs = err_abs_tf_pt_builtin[0, 0, -1, -1].item()
    print(
        f'{suffix}_bottom_right_flipping_pixel_err_abs={flipping_pixel_err_abs}',
        file=sys.stderr)

    err_abs = err_abs_tf_pt_builtin.max().item()
    print(f'{suffix}_max_pixelwise_err_abs={err_abs}', file=sys.stderr)

    # Floor the denominator so an all-zero TF output cannot divide by zero.
    tf_peak = out_tf.abs().max().clamp_min(1e-9).item()
    err_rel = err_abs / tf_peak
    print(f'{suffix}_max_pixelwise_err_rel={err_rel}', file=sys.stderr)
    return err_rel
def forward(self, x):
    """Run the network and return the features named in ``self.features_list``.

    The input is converted to float, resized to
    ``INPUT_IMAGE_SIZE x INPUT_IMAGE_SIZE`` with the TF1.x-compatible bilinear
    resampler and normalized as ``(x - 128) / 128``. Intermediate features
    ('64', '192', '768', '2048', 'logits_unbiased', 'logits') are collected as
    the layers run, and evaluation stops as soon as every requested feature
    has been produced.

    Args:
        x: ``torch.Tensor`` with dtype ``torch.uint8`` (checked via
            ``vassert``); per the shape notes below, N x 3 x ? x ?.

    Returns:
        Tuple of feature tensors, ordered like ``self.features_list``.
    """
    vassert(
        torch.is_tensor(x) and x.dtype == torch.uint8,
        'Expecting image as torch.Tensor with dtype=torch.uint8')

    features = {}
    remaining_features = self.features_list.copy()

    def packed():
        # Results are always reported in the caller-specified order.
        return tuple(features[name] for name in self.features_list)

    def global_pool(t):
        # N x C x H x W -> N x C
        pooled = F.adaptive_avg_pool2d(t, output_size=(1, 1))
        return pooled.squeeze(-1).squeeze(-1)

    side = self.INPUT_IMAGE_SIZE
    x = x.float()
    # N x 3 x ? x ?
    x = interpolate_bilinear_2d_like_tensorflow1x(
        x, size=(side, side), align_corners=False)
    # N x 3 x 299 x 299
    # The graph really multiplies by 0.0078125, but dividing by 128 gives
    # bit-exact output of this step too.
    x = (x - 128) / 128
    # N x 3 x 299 x 299

    for stage in (
        self.Conv2d_1a_3x3,  # N x 32 x 149 x 149
        self.Conv2d_2a_3x3,  # N x 32 x 147 x 147
        self.Conv2d_2b_3x3,  # N x 64 x 147 x 147
        self.MaxPool_1,      # N x 64 x 73 x 73
    ):
        x = stage(x)

    if '64' in remaining_features:
        features['64'] = global_pool(x)
        remaining_features.remove('64')
        if not remaining_features:
            return packed()

    for stage in (
        self.Conv2d_3b_1x1,  # N x 80 x 73 x 73
        self.Conv2d_4a_3x3,  # N x 192 x 71 x 71
        self.MaxPool_2,      # N x 192 x 35 x 35
    ):
        x = stage(x)

    if '192' in remaining_features:
        features['192'] = global_pool(x)
        remaining_features.remove('192')
        if not remaining_features:
            return packed()

    for stage in (
        self.Mixed_5b,  # N x 256 x 35 x 35
        self.Mixed_5c,  # N x 288 x 35 x 35
        self.Mixed_5d,  # N x 288 x 35 x 35
        self.Mixed_6a,  # N x 768 x 17 x 17
        self.Mixed_6b,  # N x 768 x 17 x 17
        self.Mixed_6c,  # N x 768 x 17 x 17
        self.Mixed_6d,  # N x 768 x 17 x 17
        self.Mixed_6e,  # N x 768 x 17 x 17
    ):
        x = stage(x)

    if '768' in remaining_features:
        features['768'] = global_pool(x)
        remaining_features.remove('768')
        if not remaining_features:
            return packed()

    for stage in (
        self.Mixed_7a,  # N x 1280 x 8 x 8
        self.Mixed_7b,  # N x 2048 x 8 x 8
        self.Mixed_7c,  # N x 2048 x 8 x 8
        self.AvgPool,   # N x 2048 x 1 x 1
    ):
        x = stage(x)
    x = torch.flatten(x, 1)
    # N x 2048

    if '2048' in remaining_features:
        features['2048'] = x
        remaining_features.remove('2048')
        if not remaining_features:
            return packed()

    if 'logits_unbiased' not in remaining_features:
        x = self.fc(x)
        # N x 1008 (num_classes)
    else:
        # Apply the fc weight and bias separately so the pre-bias logits can
        # be reported on their own.
        x = x.mm(self.fc.weight.T)
        # N x 1008 (num_classes)
        features['logits_unbiased'] = x
        remaining_features.remove('logits_unbiased')
        if not remaining_features:
            return packed()
        x = x + self.fc.bias.unsqueeze(0)

    features['logits'] = x
    return packed()