def fBatchNorm(x, running_mean, running_variance, gamma, beta, eps, momentum, training, mean_only):
    if training:
        if mean_only:
            if len(x.shape) == 2:
                x_mean = torch.mean(x, dim=0)
            else:
                x_mean = torch.mean(x, dim=[0, 2, 3])
        else:
            if len(x.shape) == 2:
                x_var, x_mean = torch.var_mean(x, dim=0)
            else:
                x_var, x_mean = torch.var_mean(x, dim=[0, 2, 3])
        # Linear interpolation: running_mean = running_mean * momentum + x_mean * (1 - momentum)
        running_mean = torch.lerp(x_mean, running_mean, momentum)
        if not mean_only:
            running_variance = torch.lerp(x_var, running_variance, momentum)
    else:
        x_mean = running_mean
        if not mean_only:
            x_var = running_variance
    if len(x.shape) == 2:
        if mean_only:
            normalized = x + (beta - x_mean)
        else:
            normalized = (x - x_mean) * (gamma * (x_var + eps).rsqrt()) + beta
        return normalized, running_mean, running_variance
    elif len(x.shape) == 4:
        if mean_only:
            normalized = x + (beta - x_mean)[..., None, None]
        else:
            normalized = (x - x_mean[..., None, None]) * (gamma[..., None, None] * (x_var[..., None, None] + eps).rsqrt()) + beta[..., None, None]
        return normalized, running_mean, running_variance
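# A quick standalone check (not part of the original function) of the torch.lerp
# identity used for the running-average update above: lerp(x_mean, running_mean,
# momentum) should equal running_mean * momentum + x_mean * (1 - momentum).
import torch

x_mean = torch.randn(16)
running_mean = torch.randn(16)
momentum = 0.9
print(torch.allclose(torch.lerp(x_mean, running_mean, momentum),
                     running_mean * momentum + x_mean * (1 - momentum)))  # True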
def test_blocks():
    d_block = DiscriminatorBlock(128, 32, nonlinearity='leaky_relu', param=LEAKINESS)
    g_block = GeneratorBlock(128, 32, nonlinearity='leaky_relu', param=LEAKINESS)
    x_in = torch.randn(64, 128, 4, 4, 4)
    print('var mean in:', torch.var_mean(x_in))
    x_out = d_block(x_in)
    print('var mean out', torch.var_mean(x_out))
    x_out = g_block(x_in)
    print('var mean out', torch.var_mean(x_out))
    x_in = torch.randn(64, 512, 512)
    print('var mean in:', torch.var_mean(x_in))
    linear = EqualizedLinear(512, 512, nonlinearity='leaky_relu', param=LEAKINESS)
    x_out = nn.functional.leaky_relu(linear(x_in), negative_slope=LEAKINESS)
    print('var mean out', torch.var_mean(x_out))
def test_padding_mode(self, padding_mode, fill_value):
    torch.random.manual_seed(42)
    layer = MatchShapes(strategy="pad", padding_mode=padding_mode, fill_value=fill_value)
    t1_in = torch.rand(1, 1, 10, 10)
    t2_in = torch.rand(1, 1, 12, 12)
    t1, t2 = layer([t1_in, t2_in])
    assert t1.shape == t2.shape
    assert tuple(t2.shape[2:]) == (12, 12)
    assert tuple(t1.shape[2:]) == (12, 12)
    if padding_mode == "mean":
        assert torch.allclose(t1.mean(), t1_in.mean())
        assert torch.allclose(t2.mean(), t2_in.mean())
    if padding_mode == "var_mean":
        t1_var, t1_mean = torch.var_mean(t1_in)
        t1_var_new, t1_mean_new = torch.var_mean(t1)
        assert torch.allclose(t1_mean, t1_mean_new, atol=3e-2)
        assert torch.allclose(t1_var, t1_var_new, atol=2e-2)
        t2_var, t2_mean = torch.var_mean(t2_in)
        t2_var_new, t2_mean_new = torch.var_mean(t2)
        assert torch.allclose(t2_mean, t2_mean_new, atol=3e-2)
        assert torch.allclose(t2_var, t2_var_new, atol=2e-2)
def test_fallback_multiple_returns(self):
    # NB: One day we will implement a batching rule for torch.var_mean
    # If/when we do, this test should be replaced to test the fallback
    # path on another operator to avoid bitrot.
    B0, B1, B2 = 2, 3, 1237
    tensor = torch.randn(B0, 10)
    self._assert_uses_vmap_fallback((torch.var_mean,), (tensor,))

    # fallback correctness on torch.var_mean
    result = vmap(torch.var_mean)(tensor)
    expected = torch.var_mean(tensor, dim=1)
    self.assertEqual(result, expected)

    # nested vmap
    tensor = torch.randn(B0, B1, 10)
    result = vmap(vmap(torch.var_mean))(tensor)
    expected = torch.var_mean(tensor, dim=2)
    self.assertEqual(result, expected)

    # big batch size, nested vmap
    tensor = torch.randn(B0, B1, B2, 10)
    result = vmap(vmap(vmap(torch.var_mean)))(tensor)
    expected = torch.var_mean(tensor, dim=3)
    self.assertEqual(result, expected)
def ssim(output, target):
    x = output
    y = target
    var_x, mean_x = torch.var_mean(x)
    var_y, mean_y = torch.var_mean(y)
    cov_x_y = torch.sum(torch.mul(x - mean_x, y - mean_y)) / x.view(-1, 1).shape[0]
    c1 = (0.01 * 1.8) ** 2
    c2 = (0.03 * 1.8) ** 2
    return (2 * mean_x * mean_y + c1) * (2 * cov_x_y + c2) / (
        (mean_x ** 2 + mean_y ** 2 + c1) * (var_x + var_y + c2))
def statistics_from_samples(self, nn_state, samples):
    """Estimates the expected value, variance, and the standard error of the
    observable using the given samples.

    :param nn_state: The NeuralState that drew the samples.
    :type nn_state: qucumber.nn_states.NeuralStateBase
    :param samples: A batch of sample states to calculate the observable on.
    :type samples: torch.Tensor

    :returns: A dictionary containing the (estimated) expected value
              (key: "mean"), variance (key: "variance"), and standard error
              (key: "std_error") of the observable. Also outputs the total
              number of drawn samples (key: "num_samples").
    :rtype: dict(str, float)
    """
    obs_samples = self.apply(nn_state, samples).data
    variance, mean = torch.var_mean(obs_samples)
    variance, mean = variance.item(), mean.item()
    std_error = np.sqrt(variance / len(obs_samples))
    return {
        "mean": mean,
        "variance": variance,
        "std_error": std_error,
        "num_samples": len(obs_samples),
    }
def plot_results(self) -> None:
    # Save predictions (including all MC Dropout iterations) in logs directory.
    np.savetxt("predictions.csv", self.preds.numpy(), delimiter=",")

    fig, ax = plt.subplots(figsize=(16, 8))
    x_axis: List[int] = list(range(len(self.ys)))
    # Play around with alpha here to see uncertainty better!
    ax.plot(x_axis, self.ys, label="observation", alpha=0.8)
    ax.plot(x_axis, self.preds, label="prediction")
    if self.config.mc_dropout:
        preds_var, preds_mean = torch.var_mean(self.preds, dim=1)
        ax.fill_between(x_axis,
                        preds_mean - torch.sqrt(preds_var),
                        preds_mean + torch.sqrt(preds_var),
                        alpha=0.5,
                        color='orange',
                        label='pred uncertainty')
        plot_name: str = "Results-MCDropout"
        ax.set_title("MC Dropout Results")
    else:
        plot_name = "Results"
        ax.set_title(f"Test set NSE: {self.test_metric:.3f}")
    ax.legend()
    ax.set_xlabel("Day")
    ax.set_ylabel("Discharge (mm/d)")

    # Save plot to png file.
    fig.savefig(
        os.path.join(SAVE_PATH, self.config.run_name, f"{plot_name.lower()}.png"))

    # Convert plot to PIL image and log to wandb.
    pil_image = Image.frombytes('RGB', fig.canvas.get_width_height(),
                                fig.canvas.tostring_rgb())
    self.logger.experiment.log(
        {plot_name: wandb.Image(pil_image, mode='RGB', caption=plot_name)})
    self.printer.info("Plot generated and saved.")
def dataset_mean_std(dataset: Dataset) -> Tuple[float, float]:
    """
    Compute the mean and standard deviation of a dataset.
    The dataset must return a tensor as the first element in each sample tuple.
    This can be quite slow because each sample is processed separately, to avoid
    saturating memory with one huge tensor.

    :param dataset: The dataset for which to compute mean and std
    :return: The mean and std
    """
    len_dataset = len(dataset)

    # Mean
    sum_mean = torch.tensor(0, dtype=torch.float)
    for sample in dataset:
        sum_mean += sample[0].mean()
    global_mean = sum_mean / len_dataset

    # STD
    sum_variance = torch.tensor(0, dtype=torch.float)
    for sample in dataset:
        var, mean = torch.var_mean(sample[0])
        sum_variance += (mean - global_mean) ** 2 + var
    global_variance = sum_variance / len_dataset
    std = global_variance.sqrt()

    return global_mean.item(), std.item()
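# Sanity-check sketch for dataset_mean_std (the TensorDataset and shapes below are
# illustrative assumptions). With equally sized samples, averaging
# (mean_i - global_mean) ** 2 + var_i over samples approximates the global variance,
# so the result should be close to torch.std_mean over the stacked tensor; small
# differences come from the unbiased per-sample variance.
import torch
from torch.utils.data import TensorDataset

images = torch.randn(100, 3, 8, 8)
mean, std = dataset_mean_std(TensorDataset(images))
global_std, global_mean = torch.std_mean(images)
print(mean, global_mean.item())  # should agree closely
print(std, global_std.item())    # should agree closely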
def do_test_beyesian(distortion):
    num_infer = config['params']['num_infer']

    ''' Model '''
    net = net_factory.load_model(config=config, num_classes=num_classes,
                                 dropout=config['params']['dropout'])
    net = net.to(device)
    ckpt = torch.load(os.path.join(config['exp']['path'], 'best.pth'), map_location=device)
    weights = utils._load_weights(ckpt['net'])
    missing_keys = net.load_state_dict(weights, strict=True)
    print(missing_keys)

    ''' print out net '''
    num_parameters = 0.
    for param in net.parameters():
        sizes = param.size()
        num_layer_param = 1.
        for size in sizes:
            num_layer_param *= size
        num_parameters += num_layer_param
    print("num. of parameters : " + str(num_parameters))

    ''' inference '''
    net.eval()
    net.apply(apply_mc_dropout)
    certainties = list()
    probs_list = list()
    targets_list = list()
    with torch.set_grad_enabled(False):
        for batch_idx, (inputs, targets) in enumerate(tqdm(test_loader)):
            inputs = _distort_image(distortion, inputs)
            inputs = inputs.to(device)
            all_probs = list()
            for iter_t in range(num_infer):
                # view_inputs(inputs)
                logits = net(inputs)
                probs = logits.softmax(dim=1)
                all_probs.append(probs.detach().cpu())
            all_probs = torch.stack(all_probs)
            all_probs = all_probs.contiguous().permute(1, 2, 0)
            var, mean = torch.var_mean(all_probs, dim=2, unbiased=True)
            probs_list.append(mean)
            targets_list.append(targets)

    probs = torch.cat(probs_list)
    targets = torch.cat(targets_list)
    ece_loss = ece_criterion(probs, targets, is_logits=False).item()
    max_probs, max_ind = probs.max(dim=1)
    all_correct = max_ind.eq(targets).float().sum().item()
    accuracy = all_correct / probs.shape[0]
    print('%-3s (accuracy) : %.5f' % (distortion, accuracy))
    print('%-3s (ece) : %.5f' % (distortion, ece_loss))
    draw_histogram(max_probs.tolist(), distortion, config['exp']['path'])
def survey_step(self, **kwargs):
    """Basic survey step: sample points, estimate the integral, its error, train model

    possible keyword arguments:
        sampling_args: dict
        training_args: dict
    """
    try:
        sampling_args = kwargs["sampling_args"]
    except KeyError:
        sampling_args = dict()

    try:
        training_args = kwargs["training_args"]
    except KeyError:
        training_args = dict()

    x, px, fx = self.sample_survey(**sampling_args)
    integral_var, integral = torch.var_mean(fx / px)
    integral = integral.cpu().item()
    integral_var = integral_var.cpu().item()

    training_record = self.model_trainer.train_on_batch(x, px, fx, **training_args)

    self.process_survey_step((x, px, fx), integral, integral_var,
                             training_record=training_record)
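# Standalone sketch of the estimator used in survey_step, with plain uniform
# sampling and the target function x**2 chosen purely for illustration: the
# importance-sampling estimate mean(fx / px) should be close to the true
# integral of x**2 on [0, 1], i.e. 1/3, and var_mean also gives its variance.
import torch

x = torch.rand(100_000)
px = torch.ones_like(x)  # uniform density on [0, 1]
fx = x ** 2
integral_var, integral = torch.var_mean(fx / px)
print(integral.item())                           # ~0.3333
print((integral_var / x.numel()).sqrt().item())  # Monte Carlo standard error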
def reduction_ops(self):
    a = torch.randn(4)
    b = torch.randn(4)
    return (
        torch.argmax(a),
        torch.argmin(a),
        torch.amax(a),
        torch.amin(a),
        torch.aminmax(a),
        torch.all(a),
        torch.any(a),
        torch.max(a),
        torch.min(a),
        torch.dist(a, b),
        torch.logsumexp(a, 0),
        torch.mean(a),
        torch.nanmean(a),
        torch.median(a),
        torch.nanmedian(a),
        torch.mode(a),
        torch.norm(a),
        torch.nansum(a),
        torch.prod(a),
        torch.quantile(a, torch.tensor([0.25, 0.5, 0.75])),
        torch.nanquantile(a, torch.tensor([0.25, 0.5, 0.75])),
        torch.std(a),
        torch.std_mean(a),
        torch.sum(a),
        torch.unique(a),
        torch.unique_consecutive(a),
        torch.var(a),
        torch.var_mean(a),
        torch.count_nonzero(a),
    )
def experience(self, x):
    """Learn input values without computing the output values of them"""
    if self.until is not None and self.count >= self.until:
        return

    count_x = x.shape[self.batch_axis]
    if count_x == 0:
        return

    self.count += count_x
    rate = count_x / self.count.float()
    assert rate > 0
    assert rate <= 1

    var_x, mean_x = torch.var_mean(x, axis=self.batch_axis, keepdims=True, unbiased=False)
    delta_mean = mean_x - self._mean
    self._mean += rate * delta_mean
    self._var += rate * (var_x - self._var + delta_mean * (mean_x - self._mean))

    # clear cache
    self._cached_std_inverse = None
def _batch_norm_for_train(u: Tensor, cov: Tensor, gamma: Tensor, beta: Optional[Tensor],
                          running_mean: Tensor, running_var: Tensor,
                          mean_variance: Optional[Tensor] = None, beta_var=None,
                          momentum: float = 0.9, eps: float = 1e-5):
    u_var, u_mean = torch.var_mean(u, dim=0, keepdim=True)
    var = torch.diagonal(cov, dim1=-1, dim2=-2)
    var = torch.mean(var, dim=0, keepdim=True)
    if mean_variance is not None:
        mean_variance = _track_mean_variance(var.reshape(mean_variance.shape),
                                             mean_variance, momentum)
    u_var = u_var + var
    running_mean, running_var = _track_running_state(
        u_mean.reshape(running_mean.shape), u_var.reshape(running_var.shape),
        running_mean, running_var, momentum)
    norm_weight = _compute_weight(gamma, u_var, eps)
    u_norm = (u - u_mean) * norm_weight
    if beta is not None:
        u_norm = u_norm + beta
    cov_norm = cov * torch.matmul(norm_weight.unsqueeze(-1), norm_weight.unsqueeze(-2))
    if beta_var is not None:
        cov_norm = cov_norm + functional.var2cov(beta_var)
    return u_norm, cov_norm, running_mean, running_var, mean_variance
def forward(self, x):
    # in F.batch_norm `training` regulates whether to use batch stats or buffer stats
    # if `training` is True and buffers are given, they always would be updated!
    use_batch_stats = self.training and not self.estimated_stats
    x = F.batch_norm(
        x,
        self.running_mean,
        self.running_var,
        self.weight,
        self.bias,
        use_batch_stats,
        self.momentum,
        self.eps,
    )
    if self.training and self.estimated_stats:
        with torch.no_grad():  # not sure if needed but just in case
            # PyTorch BN uses biased var by default
            var, mean = torch.var_mean(x, dim=(0, 2, 3), unbiased=False)
            self.running_mean = self.running_mean.mul(1 - self.momentum).add(
                mean, alpha=self.momentum)
            self.running_var = self.running_var.mul(1 - self.momentum).add(
                var, alpha=self.momentum)
    x = F.group_norm(x, self.num_groups, self.weight_gn, self.bias_gn, self.eps)
    func = ACT_FUNC_DICT[self.activation]
    if self.activation == ACT.LEAKY_RELU:
        return func(x, inplace=True, negative_slope=self.activation_param)
    elif self.activation == ACT.ELU:
        return func(x, inplace=True, alpha=self.activation_param)
    else:
        return func(x, inplace=True)
def weight_standardization(weight: torch.Tensor, eps: float):
    r"""
    ## Weight Standardization

    $$\hat{W}_{i,j} = \frac{W_{i,j} - \mu_{W_{i,\cdot}}} {\sigma_{W_{i,\cdot}}}$$

    where,

    \begin{align}
    W &\in \mathbb{R}^{O \times I} \\
    \mu_{W_{i,\cdot}} &= \frac{1}{I} \sum_{j=1}^I W_{i,j} \\
    \sigma_{W_{i,\cdot}} &= \sqrt{\frac{1}{I} \sum_{j=1}^I W^2_{i,j} - \mu^2_{W_{i,\cdot}} + \epsilon} \\
    \end{align}

    for a 2D-convolution layer $O$ is the number of output channels ($O = C_{out}$)
    and $I$ is the number of input channels times the kernel size
    ($I = C_{in} \times k_H \times k_W$)
    """
    # Get $C_{out}$, $C_{in}$ and kernel shape
    c_out, c_in, *kernel_shape = weight.shape
    # Reshape $W$ to $O \times I$
    weight = weight.view(c_out, -1)
    # Calculate
    #
    # \begin{align}
    # \mu_{W_{i,\cdot}} &= \frac{1}{I} \sum_{j=1}^I W_{i,j} \\
    # \sigma^2_{W_{i,\cdot}} &= \frac{1}{I} \sum_{j=1}^I W^2_{i,j} - \mu^2_{W_{i,\cdot}}
    # \end{align}
    var, mean = torch.var_mean(weight, dim=1, keepdim=True)
    # Normalize
    # $$\hat{W}_{i,j} = \frac{W_{i,j} - \mu_{W_{i,\cdot}}} {\sigma_{W_{i,\cdot}}}$$
    weight = (weight - mean) / (torch.sqrt(var + eps))
    # Change back to original shape and return
    return weight.view(c_out, c_in, *kernel_shape)
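# Usage sketch for weight_standardization (the Conv2d layer below is an
# illustrative assumption): after standardization each output filter should have
# roughly zero mean and unit variance over its fan-in.
import torch
import torch.nn as nn

conv = nn.Conv2d(16, 32, kernel_size=3)
w_hat = weight_standardization(conv.weight, eps=1e-5)
var, mean = torch.var_mean(w_hat.view(32, -1), dim=1)
print(mean.abs().max().item())  # ~0
print(var.mean().item())        # ~1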
def forward(self, x):
    w = self.weight
    v, m = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)
    w = (w - m) / (torch.sqrt(v) + self.eps)
    x = conv2d_same(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups)
    return x
def AdaptiveInstanceNormalization(input):
    eps = 1e-5
    content = input['content']
    style = input['style']
    n = style.size(1)
    style = style.view(n, -1)
    # torch.var_mean returns (variance, mean); take the square root so the
    # instance_norm weight is the style standard deviation, not the variance.
    targetVar, targetMean = torch.var_mean(style, 1)
    targetStd = torch.sqrt(targetVar + eps)
    return F.instance_norm(content, weight=targetStd, bias=targetMean, eps=eps)
def standardize_weight(self, eps):
    var, mean = torch.var_mean(self.weight, dim=(1, 2, 3), keepdims=True)
    fan_in = torch.prod(torch.tensor(self.weight.shape[0:]))
    scale = torch.rsqrt(torch.max(
        var * fan_in, torch.tensor(eps).to(var.device))) * self.gain.view_as(var).to(var.device)
    shift = mean * scale
    return self.weight * scale - shift
def forward(self, x):
    w = self.weight
    v, m = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False)
    # w = (w - m) / torch.sqrt(v + 1e-10)
    w = (w - m) * torch.rsqrt(v + 1e-10)
    return F.conv2d(x, w, self.bias, self.stride, self.padding, self.dilation, self.groups)
def standardize_weight(self):
    # inplace
    var, mean = torch.var_mean(self.layer.weight, dim=self.vmdims, keepdims=True)
    scale = torch.rsqrt(torch.max(var * self.fan_in, self.eps)) * self.gain.view_as(var)
    shift = mean * scale
    with torch.no_grad():
        self.layer.weight.mul_(scale).sub_(shift)
def confidence_interval(
    values: Tensor,
    ci: float = 0.95,
    dist="auto",
    tail="two",
    unbiased: bool = True,
    dim=-1,
) -> Tuple[Optional[Tensor], Optional[Tensor]]:
    r"""Computes a confidence interval for a sample. A Student's t-distribution
    will be used for samples smaller than :math:`N=30`. Otherwise a Normal
    distribution will be used.

    Args:
        values (:class:`torch.Tensor`): Sample values
        ci (float): The confidence interval to compute, given by :math:`1-\alpha`.
        unbiased (bool): Whether to use an unbiased estimator in variance computation
        dist (str): Override which distribution to use. Should be ``"auto"``, ``"t"``, or ``"normal"``.
        tail (str): Which tailed test to use. Should be ``"left"``, ``"right"``, or ``"two"``.

    Returns:
        Tuple of scalar tensors indicating the lower and upper bounds of the
        confidence interval. For single tailed tests, the non-computed value
        will be ``None``.
    """
    N = values.shape[dim]
    alpha = 1 - ci

    # compute core statistics for values
    var, mean = torch.var_mean(values, unbiased=unbiased, dim=dim)
    std = var.sqrt()
    se = std / std.new_tensor(N).sqrt_()

    # select distribution
    if dist == "auto":
        dist = "t" if N < 30 else "normal"
    critical_value = BootstrapMixin._get_critical_value(dist, alpha, tail, df=N - 1)

    lower_bound = mean - critical_value * se
    upper_bound = mean + critical_value * se

    if tail == "left":
        return lower_bound, None
    elif tail == "right":
        return None, upper_bound
    elif tail == "two":
        return lower_bound, upper_bound
    else:
        raise ValueError(f"{tail}")
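# Worked sketch of the two-tailed interval confidence_interval computes for a
# large sample, with the normal critical value 1.96 hard-coded purely for
# illustration (the real code looks it up via BootstrapMixin._get_critical_value).
import torch

values = torch.randn(1000)
var, mean = torch.var_mean(values, unbiased=True, dim=-1)
se = var.sqrt() / torch.tensor(float(values.numel())).sqrt()
lower, upper = mean - 1.96 * se, mean + 1.96 * se
print(lower.item(), upper.item())  # roughly symmetric around 0 for standard normal data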
def standardize_weight(self, eps):
    # torch.var_mean returns a (var, mean) tuple, so unpack both values here
    # (with the biased variance, matching the original std(..., unbiased=False) ** 2)
    # instead of assigning the whole tuple to `mean`.
    var, mean = torch.var_mean(self.weight, dim=(1, 2), keepdims=True, unbiased=False)
    fan_in = torch.prod(torch.tensor(self.weight.shape))
    scale = torch.rsqrt(torch.max(
        var * fan_in, torch.tensor(eps).to(var.device))) * self.gain.view_as(var).to(var.device)
    shift = mean * scale
    return self.weight * scale - shift
def forward(self, x):
    weight = self.weight
    var, mean = torch.var_mean(weight, dim=[1, 2, 3], keepdim=True, unbiased=False)
    weight = (weight - mean) / torch.sqrt(var + 1e-7)
    return F.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
def forward(self, x) -> torch.Tensor:
    if _is_contiguous(x):
        return F.layer_norm(
            x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias,
            self.eps).permute(0, 3, 1, 2)
    else:
        s, u = torch.var_mean(x, dim=1, unbiased=False, keepdim=True)
        x = (x - u) * torch.rsqrt(s + self.eps)
        x = x * self.weight[:, None, None] + self.bias[:, None, None]
        return x
def forward(self, inp):
    # Preprocessing
    # print("input to model : {}".format(inp.shape))
    out = self.conv3d_1a_7x7(inp)
    # print("conv3d_1a_7x7 output : {}".format(out.shape))
    out = self.maxPool3d_2a_3x3(out)
    # print("maxPool3d_2a_3x3 output : {}".format(out.shape))
    out = self.conv3d_2b_1x1(out)
    # print("conv3d_2b_1x1 output : {}".format(out.shape))
    out = self.conv3d_2c_3x3(out)
    # print("conv3d_2c_3x3 output : {}".format(out.shape))
    out = self.maxPool3d_3a_3x3(out)
    # print("maxPool3d_3a_3x3 output : {}".format(out.shape))
    out = self.mixed_3b(out)
    # print("mixed_3b output : {}".format(out.shape))
    out = self.mixed_3c(out)
    # print("mixed_3c output : {}".format(out.shape))
    out = self.maxPool3d_4a_3x3(out)
    # print("maxPool3d_4a_3x3 output : {}".format(out.shape))
    out = self.mixed_4b(out)
    # print("mixed_4b output : {}".format(out.shape))
    out = self.mixed_4c(out)
    # print("mixed_4c output : {}".format(out.shape))
    out = self.mixed_4d(out)
    # print("mixed_4d output : {}".format(out.shape))
    out = self.mixed_4e(out)
    # print("mixed_4e output : {}".format(out.shape))
    out = self.mixed_4f(out)
    # print("mixed_4f output : {}".format(out.shape))
    out = self.maxPool3d_5a_2x2(out)
    # print("maxPool3d_5a_2x2 output : {}".format(out.shape))
    out = self.mixed_5b(out)
    # print("mixed_5b output : {}".format(out.shape))
    out = self.mixed_5c(out)
    feature_map = out
    #####################################################################################
    variance, sample_mean = torch.var_mean(feature_map)
    sub_map = torch.sub(feature_map, sample_mean)
    correlation_matrix = torch.div(sub_map, variance)
    #####################################################################################
    # print("mixed_5c output : {}".format(out.shape))
    out = self.avg_pool(out)
    # print("avg_pool output : {}".format(out.shape))
    out = self.dropout(out)
    # print("dropout output : {}".format(out.shape))
    out = self.conv3d_0c_1x1(out)
    # print("conv3d_0c_1x1 output : {}".format(out.shape))
    out = out.squeeze(3)
    out = out.squeeze(3)
    out = out.mean(2)
    out_logits = out
    out = self.softmax(out_logits)
    return out, correlation_matrix
def forward(self, sparse_matrix):
    values = sparse_matrix.values
    var, mean = torch.var_mean(values, dim=0, unbiased=False)
    values_out = (self.gamma * (values - mean) / torch.sqrt(var + self.eps))
    if self.affine:
        values_out += self.beta
    out = sparse_matrix.clone()
    out.values = values_out
    return out
def forward(self, sparse_tensor):
    values = sparse_tensor.values
    var, mean = torch.var_mean(values, dim=1, unbiased=False)
    values_out = (self.gamma * (values.T - mean) / torch.sqrt(var + self.eps)).T
    if self.affine:
        values_out += self.beta
    out = sparse_tensor.clone()
    out.values = values_out
    return out
def get_weight(self):
    # Get Scaled WS weight OIHW
    fan_in = np.prod(self.weight.shape[1:])
    var, mean = torch.var_mean(self.weight, dim=(1, 2, 3), keepdims=True)
    scale = torch.rsqrt(
        torch.max(var * fan_in, torch.tensor(self.eps).to(
            var.device))) * self.gain.view_as(var).to(var.device)
    shift = mean * scale
    return self.weight * scale - shift
def native_batch_norm(
    input: Tensor,
    weight: Optional[Tensor],
    bias: Optional[Tensor],
    running_mean: Optional[Tensor],
    running_var: Optional[Tensor],
    training: bool,
    momentum: float,
    eps: float,
) -> Tuple[Tensor, Tensor, Tensor]:
    reduction_dims = [0] + list(range(2, input.dim()))
    if training:
        # save_mean = torch.sum(input / (input.shape[0] * input.shape[2]), dim=reduction_dims)
        biased_var, save_mean = torch.var_mean(input, dim=reduction_dims, unbiased=False)
        save_invstd = 1 / (torch.sqrt(biased_var + eps))

        if running_mean is not None:
            running_mean.copy_(momentum * save_mean + (1 - momentum) * running_mean)
        if running_var is not None:
            n = input.numel() / input.shape[1]
            # This doesn't strictly match eager's numerics, which accumulates var sum
            # and then directly applies the correction.
            # But... that would require re-implementing var here, for negligible
            # numerics gain on a tensor whose numerics probably don't matter.
            unbiased_var = biased_var * (n / (n - 1))
            running_var.copy_(momentum * unbiased_var + (1 - momentum) * running_var)
        mean = save_mean
        invstd = save_invstd
    else:
        assert running_mean is not None and running_var is not None
        mean = running_mean
        invstd = 1 / (torch.sqrt(running_var + eps))

        # Very annoying inconsistency where CPU and CUDA give different shapes
        if input.device.type == "cuda":
            save_mean = running_mean
            save_invstd = invstd
        else:
            save_mean = input.new_zeros((0,))
            save_invstd = input.new_zeros((0,))

    if weight is None:
        weight = input.new_ones(())

    if bias is None:
        bias = input.new_zeros(())

    mean = _unsqueeze_to_dim(mean, input.dim() - 1)
    invstd = _unsqueeze_to_dim(invstd, input.dim() - 1)
    weight = _unsqueeze_to_dim(weight, input.dim() - 1)
    bias = _unsqueeze_to_dim(bias, input.dim() - 1)
    output = ((input - mean) * invstd) * weight + bias
    return output, save_mean, save_invstd
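# Small numerical check (shapes are illustrative) of the n / (n - 1) correction
# used above: rescaling the biased variance matches torch.var's unbiased estimate.
import torch

x = torch.randn(8, 4, 5, 5)
dims = [0, 2, 3]
biased_var, _ = torch.var_mean(x, dim=dims, unbiased=False)
n = x.numel() / x.shape[1]
print(torch.allclose(biased_var * (n / (n - 1)),
                     torch.var(x, dim=dims, unbiased=True)))  # True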
def _calc_mean_std(train: Dataset) -> Tuple[torch.Tensor, torch.Tensor]:
    mean = torch.zeros((3,), dtype=torch.float)
    var = torch.zeros((3,), dtype=torch.float)
    for i in range(len(train)):
        v, m = torch.var_mean(train[i][0])  # 0 used to index images
        mean += m
        var += v
    mean /= len(train)
    # Average over the number of samples, not over the 3 channels.
    var /= len(train)
    std = torch.sqrt(var)
    return mean, std