def vis_fcn_result(img: torch.Tensor, label: torch.Tensor, result: torch.Tensor,
                   result_path, file_name, as_binary=False):
    """Write an input image, its label map and the network output to disk.

    Three files are created inside ``result_path``:
    ``<file_name>_label.jpg``, ``<file_name>_img.jpg`` and
    ``<file_name>_result.png``.

    Args:
        img: Image tensor. It is multiplied by 255, cast to uint8 and its axes
            are permuted with ``(1, 2, 0)`` (presumably CHW in [0, 1] --
            confirm with callers).
        label: Label tensor; its values are multiplied by 127 before saving.
        result: Network output, indexed along its first axis (presumably the
            class axis -- confirm with callers).
        result_path: Directory that receives the three files.
        file_name: Common prefix of the output file names.
        as_binary: If true, the arg-max over axis 0 (times 127) is saved;
            otherwise entry 1 along axis 0 (times 255) is saved.
    """
    image_hwc = np.transpose((img.cpu().numpy() * 255).astype(np.uint8), (1, 2, 0))

    scores = result.cpu().detach().numpy()
    if as_binary:
        prediction = np.argmax(scores, axis=0) * 127
    else:
        prediction = scores[1] * 255
    prediction = prediction.astype(np.uint8)

    label_map = (label.cpu().numpy() * 127).astype(np.uint8)

    # Same write order as before: label, image, prediction.
    io.imsave(os.path.join(result_path, '{0}_label.jpg'.format(file_name)), label_map)
    io.imsave(os.path.join(result_path, '{0}_img.jpg'.format(file_name)), image_hwc)
    io.imsave(os.path.join(result_path, '{0}_result.png'.format(file_name)), prediction)
def __call__(self, x: torch.Tensor) -> torch.Tensor:
    """Returns MFCC of ``x`` using the legacy implementation.

    The legacy pipeline used PyTorch==0.4 together with
    ``python_speech_features`` rather than PyTorch>=1.4 and
    ``torchaudio.transforms.MFCC``. To reproduce it the input is:

    1) re-scaled to the int16 range and cast to int16,
    2) converted to numpy,
    3) cast to float64,
    4) passed through ``python_speech_features.mfcc(...)``,
    5) converted back to a float32 :py:class:`torch.Tensor`,
    6) reshaped to match the return shape of ``torchaudio.transforms.MFCC``.

    Args:
        x: Input :py:class:`torch.Tensor` size ``[1, time_samples]``.

    Returns:
        A :py:class:`torch.Tensor` of size ``[1, self.n_mfcc, timesteps]``.
    """
    # 1) The old loader produced int16 samples whereas torchaudio.load yields
    #    values normalised to [-1., 1.], so scale up and cast to int16.
    int16_samples = (x * (1 << 15)).to(torch.int16)

    # 2) Squeeze before leaving torch, since numpy deals with zero-dim
    #    tensors differently.
    # 3) PyTorch <= 0.4 turned the tensor into np.float64 when a numpy
    #    operation was applied to it; cast explicitly to keep that behaviour.
    signal = int16_samples.squeeze().numpy().astype(np.float64)

    # 4)
    features = python_speech_features.mfcc(
        signal,
        samplerate=self.samplerate,
        winlen=self.winlen,
        winstep=self.winstep,
        numcep=self.numcep,
    )

    # 5) + 6) float32 tensor, coefficients first, plus a leading axis.
    return torch.FloatTensor(features).transpose(0, 1).unsqueeze(0)
def LogME(f: torch.Tensor, y: torch.Tensor, regression=False):
    """
    Average the per-target evidence returned by ``each_evidence``.

    :param f: [N, F], feature matrix from pre-trained model
    :param y: target labels.
        For classification, y has shape [N] with element in [0, C_t).
        For regression, y has shape [N, C] with C regression-labels
    :param regression: whether regression
    :return: LogME score (how well f can fit y directly)
    """
    features = f.detach().cpu().numpy().astype(np.float64)
    targets = y.detach().cpu().numpy()
    if regression:
        targets = targets.astype(np.float64)

    fh = features                 # [N, F]
    f = features.transpose()      # [F, N]
    D, N = f.shape
    # Decomposition of the F x F matrix f @ fh, shared by every target.
    v, s, vh = np.linalg.svd(f @ fh, full_matrices=True)

    if regression:
        # One evidence per regression column.
        columns = (targets[:, k] for k in range(targets.shape[1]))
    else:
        # One evidence per class, using a one-vs-rest indicator vector.
        n_classes = int(targets.max() + 1)
        columns = ((targets == k).astype(np.float64) for k in range(n_classes))

    evidences = [each_evidence(column, f, fh, v, s, vh, N, D) for column in columns]
    return np.mean(evidences)
def get_class_eval_metric(output_hist: torch.Tensor,
                          y_true: torch.Tensor,
                          criterion: Optional[str] = "accuracy",
                          **kwargs) -> float:
    """
    Compute one evaluation metric for a single class.

    For ``"auc"`` the values in column 1 of ``output_hist`` are used as scores
    for ``roc_curve``/``auc``. For every other criterion the index of the
    maximum along the last dimension is used as the predicted label and the
    matching scoring function is applied.

    ``criterion`` must be one of "auc", "accuracy", "precision", "recall" or
    "f1"; any other value fails on the lookup of the scoring function.
    Extra keyword arguments are forwarded to the scoring function.
    """
    wants_auc = criterion == "auc"
    if wants_auc:
        y_predicted = output_hist[:, 1]
    else:
        _, y_predicted = output_hist.max(dim=-1)

    y_true, y_predicted = convert_tensor_to_numpy((y_true, y_predicted))
    assert y_true.shape == y_predicted.shape
    y_true = y_true.astype(int)

    if wants_auc:
        fpr, tpr, _thresholds = roc_curve(y_true, y_predicted.astype(float), **kwargs)
        return auc(fpr, tpr)

    # Discrete metrics work on integer class predictions.
    scorers = {
        "accuracy": accuracy_score,
        "precision": precision_score,
        "recall": recall_score,
        "f1": f1_score,
    }
    scorer = scorers[criterion]
    return scorer(y_true, y_predicted.astype(int), **kwargs)
def mask_from_tensor(mask: torch.Tensor, squeeze_single_channel=False, dtype=None) -> np.ndarray:
    """Convert a mask tensor to a numpy array with its first axis moved last.

    Args:
        mask: Tensor converted through ``to_numpy``; axis 0 becomes the last
            axis (presumably channels-first to channels-last -- confirm).
        squeeze_single_channel: Drop the last axis when it has length 1.
        dtype: Optional numpy dtype to cast the result to.

    Returns:
        The rearranged (and optionally squeezed / cast) array.
    """
    channels_last = np.moveaxis(to_numpy(mask), 0, -1)
    if squeeze_single_channel and channels_last.shape[-1] == 1:
        channels_last = np.squeeze(channels_last, -1)
    if dtype is None:
        return channels_last
    return channels_last.astype(dtype)
def get_binarized_image(
    distances: torch.Tensor, width: int, height: int
) -> np.ndarray:
    """Reshape ``distances`` into a ``(height, width)`` float32 numpy array.

    Despite the name, no thresholding happens here: the values are only cast
    to float, reshaped and copied out of torch.
    """
    grid = distances.float().reshape((height, width))
    # astype() copies, so the returned array never aliases the input tensor.
    return grid.detach().cpu().numpy().astype(np.float32)
def to_img(tensor: torch.Tensor, bgr=True):
    """Turn a tensor into a uint8 image array with the first axis moved last.

    Args:
        tensor: 3-D tensor, or a 4-D tensor of which only the first entry is
            used. Values are clipped to [0, 1] and scaled by 255.
        bgr: When true, the order of the last axis is reversed.

    Returns:
        A ``np.uint8`` array whose axes are the input axes permuted with
        ``(1, 2, 0)``.
    """
    if tensor.dim() == 4:
        tensor = tensor[0]
    scaled = tensor.detach().cpu().numpy().clip(0, 1) * 255
    image = scaled.astype(np.uint8).transpose((1, 2, 0))
    return image[:, :, ::-1] if bgr else image
def visualize_single(distances: torch.Tensor, width: int, height: int):
    """Show ``distances`` as a grayscale image in a new matplotlib figure.

    Negative values are clamped to zero and the data is reshaped to
    ``(height, width)`` before plotting.
    """
    grid = distances.clamp_min(0).reshape((height, width))
    pixels = grid.detach().cpu().numpy().astype(np.float32)
    plt.figure()
    plt.imshow(pixels, cmap="gray")
    plt.show()
def _write_single_png(mask: Tensor, save_dir: str, filename: str):
    """Save a 2-D mask tensor as ``<save_dir>/<filename>.png`` (uint8).

    Args:
        mask: Tensor with exactly two dimensions; it is cast to ``np.uint8``
            without rescaling.
        save_dir: Output directory. It is created (including missing parents)
            when it does not exist yet.
        filename: File name without the ``.png`` extension.
    """
    assert len(mask.shape) == 2, mask.shape
    mask = mask.cpu().detach().numpy()
    # makedirs(exist_ok=True) avoids the check-then-create race of
    # ``exists()`` + ``mkdir()`` and also handles nested output directories.
    os.makedirs(save_dir, exist_ok=True)
    # Warnings raised while saving are recorded instead of being printed.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("always")
        imsave(os.path.join(save_dir, filename + ".png"), mask.astype(np.uint8))
def save_to(tensor: torch.Tensor, dest: str, real: bool, id: int, mode: str):
    '''
    Save a tensor and its visualization image to specified destination.

    The tensor is written to ``<dest>/tensors/<r|p>-<id>.pkl`` and a 345x345
    image of it to ``<dest>/viz_images/<r|p>-<id>.png``.

    Args:
        tensor: predicted traffic state tensor (or a tuple whose first item
            is that tensor, e.g. the return value of a VAE). It is reshaped
            to (69, 69, 1) for `od` and to (69, 69, 3) for `pnf`.
        dest: destination path of saving
        real: boolean value, indicating real future or predicted
        id: id of generated tensor/image
        mode: `pnf` or `od`

    Raises:
        ValueError: if ``mode`` is neither `od` nor `pnf`.
        OSError: if an output directory cannot be created.
    '''
    # Reject unknown modes before touching the file system; previously this
    # only surfaced as an unbound ``image`` variable after files were written.
    if mode not in ('od', 'pnf'):
        raise ValueError("mode must be 'od' or 'pnf', got {!r}".format(mode))

    if isinstance(tensor, tuple):  # means it's a return value of VAE
        tensor = tensor[0]

    if mode == 'od':
        tensor = tensor.reshape((69, 69, 1))
    else:
        tensor = tensor.reshape((69, 69, 3))

    image_dest = dest + '/viz_images'
    tensor_dest = dest + '/tensors'

    # ensure the directories exist
    for directory in (image_dest, tensor_dest):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            print('Error: Creating directory. ' + directory)
            raise  # keep the original error (errno, path) for the caller

    image_dest += f'/{"r" if real else "p"}-{id}.png'
    tensor_dest += f'/{"r" if real else "p"}-{id}.pkl'

    # save tensor
    torch.save(tensor, tensor_dest)

    # tensor to image
    pixels = tensor.cpu().detach().numpy()
    # simple normalize. ``pixels`` may share memory with the caller's tensor,
    # so scale out of place, and skip scaling when the maximum is zero to
    # avoid dividing by zero.
    peak = pixels.max()
    if peak != 0:
        pixels = pixels * (255 // peak)
    pixels = pixels.astype('uint8').squeeze()

    if mode == 'od':
        image = Image.fromarray(pixels, mode='L')
    else:
        image = Image.fromarray(pixels)
    image = image.resize((345, 345))
    image.save(image_dest)
def get_image(self, img_name_list, idx):
    """Load image ``idx`` and return it resized together with its original shape.

    Args:
        img_name_list: Sequence of file names relative to
            ``self.training_image_path``.
        idx: Index into ``img_name_list``.

    Returns:
        ``(image, im_size)``: the output of ``self.affineTnf`` with the batch
        axis removed, and a float tensor holding the shape of the array that
        ``io.imread`` returned.
    """
    path = os.path.join(self.training_image_path, img_name_list[idx])
    pixels = io.imread(path)

    # Remember the shape of the loaded array before any resampling.
    original_shape = np.asarray(pixels.shape)

    # Move the last axis first and add a leading batch axis for the transform.
    batch = np.expand_dims(pixels.transpose((2, 0, 1)), 0)
    batch_var = Variable(Tensor(batch.astype(np.float32)), requires_grad=False)

    # Resize image using bilinear sampling with identity affine tnf
    resized = self.affineTnf(batch_var).data.squeeze(0)

    return (resized, Tensor(original_shape.astype(np.float32)))
def get_points(self, point_coords_list, idx):
    """Return row ``idx`` of ``point_coords_list`` as a ``2 x 10`` float tensor.

    The row is reshaped to ``(2, 10)``; the two rows are kept in their stored
    order (no coordinate swap is applied).
    """
    row = point_coords_list[idx, :]
    coords = row.reshape(2, 10).astype(np.float32)
    # make arrays float tensor for subsequent processing
    return Tensor(coords)
def get_binarized_image(self, distances: torch.Tensor) -> np.ndarray:
    """Return a float32 mask that is 1.0 where ``distances <= 0`` and 0.0 elsewhere.

    The mask is reshaped to ``(self.height, self.width)``.
    """
    mask = (distances <= 0).float().reshape((self.height, self.width))
    return mask.detach().cpu().numpy().astype(np.float32)
def build_render_rgb(img: torch.Tensor,
                     num_envs: int,
                     env_height: int,
                     env_width: int,
                     num_rows: int,
                     num_cols: int,
                     render_size: int,
                     env: Optional[int] = None) -> np.ndarray:
    """Util for viewing VecEnvs in a human friendly way.

    Either a single environment is rendered (when there is only one, or when
    ``env`` is given) or the first ``num_rows * num_cols`` environments are
    tiled row by row into one image. The result is resized with PIL.

    Args:
        img: Batch of RGB Tensors of the envs. Shape = (num_envs, 3, env_size, env_size).
        num_envs: Number of envs inside the VecEnv.
        env_height: Size of VecEnv.
        env_width: Size of VecEnv.
        num_rows: Number of rows of envs to view.
        num_cols: Number of columns of envs to view.
        render_size: Pixel size of each viewed env.
        env: Optional specified environment to view.

    Returns:
        The rendered image as a numpy array.
    """
    frames = img.cpu().numpy()

    if num_envs == 1 or env is not None:
        # Single view: one cell showing the requested (or first) environment.
        num_rows = num_cols = 1
        canvas = np.transpose(frames[env or 0], (1, 2, 0))
    else:
        # Make a grid of images, filled row by row.
        canvas = np.zeros((env_height * num_rows, env_width * num_cols, 3))
        for row in range(num_rows):
            rows = slice(row * env_height, (row + 1) * env_height)
            for col in range(num_cols):
                cols = slice(col * env_width, (col + 1) * env_width)
                canvas[rows, cols, :] = np.transpose(
                    frames[row * num_cols + col], (1, 2, 0))

    ratio = env_width / env_height
    target_size = (int(render_size * num_cols * ratio),
                   int(render_size * num_rows))
    return np.array(Image.fromarray(canvas.astype(np.uint8)).resize(target_size))
def preprocess_data(data: torch.Tensor, label: torch.Tensor) -> Tuple[np.ndarray, np.ndarray]:
    """Reshape one loader batch into per-step numpy arrays.

    Args:
        data: image input
        label: corresponding output

    Returns:
        ``data`` reshaped to ``(opts.batches_per_step, opts.batch_size, -1)``
        and ``label`` reshaped to ``(opts.batches_per_step, opts.batch_size)``
        as ``np.int32``.
    """
    data_np, label_np = data.numpy(), label.numpy()
    steps, per_step = opts.batches_per_step, opts.batch_size
    data_np = data_np.reshape(steps, per_step, -1)
    label_np = label_np.reshape(steps, per_step).astype(np.int32)
    return data_np, label_np
def __init__(self, embeddings: torch.Tensor, distance: str = 'cosine', use_gpu=False):
    """Build a flat faiss index over ``embeddings``.

    Args:
        embeddings: 2-D collection of vectors, one row per item. It is copied
            into a C-contiguous float32 numpy array kept as ``self.embeddings``.
        distance: ``'cosine'`` (backed by ``faiss.IndexFlatIP``) or
            ``'euclidean'`` (backed by ``faiss.IndexFlatL2``).
            NOTE(review): no normalisation is applied here, so the inner
            product only equals cosine similarity if the rows are already
            unit length -- confirm with callers.
        use_gpu: Move the index to all available GPUs.
    """
    vectors = np.array(embeddings)
    if vectors.dtype != np.float32:
        vectors = vectors.astype(np.float32)
    self.N, self.embedding_size = vectors.shape

    # Store embeddings, so that they can be easily found by index
    self.embeddings = np.ascontiguousarray(vectors)

    index_types = {
        'cosine': faiss.IndexFlatIP,
        'euclidean': faiss.IndexFlatL2,
    }
    self.index = index_types[distance](self.embedding_size)
    if use_gpu:
        self.index = faiss.index_cpu_to_all_gpus(self.index)

    # Add data to index in batches
    batch_size = 10000
    for start in range(0, self.N, batch_size):
        self.index.add(self.embeddings[start:start + batch_size])
def high_accuracy_forward_euler_step(pos: torch.Tensor, vel: torch.Tensor, acc: torch.Tensor, step_size=0.001, duration=1) -> Tuple[torch.Tensor]: """ Update velocity and position for [duration] time, using simple Forward-Euler integration rules and Newtonian mechanics. Assumes that the acceleration is constant. A simple fool-proof but inaccurate method. Yet with a tiny step size, the approximation should be accurate (abeit slow to compute). We have the following initival value problem for the position: pos' = acc*t pos(0) = 0 """ vel = vel.astype("float64") t = 0 while t < duration: vel += step_size * acc pos = pos + step_size * (acc * t) t += step_size return pos, vel
def pgd_batch_bin(
        model: nn.Module, data: torch.Tensor, device, input_bits,
        loss_fn: AttackLoss,
        epsilon: int = 1, nr_step: int = 10, step_size_factor: float = 1.5,
        restore=False, xmin: int = 0, xmax: int = 255,
        channel_dim: int=1, rng=None) -> (
            typing.Tuple[torch.Tensor, int]):
    """run PGD attack on a model with binary bit input

    The model should take bit inputs, and ``data`` should provide integer
    values; returned adversarial input contains bit values rather than integer
    values. ``xmin`` and ``xmax`` should be shifted to retain only needed number
    of bits.

    The attack works on integer values: a random start inside the
    ``epsilon``-ball, then ``nr_step`` signed-gradient steps, each followed by
    clipping back into the ball and into ``[xmin, xmax]``.

    NOTE(review): the return annotation lists two items, but three values are
    returned (see ``:return:``).

    :param model: network evaluated on the bit tensor
    :param data: uint8 integer input; must have 4 dimensions
    :param device: device on which the bit tensors are created
    :param input_bits: number of bits per value that the model consumes (1..8)
    :param loss_fn: provides ``get_loss`` (back-propagated) and ``get_prec``
        (used for the third return value)
    :param epsilon: integer perturbation radius, ``0 < epsilon <
        2**(input_bits-1)``
    :param nr_step: number of gradient steps
    :param step_size_factor: the integer step is
        ``max(1, round(min(step_size_factor / nr_step, 1) * epsilon))``
    :param restore: forwarded to ``model_with_input_grad``
    :param xmin: lower value bound on the 8-bit scale
    :param xmax: upper value bound on the 8-bit scale
    :param channel_dim: dimension for input channels, along which the bits
        should be unpacked
    :param rng: random generator, resolved through ``get_rng_if_none``
    :return: ``(adv_bits_input, adv_int_input, accuracy)``
    """
    assert input_bits >= 1 and input_bits <= 8
    assert (type(epsilon) is int and epsilon > 0 and
            epsilon < (1 << (input_bits - 1))), (
                f'invalid {epsilon=} {input_bits=}')
    # Express the bounds on the ``input_bits``-bit scale by dropping the
    # unused low bits.
    xmin >>= 8 - input_bits
    xmax >>= 8 - input_bits

    # Remember where the integer result has to be moved back to.
    orig_dtype = data.dtype
    orig_device = data.device
    # From here on ``data`` is handled as a numpy array.
    data = torch_as_npy(data)
    assert data.dtype == np.uint8
    assert data.min() >= xmin and data.max() <= xmax, (
        f'data_range=[{data.min()}, {data.max()}] {xmin=} {xmax=}')
    # Signed type so that subtracting epsilon cannot wrap around.
    data = data.astype(np.int32)
    # Element-wise feasible interval: the epsilon-ball around the clean input
    # intersected with [xmin, xmax].
    data_range = (np.clip(data - epsilon, xmin, xmax),
                  np.clip(data + epsilon, xmin, xmax))

    def clip_inplace(x):
        # Project ``x`` back into the feasible interval, writing into ``x``.
        return np.clip(x, data_range[0], data_range[1], out=x)

    rng = get_rng_if_none(rng)
    # Random start: add integer noise, then project into the feasible interval.
    data = clip_inplace(
        data +
        rng.randint(-epsilon, epsilon + 1, size=data.shape, dtype=data.dtype))

    # coefficient of each bit to sum the gradients
    # Only the last ``eps_bits`` bit positions get a non-zero weight
    # (1, 0.5, 0.25, ... towards the final position); the others stay zero.
    grad_coeff = np.zeros(input_bits, dtype=np.float32)
    eps_bits = epsilon.bit_length()
    grad_coeff[-eps_bits:] = 0.5 ** np.arange(eps_bits)

    assert channel_dim == 1 and data.ndim == 4, 'unimplemented config'
    n, c, h, w = data.shape
    grad_shape = (n, c, input_bits, h, w)
    # Shaped to broadcast over the bit axis of ``grad_shape``.
    grad_coeff = (torch.from_numpy(grad_coeff).
                  to(device).view(1, 1, input_bits, 1, 1))

    def as_torch_bits_tensor(x: np.ndarray):
        # Unpack every value into 8 bits along a new axis, keep the last
        # ``input_bits`` of them and fold the bit axis into the channel axis.
        x = x.reshape(n, c, 1, h, w).astype(np.uint8)
        bits = np.unpackbits(x, axis=2)
        bits = bits[:, :, -input_bits:].reshape(n, c * input_bits, h, w)
        ret = torch.from_numpy(bits).to(device).to(torch.float32).detach_()
        # Gradients with respect to the individual bits are needed.
        ret.requires_grad_(True)
        return ret

    def grad_reduce_bits_sum(x: torch.Tensor):
        # Weight the per-bit gradients (in place) and sum them over the bit
        # axis, giving one value per integer element.
        return (x.view(grad_shape).mul_(grad_coeff)).sum(dim=channel_dim+1)

    # Integer step size, at least 1.
    step_size = int(max(1, round(min(step_size_factor / nr_step, 1) * epsilon)))
    with model_with_input_grad(model, restore) as model:
        for i in range(nr_step):
            data_torch = as_torch_bits_tensor(data)
            loss_fn.get_loss(model(data_torch)).backward()
            with torch.no_grad():
                grad = grad_reduce_bits_sum(data_torch.grad)
                # Move every element by +/- step_size (or 0) according to the
                # sign of its reduced gradient, then project back.
                step_torch = grad.sign_().to(torch.int32).mul_(step_size)
                data += torch_as_npy(step_torch)
                data = clip_inplace(data)

        # Final adversarial input, both as bits and as integers.
        data_bits = as_torch_bits_tensor(data)
        data_int = torch.from_numpy(data).to(orig_device, orig_dtype)
        return data_bits, data_int, loss_fn.get_prec(model(data_bits))
def _postprocess(x: torch.Tensor) -> np.ndarray:
    """Convert a batch holding exactly one image into a uint8 array.

    The single item is taken out of the batch, its axes are permuted with
    ``(1, 2, 0)`` and its values are mapped with ``(v + 1) / 2 * 255``
    (so -1 becomes 0 and 1 becomes 255). No clipping is applied.

    Raises:
        ValueError: if the batch does not contain exactly one item.
    """
    (single,) = x
    array = single.detach().cpu().float().numpy()
    rescaled = (np.transpose(array, (1, 2, 0)) + 1) / 2.0 * 255.0
    return rescaled.astype('uint8')
def _make_grid(self, x: torch.Tensor) -> np.ndarray:
    """Tile the batch ``x`` into one uint8 image with channels converted RGB -> BGR.

    ``make_grid`` is called with ``nrow=self._num_grid_rows`` and no padding;
    the result is moved to numpy, permuted with ``(1, 2, 0)`` and cast to
    ``np.uint8`` without rescaling.
    """
    grid = make_grid(x, nrow=self._num_grid_rows, padding=0)
    rgb = grid.cpu().numpy().transpose((1, 2, 0)).astype(np.uint8)
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
def __getitem__(self, idx):
    """Return training sample ``idx`` as ``{'image': ..., 'theta': ...}``.

    The image is loaded from ``self.training_image_path`` and converted to a
    float CHW tensor. ``theta`` is either read from ``self.theta_array`` or,
    when ``self.random_sample`` is set, drawn at random for the configured
    ``self.geometric_model`` ('affine' or 'tps'). ``self.transform`` is
    applied to the sample when it is set.

    Raises:
        ValueError: if a random sample is requested for an unknown
            geometric model.
    """
    # read image
    img_name = os.path.join(self.training_image_path, self.img_names[idx])
    image = io.imread(img_name)

    # read theta
    if not self.random_sample:
        theta = self.theta_array[idx, :]
        if self.geometric_model == 'affine':
            # reshape theta to 2x3 matrix [A|t] where
            # first row corresponds to X and second to Y
            theta = theta[[3, 2, 5, 1, 0, 4]].reshape(2, 3)
        elif self.geometric_model == 'tps':
            theta = np.expand_dims(np.expand_dims(theta, 1), 2)
    elif self.geometric_model == 'affine':
        # Draw the angle as a scalar (same single random draw as before) so
        # that no one-element array is assigned into a scalar slot below.
        alpha = (np.random.rand(1)[0] - 0.5) * 2 * np.pi * self.random_alpha
        theta = np.random.rand(6)
        theta[[2, 5]] = (theta[[2, 5]] - 0.5) * 2 * self.random_t
        theta[0] = (1 + (theta[0] - 0.5) * 2 * self.random_s) * np.cos(alpha)
        theta[1] = (1 + (theta[1] - 0.5) * 2 * self.random_s) * (-np.sin(alpha))
        theta[3] = (1 + (theta[3] - 0.5) * 2 * self.random_s) * np.sin(alpha)
        theta[4] = (1 + (theta[4] - 0.5) * 2 * self.random_s) * np.cos(alpha)
        theta = theta.reshape(2, 3)
    elif self.geometric_model == 'tps':
        # Regular 3x3 control-point grid plus uniform noise.
        theta = np.array([
            -1, -1, -1, 0, 0, 0, 1, 1, 1, -1, 0, 1, -1, 0, 1, -1, 0, 1
        ])
        theta = theta + (np.random.rand(18) - 0.5) * 2 * self.random_t_tps
    else:
        raise ValueError(
            "Available values for geometric_model are 'affine' or 'tps'"
        )

    # make arrays float tensor for subsequent processing
    image = Tensor(image.astype(np.float32))
    theta = Tensor(theta.astype(np.float32))

    # permute order of image to CHW
    image = image.transpose(1, 2).transpose(0, 1)

    # Resize image using bilinear sampling with identity affine tnf.
    # The image is CHW here, so height and width are dims 1 and 2; the
    # previous check compared channels/height against out_h/out_w.
    _, height, width = image.size()
    if height != self.out_h or width != self.out_w:
        image = self.affineTnf(
            Variable(image.unsqueeze(0), requires_grad=False)).data.squeeze(0)

    sample = {'image': image, 'theta': theta}

    if self.transform:
        sample = self.transform(sample)

    return sample
def __call__(self, x: torch.Tensor) -> torch.Tensor:
    """Apply an Eilertsen tone-mapping operator with parameters from ``self.get_params()``.

    The input is copied, permuted with ``(1, 2, 0)`` for the operator and the
    result is returned as a new float32 tensor permuted back with
    ``(2, 0, 1)``. The input tensor is left untouched.
    """
    ev, exponent, sigma = self.get_params()
    tone_map = hdrpy.tmo.EilertsenTMO(ev=ev, exponent=exponent, sigma=sigma)
    channels_last = x.clone().detach().numpy().transpose((1, 2, 0))
    mapped = tone_map(channels_last)
    channels_first = mapped.astype(np.float32).transpose((2, 0, 1))
    return torch.from_numpy(channels_first).clone()
def _input_transform(img: Tensor):
    """Scale a tensor by 255 and return it as a uint8 array with reversed channels.

    The axes are permuted with ``(1, 2, 0)`` and the order of the last axis is
    reversed (BGR to RGB).
    """
    # TODO: change net output from 0-1 to 0-255
    scaled = img.cpu().numpy() * 255
    channels_last = np.transpose(scaled, (1, 2, 0))
    # BGR to RGB
    flipped = channels_last[:, :, ::-1]
    return flipped.astype(np.uint8)
def showImg(img: torch.Tensor):
    """Display a tensor with matplotlib after moving axis 0 to the end.

    Values are mapped with ``(v / 2 + 0.5) * 255`` (so -1 becomes 0 and 1
    becomes 255) and cast to uint8 before plotting.
    """
    channels_last = np.rollaxis(img.numpy(), 0, 3)
    pixels = ((channels_last / 2 + 0.5) * 255).astype(np.uint8)
    plt.imshow(pixels)
    plt.show()
def __getitem__(self, idx):
    """Return image pair ``idx`` with its crop vertices and transformation.

    Both images are read with OpenCV from ``self.training_image_path``.
    Image A is blanked outside the polygon given by the stored vertices.
    Both images are returned as float CHW tensors, resized through
    ``self.affineTnf`` when their height/width differ from
    ``self.out_h``/``self.out_w``. ``self.transform`` is applied to the
    sample when it is set.

    Returns:
        Dict with keys ``'image_a'``, ``'vertices_a'``, ``'image_b'`` and
        ``'theta'``.

    Raises:
        OSError: if one of the two images cannot be read.
    """
    # read image
    img_name_a = os.path.join(self.training_image_path,
                              self.img_a_names[idx])
    img_name_b = os.path.join(self.training_image_path,
                              self.img_b_names[idx])
    image_a = cv2.imread(img_name_a, cv2.IMREAD_COLOR)
    image_b = cv2.imread(img_name_b, cv2.IMREAD_COLOR)
    # cv2.imread signals failure by returning None instead of raising.
    for path, loaded in ((img_name_a, image_a), (img_name_b, image_b)):
        if loaded is None:
            raise OSError('could not read image: {}'.format(path))

    vertices = ast.literal_eval(self.img_a_vertices[idx])

    # read theta
    theta = self.theta_array[idx, :]
    if self.geometric_model == 'affine':
        # reshape theta to 2x3 matrix [A|t] where
        # first row corresponds to X and second to Y
        theta = theta[[3, 2, 5, 1, 0, 4]].reshape(2, 3)
    elif self.geometric_model == 'tps':
        theta = np.expand_dims(np.expand_dims(theta, 1), 2)

    # hold in the image_a only the crop but maintaining resolution
    # we achieve this by blanking each pixel outside the vertices
    image_a = blank_outside_verts(image_a, vertices)

    def to_resized_chw(array):
        # make arrays float tensor for subsequent processing and
        # permute order of image to CHW
        tensor = Tensor(array.astype(np.float32))
        tensor = tensor.transpose(1, 2).transpose(0, 1)
        # Resize image using bilinear sampling with identity affine tnf.
        # The tensor is CHW, so height and width are dims 1 and 2; the
        # previous check compared channels/height against out_h/out_w.
        _, height, width = tensor.size()
        if height != self.out_h or width != self.out_w:
            tensor = self.affineTnf(
                Variable(tensor.unsqueeze(0),
                         requires_grad=False)).data.squeeze(0)
        return tensor

    image_a = to_resized_chw(image_a)
    image_b = to_resized_chw(image_b)
    theta = Tensor(theta.astype(np.float32))

    sample = {
        'image_a': image_a,
        'vertices_a': Tensor(vertices),
        'image_b': image_b,
        'theta': theta
    }

    if self.transform:
        sample = self.transform(sample)

    return sample
def __call__(self, x: torch.Tensor) -> torch.Tensor:
    """Run ``self.itmo`` on ``x`` and return the result as a new float32 tensor.

    The input is copied and permuted with ``(1, 2, 0)`` before the call; the
    output is cast to float32 and permuted back with ``(2, 0, 1)``. The input
    tensor is left untouched.
    """
    channels_last = x.clone().detach().numpy().transpose((1, 2, 0))
    mapped = self.itmo(channels_last)
    channels_first = mapped.astype(np.float32).transpose((2, 0, 1))
    return torch.from_numpy(channels_first).clone()