def _update_layer_params(layer: torch.nn.Conv2d, new_weight: np.ndarray, new_bias: np.ndarray):
    """
    Update parameters (weights and bias) for the given layer.

    :param layer: layer to be updated
    :param new_weight: new weights
    :param new_bias: new bias
    :return: Nothing
    """
    assert isinstance(layer, torch.nn.Conv2d)
    assert len(new_weight.shape) == 4
    assert new_weight.shape[0] == layer.out_channels
    assert new_weight.shape[1] == layer.in_channels
    assert new_weight.shape[2] == layer.kernel_size[0]
    assert new_weight.shape[3] == layer.kernel_size[1]
    new_weight = torch.FloatTensor(new_weight)
    if layer.weight.is_cuda:
        new_weight = new_weight.cuda()
    layer.weight.data = new_weight
    if new_bias is not None:
        assert len(new_bias.shape) == 1
        assert new_bias.shape[0] == layer.out_channels
        new_bias = torch.FloatTensor(new_bias)
        if layer.bias.is_cuda:
            new_bias = new_bias.cuda()
        layer.bias.data = new_bias

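# Hedged usage sketch for _update_layer_params: `conv` and the array shapes below are
# illustrative assumptions, not part of the original code. It shows re-initializing a
# Conv2d layer with new weights and a zero bias that satisfy the shape assertions.
import numpy as np
import torch

conv = torch.nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3)
new_w = np.random.randn(16, 3, 3, 3).astype(np.float32) * 0.01  # (out, in, kH, kW)
new_b = np.zeros(16, dtype=np.float32)
_update_layer_params(conv, new_w, new_b)
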
async def run_default(img: np.ndarray, detect_size: int, cuda: bool, verbose: bool, args: dict):
    global DEFAULT_MODEL
    img_resized, target_ratio, _, pad_w, pad_h = imgproc.resize_aspect_ratio(
        cv2.bilateralFilter(img, 17, 80, 80), detect_size, cv2.INTER_LINEAR, mag_ratio=1)
    ratio_h = ratio_w = 1 / target_ratio
    if verbose:
        print(f'Detection resolution: {img_resized.shape[1]}x{img_resized.shape[0]}')
    img_resized = img_resized.astype(np.float32) / 127.5 - 1.0
    img = torch.from_numpy(img_resized)
    if cuda:
        img = img.cuda()
    img = einops.rearrange(img, 'h w c -> 1 c h w')
    with torch.no_grad():
        db, mask = DEFAULT_MODEL(img)
        db = db.sigmoid().cpu()
        mask = mask[0, 0, :, :].cpu().numpy()
    det = dbnet_utils.SegDetectorRepresenter(args.text_threshold, args.box_threshold,
                                             unclip_ratio=args.unclip_ratio)
    boxes, scores = det({'shape': [(img_resized.shape[0], img_resized.shape[1])]}, db)
    boxes, scores = boxes[0], scores[0]
    if boxes.size == 0:
        polys = []
    else:
        idx = boxes.reshape(boxes.shape[0], -1).sum(axis=1) > 0
        polys, _ = boxes[idx], scores[idx]
        polys = polys.astype(np.float64)
        polys = craft_utils.adjustResultCoordinates(polys, ratio_w, ratio_h, ratio_net=1)
        polys = polys.astype(np.int16)
    textlines = [Quadrilateral(pts.astype(int), '', 0) for pts in polys]
    textlines = list(filter(lambda q: q.area > 16, textlines))
    mask_resized = cv2.resize(mask, (mask.shape[1] * 2, mask.shape[0] * 2),
                              interpolation=cv2.INTER_LINEAR)
    if pad_h > 0:
        mask_resized = mask_resized[:-pad_h, :]
    elif pad_w > 0:
        mask_resized = mask_resized[:, :-pad_w]
    return textlines, np.clip(mask_resized * 255, 0, 255).astype(np.uint8)

def process(self, frame: np.ndarray) -> float:
    frame = torch.tensor(frame, dtype=torch.float32)
    frame.unsqueeze_(0).unsqueeze_(0)
    if self._use_cuda:
        frame = frame.cuda()
    Dx = F.conv2d(frame, weight=self._Mx, stride=1, padding=1).abs()
    Dy = F.conv2d(frame, weight=self._My, stride=1, padding=1).abs()
    max_Dx = F.max_pool2d(Dx, kernel_size=8, stride=8)
    max_Dy = F.max_pool2d(Dy, kernel_size=8, stride=8)
    max_Dx[max_Dx < EPSILON] = EPSILON
    max_Dy[max_Dy < EPSILON] = EPSILON
    Bx = F.conv2d(Dx.abs(), weight=self._omega1v, stride=8) / max_Dx
    By = F.conv2d(Dy.abs(), weight=self._omega1h, stride=8) / max_Dy
    B = torch.max(Bx, By)
    D = (Dx ** 2 + Dy ** 2) ** 0.5
    max_D = F.max_pool2d(D, kernel_size=8, stride=8)
    max_D[max_D < EPSILON] = EPSILON
    I = F.conv2d(D, weight=self._omega2, stride=8) / max_D
    L = torch.abs(B ** self.k - I ** self.k) / torch.abs(B ** self.k + I ** self.k + EPSILON)
    return L.mean().item()

def _transform_single_normal_deep_dream(self, stft: np.ndarray) -> np.ndarray:
    octaves = []
    for i in range(self._n_octaves - 1):
        hw = stft.shape[:2]
        lo = cv2.resize(stft, tuple(np.int32(np.float32(hw[::-1]) / self._octave_scale)))[..., None]
        hi = stft - cv2.resize(lo, tuple(np.int32(hw[::-1])))[..., None]
        stft = lo
        octaves.append(hi)
    for octave in tqdm.trange(self._n_octaves, desc="Image optimisation"):
        if octave > 0:
            hi = octaves[-octave]
            stft = cv2.resize(stft, tuple(np.int32(hi.shape[:2][::-1])))[..., None] + hi
        stft = torch.from_numpy(stft).float()
        if self._use_gpu:
            stft = stft.cuda()
        stft = stft.permute((2, 0, 1))
        for i in tqdm.trange(self._number_of_iterations, desc="Octave optimisation"):
            g = self.calc_grad_tiled(stft)
            g /= (g.abs().mean() + 1e-8)
            g *= self._optimisation_step_size
            stft += g
        if self._use_gpu:
            stft = stft.cpu()
        stft = stft.detach().numpy().transpose((1, 2, 0))
    return stft

def predict(self, input: np.ndarray, target: Optional[np.ndarray] = None) -> np.ndarray:
    input = torch.tensor(input, dtype=torch.float32)
    if target is not None:
        target = torch.tensor(target)
    if torch.cuda.is_available():
        input = input.cuda()
        if target is not None:
            target = target.cuda()

    # Prediction
    for layer in self.layers:
        input = layer(input)
    output = input.argmax(dim=1)

    # Update
    if target is not None:
        self.optimizer.zero_grad()
        loss = self.loss(input, target)
        loss.backward()
        self.optimizer.step()

    if torch.cuda.is_available():
        output = output.cpu()
    return output.numpy()

def infer(self, clip: np.ndarray) -> Union[np.ndarray, List[np.ndarray]]:
    """
    Infer and return predictions given the input clip from the video source.

    Note that the output is either a numpy.ndarray or a list of numpy.ndarray.
    For an inference engine that runs a neural network producing a single output,
    the returned object is a numpy.ndarray of shape (T, C), where `T` is the number
    of time steps (dependent on the length of the provided input clip) and `C` is the
    number of output channels. For an inference engine running a multi-output neural
    network, the returned object is a list of numpy.ndarray, one for each output.

    :param clip: The video frames to be inferred.
    :return: Predictions from the neural network.
    """
    with torch.no_grad():
        clip = self.net.preprocess(clip)
        if self.use_gpu:
            clip = clip.cuda()
        predictions = self.net(clip)
        if isinstance(predictions, list):
            predictions = [pred.cpu().numpy() for pred in predictions]
        else:
            predictions = predictions.cpu().numpy()
    return predictions

def encode_sample(sample: np.ndarray, model_name: str = None, cuda=True) -> np.ndarray:
    model_name = model_name or default_model
    input_fn = models[model_name][2]
    output_fn = models[model_name][3]
    sample = input_fn(sample)
    if sample.ndim == 3:
        sample = np.expand_dims(sample, axis=0)
    sample = torch.from_numpy(sample).float()
    model = models[model_name][0]
    if cuda:
        sample = sample.cuda()
        model = model.cuda()
    model.eval()
    with torch.no_grad():
        feature = model(sample)
        feature = output_fn(feature)
        feature = torch.flatten(feature)
    if cuda:
        feature = feature.cpu()
    return feature.numpy()

def __call__(self, img: np.ndarray, mask: Optional[np.ndarray], ignore_mask=True) -> np.ndarray:
    (img, mask), h, w = self._preprocess(img, mask)
    with torch.no_grad():
        inputs = [img.cuda()]
        if not ignore_mask:
            inputs += [mask]
        pred = self.model(*inputs)
    return self._postprocess(pred)[:h, :w, :]

def _generate_batch(self, features: np.ndarray):
    if isinstance(features, np.ndarray):
        features = torch.from_numpy(features).float()
    if torch.cuda.is_available():
        features = features.cuda()
    generator_output = self._generator(features)
    images = self._vectors_to_images(generator_output).data.cpu()
    images = ((images + 1) * 255).clamp(0, 255).type(torch.uint8)
    print(images.shape, images.dtype, images.min(), images.max())
    return images.detach().numpy()

def _preprocess(self, img: np.ndarray, size: int) -> torch.Tensor:
    h, w = get_image_size_after_resize_preserving_aspect_ratio(h=img.shape[0],
                                                               w=img.shape[1],
                                                               target_size=size)
    img = cv2.resize(img, (w, h), interpolation=cv2.INTER_NEAREST)
    img = to_tensor(img)
    img = normalize(img, self.img_normalization["mean"], self.img_normalization["std"])
    img = img.unsqueeze(0)
    if cfg.is_cuda:
        img = img.cuda()
    return img

def infer(self, clip: np.ndarray, batch_size=None) -> Union[np.ndarray, List[np.ndarray]]:
    """
    Infer and return predictions given the input clip from the video source.

    Note that the output is either a numpy.ndarray or a list of numpy.ndarray.
    For an inference engine that runs a neural network producing a single output,
    the returned object is a numpy.ndarray of shape (T, C), where `T` is the number
    of time steps (dependent on the length of the provided input clip) and `C` is the
    number of output channels. For an inference engine running a multi-output neural
    network, the returned object is a list of numpy.ndarray, one for each output.

    :param clip: The video frames to be inferred.
    :param batch_size: Batch size to perform inference with. Warning: only use this
        if you did not remove padding from the model.
    :return: Predictions from the neural network.
    """
    predictions = []
    with torch.no_grad():
        clip = self.net.preprocess(clip)
        if self.use_gpu:
            clip = clip.cuda()
        if batch_size is None:
            predictions = self.net(clip)
        else:
            for sub_clip in torch.Tensor.split(clip, batch_size):
                # Skip sub-clips shorter than the network's required number of frames
                if sub_clip.shape[0] >= self.net.num_required_frames_per_layer_padding[0]:
                    predictions.append(self.net(sub_clip))
            if isinstance(predictions[0], list):
                # Group the i-th output of every sub-clip before concatenating
                predictions = list(zip(*predictions))
                predictions = [torch.cat(x, dim=0) for x in predictions]
            else:
                predictions = torch.cat(predictions, dim=0)
        if isinstance(predictions, list):
            predictions = [pred.cpu().numpy() for pred in predictions]
        else:
            predictions = predictions.cpu().numpy()
    return predictions

def init_vae_dataloaders(
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: Optional[np.ndarray] = None,
    y_test: Optional[np.ndarray] = None,
    batch_size: int = 100,
) -> Tuple[torch.utils.data.DataLoader, torch.utils.data.DataLoader]:
    """
    Returns train and test dataloaders for training images in a native PyTorch format.
    """
    labels_ = y_train is not None and y_test is not None
    X_train = torch.from_numpy(X_train).float()
    X_test = torch.from_numpy(X_test).float()
    if labels_:
        y_train = torch.from_numpy(y_train)
        y_test = torch.from_numpy(y_test)
    if torch.cuda.is_available():
        X_train = X_train.cuda()
        X_test = X_test.cuda()
        if labels_:
            y_train = y_train.cuda()
            y_test = y_test.cuda()
    if labels_:
        data_train = torch.utils.data.TensorDataset(X_train, y_train)
        data_test = torch.utils.data.TensorDataset(X_test, y_test)
    else:
        data_train = torch.utils.data.TensorDataset(X_train)
        data_test = torch.utils.data.TensorDataset(X_test)
    train_loader = torch.utils.data.DataLoader(data_train, batch_size=batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(data_test, batch_size=batch_size)
    return train_loader, test_loader

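# Hedged usage sketch for init_vae_dataloaders: the shapes, label counts, and batch size
# below are illustrative assumptions, not taken from the original code. It builds labelled
# train/test loaders from random arrays and iterates one batch.
import numpy as np

X_train = np.random.rand(500, 1, 28, 28).astype(np.float32)
X_test = np.random.rand(100, 1, 28, 28).astype(np.float32)
y_train = np.random.randint(0, 10, size=500)
y_test = np.random.randint(0, 10, size=100)
train_loader, test_loader = init_vae_dataloaders(X_train, X_test, y_train, y_test, batch_size=50)
for xb, yb in train_loader:
    break  # each batch is an (images, labels) pair of tensors
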
def to_var(x: np.ndarray, cuda: bool, volatile: bool = False):
    """
    Convert a numpy array to a torch Tensor.

    Parameters
    ----------
    x: np.ndarray
        the input array
    cuda: bool
        move the tensor to cuda
    volatile: bool, optional
        make the tensor volatile
    """
    x = Variable(torch.from_numpy(np.asarray(x)), volatile=volatile)
    if (torch.cuda.is_available() and cuda is None) or cuda:
        x = x.cuda()
    return x

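# Hedged usage sketch for to_var, assuming a legacy PyTorch version (pre-0.4) where
# torch.autograd.Variable still accepts the `volatile` flag; on modern PyTorch the same
# effect is obtained with torch.from_numpy(...) inside a `with torch.no_grad():` block.
# The array below is illustrative only.
import numpy as np

x_var = to_var(np.zeros((2, 3), dtype=np.float32), cuda=False)   # stays on CPU
# x_var = to_var(np.zeros((2, 3), dtype=np.float32), cuda=True)  # moves to GPU when available
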
def fix(image: np.ndarray) -> np.ndarray:
    image = image[:, :, ::-1]
    image = resize(image, (224, 224), preserve_range=True)
    image = norm_(image)
    image = image.transpose(2, 0, 1)
    image = torch.from_numpy(image)
    image = image.unsqueeze(0)
    image_fixed = generator(image.cuda())
    image_out = denorm_(
        resize(image_fixed[0].cpu().detach().numpy().transpose(1, 2, 0),
               (250, 250), preserve_range=True).astype(np.float32)).astype(np.uint8)
    return image_out[:, :, ::-1]

def forward(self, x: np.ndarray) -> Tuple[Any, Any]:
    """
    Run the detection and refinement networks on a single image and
    return the text score map and the link score map.
    """
    x = torch.from_numpy(x).permute(2, 0, 1)  # [h, w, c] to [c, h, w]
    x = Variable(x.unsqueeze(0))              # [c, h, w] to [b, c, h, w]
    if self.is_cuda:
        x = x.cuda()
    y, feature = self.net(x)
    y_refiner = self.refine_net(y, feature)

    # make score and link map
    score_text = y[0, :, :, 0].cpu().data.numpy()
    score_link = y_refiner[0, :, :, 0].cpu().data.numpy()
    return score_text, score_link

def to_torch(x_batch: Union[sparse.csr_matrix, np.ndarray], y_batch: np.ndarray, use_gpu: bool):
    """
    Convert the inputs to PyTorch tensors, optionally moving them to the GPU.

    :param x_batch: feature matrix
    :param y_batch: target matrix. If only x_batch needs converting, any ndarray can be passed here.
    :param use_gpu: whether to move the tensors to the GPU.
    :return:
    """
    if isinstance(x_batch, sparse.csr_matrix):
        x_batch = x_batch.toarray()
    x_batch = torch.from_numpy(x_batch).type(torch.FloatTensor)
    y_batch = torch.from_numpy(y_batch).type(torch.FloatTensor)
    if use_gpu:
        x_batch = x_batch.cuda()
        y_batch = y_batch.cuda()
    # return Variable(x_batch), Variable(y_batch)
    return x_batch, y_batch

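# Hedged usage sketch for to_torch: feeding a SciPy CSR feature matrix and a dense label
# matrix through the converter. The shapes and random data below are illustrative only.
import numpy as np
from scipy import sparse

x = sparse.random(32, 100, density=0.05, format="csr", dtype=np.float32)
y = np.random.randint(0, 2, size=(32, 1)).astype(np.float32)
x_t, y_t = to_torch(x, y, use_gpu=False)  # set use_gpu=True to move both tensors to CUDA
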
def get_database_prediction(self, face: np.ndarray) -> (int, torch.Tensor):
    """
    Predict whether the passed face belongs to someone in the database.

    :param face: np.ndarray
        The face to recognize
    :return: int
        The index of the person the face belongs to, or -1 if no match was found
    """
    assert self.initialized
    face = torch.from_numpy(face)
    if self.is_cuda:
        face = face.cuda()
    latent_vector = self.model(face)
    norm = torch.norm(self.vectors - latent_vector, dim=1)
    prediction = self.predictor.predict(norm)
    prob = torch.max(prediction)
    index = torch.argmax(prediction)
    return index, prob

def predict_sign(model: nn.Module, image: np.ndarray, threshold: float = 0.99,
                 verbose: bool = False) -> Optional[str]:
    if torch.cuda.is_available():
        model = model.cuda()
    model.eval()
    image = normalize_image_input(image)
    if torch.cuda.is_available():
        image = image.cuda()
    confidence, predicted_index = nn.functional.softmax(model(image), dim=1).max(dim=1)
    if verbose:
        print(confidence.item())
    if confidence >= threshold:
        # Map the class index to an uppercase letter (0 -> 'A')
        return chr(65 + predicted_index.item())
    else:
        return None

def run(self, image: np.ndarray):
    """Runs inference on a single image.

    Args:
        image: Raw input image as a numpy array.

    Returns:
        resized_image: RGB image resized from the original input image.
        seg_map: Segmentation map of `resized_image`.
    """
    resized_image = cv2.resize(image, tuple(config.TEST.IMAGE_SIZE), interpolation=cv2.INTER_LINEAR)
    resized_image = self.base_dataset.input_transform(resized_image)
    resized_image = resized_image.transpose((2, 0, 1))
    resized_image = np.expand_dims(resized_image, axis=0)
    image = torch.from_numpy(resized_image)
    image = image.cuda()
    seg_map = self.model(image)[0]
    if self.model_type == 'hrnetocr':
        seg_map = seg_map[0]
    seg_map = seg_map.detach()
    seg_map = torch.argmax(seg_map, dim=0)
    return resized_image, seg_map.cpu()

def make_tensor(array: np.ndarray) -> torch.Tensor:
    array = torch.tensor(array)
    if params.gpu_flag:
        array = array.cuda(params.gpu_name)
    return array

def predict(self, input: np.ndarray, target: Optional[np.ndarray] = None,
            return_probs: bool = False) -> np.ndarray:
    """
    Predict the class for the given inputs, and optionally update the weights.

    Args:
        input (np.array[B, N]): Batch of B N-dim float input vectors.
        target (np.array[B]): Optional batch of B target class labels (bool, or int if
            num_classes given) which, if given, triggers an online update.
        return_probs (bool): Whether to return the classification probability (for each
            one-vs-all classifier if num_classes given) instead of the class.

    Returns:
        Predicted class per input instance (bool, or int if num_classes given),
        or classification probabilities if return_probs is set.
    """
    if input.ndim == 1:
        input = np.expand_dims(input, axis=0)

    # Base predictions
    base_preds = self.base_predictor(input)

    # Default data transform
    input = torch.tensor(input, dtype=torch.float32)
    base_preds = torch.tensor(base_preds, dtype=torch.float32)
    if target is not None:
        target = torch.tensor(target)
    if torch.cuda.is_available():
        input = input.cuda()
        base_preds = base_preds.cuda()
        if target is not None:
            target = target.cuda()

    # Context
    context = input

    # Target
    if target is not None:
        target = nn.functional.one_hot(target.long(), self.num_classes)
        if self.num_classes == 2:
            target = target[:, 1:]

    # Base logits
    base_preds = torch.clamp(base_preds, min=self.pred_clipping, max=(1.0 - self.pred_clipping))
    logits = torch.log(base_preds / (1.0 - base_preds))
    if self.bias:
        logits[:, 0] = self.base_bias

    # Layers
    for layer in self.layers:
        logits = layer.predict(logit=logits, context=context, target=target)
    logits = torch.squeeze(logits, dim=1)
    if self.num_classes == 2:
        logits = logits.squeeze(dim=1)

    if return_probs:
        output = torch.sigmoid(logits)
    elif self.num_classes == 2:
        output = logits > 0
    else:
        output = torch.argmax(logits, dim=1)

    if torch.cuda.is_available():
        output = output.cpu()
    return output.numpy()

def to_tensor(data: np.ndarray) -> torch.Tensor:
    data = torch.from_numpy(data)
    if torch.cuda.is_available():
        return data.cuda()
    return data

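# Hedged usage sketch for to_tensor: device placement is decided implicitly by CUDA
# availability, so the caller does not choose where the tensor lands. The array below
# is illustrative only.
import numpy as np

t = to_tensor(np.ones((4, 4), dtype=np.float32))
print(t.device)  # cuda:0 if a GPU is available, otherwise cpu
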