def extract(self, im: np.ndarray, debug_save_name=None):
    """Run the backbone on an image patch and cache IoU-net features.

    args:
        im: image patch as a numpy array (not modified in place)
        debug_save_name: optional path; when given the raw input is
            dumped with np.savez for debugging
    """
    with fluid.dygraph.guard():
        if debug_save_name is not None:
            np.savez(debug_save_name, im)

        # `im / 255.` allocates a fresh array, so the in-place ops below
        # never touch the caller's data.
        normed = im / 255.
        normed -= self.mean
        normed /= self.std
        tensor_im = n2p(normed)

        feats = self.net.extract_features(tensor_im, self.feature_layers)

        # Cache the raw backbone activations that feed the IoU predictor.
        raw_iou_feats = TensorList(
            [feats[name] for name in self.iounet_feature_layers])
        self.iounet_backbone_features = raw_iou_feats.numpy()

        # Cache the processed IoU-net features, taken just before pooling.
        self.iounet_features = TensorList([
            f.numpy()
            for f in self.iou_predictor.get_iou_feat(raw_iou_feats)
        ])

        # Return the requested output layers as numpy arrays.
        return TensorList(
            [feats[name].numpy() for name in self.output_layers])
def extract(self, im: np.ndarray, debug_save_name=None):
    """Run the backbone on an image patch and cache estimator features.

    args:
        im: image patch as a numpy array (not modified in place)
        debug_save_name: optional path; when given the raw input is
            dumped with np.savez for debugging
    """
    with fluid.dygraph.guard():
        if debug_save_name is not None:
            np.savez(debug_save_name, im)

        # Dividing first allocates a new array, keeping the caller's
        # input untouched by the subsequent in-place ops.
        normed = im / 255.
        normed -= self.mean
        normed /= self.std
        tensor_im = n2p(normed)

        feats = self.net.extract_features(tensor_im, self.feature_layers)

        # Cache the raw backbone activations that feed the estimator.
        backbone = TensorList(
            [feats[name] for name in self.estimator_feature_layers])
        self.estimator_backbone_features = backbone.numpy()

        # Return the requested output layers as numpy arrays.
        return TensorList(
            [feats[name].numpy() for name in self.output_layers])
def extract(self, im: np.ndarray, debug_save_name=None):
    """Run the backbone on an image patch and return its raw features.

    args:
        im: image patch as a numpy array
        debug_save_name: optional path; when given the raw input is
            dumped with np.savez for debugging
    """
    with fluid.dygraph.guard():
        if debug_save_name is not None:
            np.savez(debug_save_name, im)
        backbone_out = self.net.extract_backbone_features(n2p(im))
        # Convert every backbone layer output back to numpy.
        return TensorList([layer.numpy() for layer in backbone_out])
def __call__(self, image):
    """Apply the separable 2-D filter to *image*.

    args:
        image: a paddle tensor (NCHW layout — assumed from the
            `shape[2:]` slice below) or a numpy array; numpy input is
            round-tripped through the tensor branch.
    returns:
        The filtered image, cropped via `crop_to_output`, in the same
        kind (tensor/array) as the input.
    """
    # Numpy path: convert, recurse through the tensor branch, convert back.
    if not isinstance(image, PTensor):
        return paddle_to_numpy(self(numpy_to_paddle(image)))

    sz = image.shape[2:]
    # Renamed from `filter` to avoid shadowing the Python builtin.
    kernels = [n2p(f) for f in self.filter_np]
    # First 1-D pass: fold batch/channel dims into one, convolve with
    # the first kernel (padding along the first spatial dim only).
    im1 = Fconv2d(
        layers.reshape(image, [-1, 1, sz[0], sz[1]]),
        kernels[0],
        padding=(self.filter_size[0], 0))
    # Second 1-D pass (padding along the other spatial dim), then
    # restore the original layout and crop to the output region.
    return self.crop_to_output(
        layers.reshape(
            Fconv2d(im1, kernels[1], padding=(0, self.filter_size[1])),
            [1, -1, sz[0], sz[1]]))
def get_feature(self, im: np.ndarray):
    """Extract, pool and normalize a list of features for an image patch.

    args:
        im: image patch
    """
    # Skip extraction entirely when this feature is disabled for the
    # patch's color mode (channel count decides color vs. gray).
    has_three_channels = im.shape[1] == 3
    if not (self.use_for_color if has_three_channels else self.use_for_gray):
        return np.array([])

    feat_list = self.extract(im)
    if self.output_size is None:
        target_sizes = [None] * len(feat_list)
    else:
        target_sizes = self.output_size

    with fluid.dygraph.guard():
        tensors = [n2p(f) for f in feat_list]

        # Pool/downsample each feature map: a fixed output size wins over
        # a plain stride; stride 1 means "leave as is".
        for i, (sz, stride) in enumerate(zip(target_sizes, self.pool_stride)):
            if sz is not None:
                tensors[i] = layers.adaptive_pool2d(tensors[i], sz,
                                                    pool_type='avg')
            elif stride != 1:
                tensors[i] = layers.pool2d(tensors[i], stride,
                                           pool_stride=stride,
                                           pool_type='avg')

        # Power normalization per feature map.
        if self.normalize_power is not None:
            normalized = []
            for feat in tensors:
                flat_abs = layers.reshape(layers.abs(feat),
                                          [feat.shape[0], 1, 1, -1])
                elem_count = feat.shape[1] * feat.shape[2] * feat.shape[3]
                norm = (layers.reduce_sum(flat_abs**self.normalize_power,
                                          dim=3,
                                          keep_dim=True) / elem_count +
                        1e-10)**(1 / self.normalize_power)
                normalized.append(broadcast_op(feat, norm, 'div'))
            tensors = normalized

        # Back to numpy before leaving the dygraph guard.
        result = TensorList([f.numpy() for f in tensors])
    return result
def optimize_boxes(self, iou_features, init_boxes):
    """Refine candidate boxes by gradient ascent on the predicted IoU.

    args:
        iou_features: TensorList of IoU-net features (numpy).
        init_boxes: initial candidate boxes; reshaped to (1, N, 4).
    returns:
        Tuple of (refined boxes as an (N, 4) numpy array, final IoU
        scores as an (N,) numpy array).
    """
    with fluid.dygraph.guard():
        # Optimize iounet boxes
        init_boxes = np.reshape(init_boxes, (1, -1, 4))
        step_length = self.params.box_refinement_step_length

        # Move everything into paddle tensors for the backward passes.
        target_feat = self.target_feat.apply(n2p)
        iou_features = iou_features.apply(n2p)
        output_boxes = n2p(init_boxes)

        # Features must allow gradients so backward() can flow through.
        for f in iou_features:
            f.stop_gradient = False

        for i_ in range(self.params.box_refinement_iter):
            # forward pass
            bb_init = output_boxes
            bb_init.stop_gradient = False

            outputs = self.iou_predictor.predict_iou(
                target_feat, iou_features, bb_init)

            if isinstance(outputs, (list, tuple)):
                outputs = outputs[0]

            outputs.backward()

            # Update proposal: step in the IoU-gradient direction, scaled
            # by the current box width/height (tiled over x, y, w, h).
            bb_init_np = bb_init.numpy()
            bb_init_gd = bb_init.gradient()
            output_boxes = bb_init_np + step_length * bb_init_gd * np.tile(
                bb_init_np[:, :, 2:], (1, 1, 2))
            # Re-wrap as a fresh tensor so the next iteration starts a
            # new gradient graph.
            output_boxes = n2p(output_boxes)
            # Decay the step length each refinement iteration.
            step_length *= self.params.box_refinement_step_decay

        # NOTE(review): if box_refinement_iter is 0, `outputs` is never
        # bound and this raises — presumably the parameter is always >= 1.
        return layers.reshape(output_boxes,
                              (-1, 4)).numpy(), layers.reshape(
                                  outputs, (-1, )).numpy()
def init_iou_net(self):
    """Set up the IoU predictor and precompute the target modulation
    features (`self.target_feat`) from the initial frame."""
    # Setup IoU net
    self.iou_predictor = self.params.features.get_unique_attribute(
        'iou_predictor')

    # Get target boxes for the different augmentations
    self.iou_target_box = self.get_iounet_box(self.pos, self.target_sz,
                                              self.pos.round(),
                                              self.target_scale)
    target_boxes = TensorList()
    if self.params.iounet_augmentation:
        for T in self.transforms:
            # Stop at the first transform the IoU net cannot handle;
            # everything after it in the list is dropped as well.
            if not isinstance(
                    T, (augmentation.Identity, augmentation.Translation,
                        augmentation.FlipHorizontal,
                        augmentation.FlipVertical, augmentation.Blur)):
                break
            # Shift the target box by the transform's (x, y) offset;
            # T.shift is stored (row, col), the box wants (x, y).
            target_boxes.append(self.iou_target_box +
                                np.array([T.shift[1], T.shift[0], 0, 0]))
    else:
        target_boxes.append(self.iou_target_box.copy())
    target_boxes = np.concatenate(target_boxes.view(1, 4), 0)

    # Get iou features
    iou_backbone_features = self.get_iou_backbone_features()

    # Remove other augmentations such as rotation: keep only the leading
    # samples that match the boxes collected above.
    iou_backbone_features = TensorList(
        [x[:target_boxes.shape[0], ...] for x in iou_backbone_features])

    # Extract target feat
    with fluid.dygraph.guard():
        iou_backbone_features = iou_backbone_features.apply(n2p)
        target_boxes = n2p(target_boxes)
        target_feat = self.iou_predictor.get_filter(
            iou_backbone_features, target_boxes)
        # Average over the augmentation dimension.
        self.target_feat = TensorList(
            [layers.reduce_mean(x, 0).numpy() for x in target_feat])

    if getattr(self.params, 'iounet_not_use_reference', False):
        # Replace the reference features by a constant with the same
        # mean magnitude (norm / element count).
        self.target_feat = TensorList([
            np.full_like(tf, tf.norm() / tf.numel())
            for tf in self.target_feat
        ])
def get_feature(self, im: np.ndarray):
    """Extract a single pooled (and optionally normalized) feature.

    args:
        im: image patch
    """
    # Feature disabled for this color mode -> empty result.
    has_three_channels = im.shape[1] == 3
    if not (self.use_for_color if has_three_channels else self.use_for_gray):
        return np.array([])

    # Extract feature
    extracted = self.extract(im)

    with fluid.dygraph.guard():
        tensor = n2p(extracted)

        # Pool/downsample: fixed output size wins over plain striding.
        if self.output_size is not None:
            tensor = layers.adaptive_pool2d(tensor, self.output_size, 'avg')
        elif self.pool_stride != 1:
            tensor = layers.pool2d(tensor,
                                   self.pool_stride,
                                   pool_stride=self.pool_stride,
                                   pool_type='avg')

        # Power normalization.
        if self.normalize_power is not None:
            flat_abs = layers.reshape(layers.abs(tensor),
                                      [tensor.shape[0], 1, 1, -1])
            elem_count = (tensor.shape[1] * tensor.shape[2] *
                          tensor.shape[3])
            tensor /= (layers.reduce_sum(flat_abs**self.normalize_power,
                                         dim=3,
                                         keep_dim=True) / elem_count +
                       1e-10)**(1 / self.normalize_power)

        result = tensor.numpy()
    return result
def generate_init_samples(self, im: np.ndarray) -> TensorList:
    """Generate augmented initial samples.

    Builds `self.transforms` from `self.params.augmentation`, extracts
    one sample per transform, then optionally appends dropout-perturbed
    copies of the first sample.
    """
    # Compute augmentation size
    aug_expansion_factor = getattr(self.params,
                                   'augmentation_expansion_factor', None)
    aug_expansion_sz = self.img_sample_sz.copy()
    aug_output_sz = None
    if aug_expansion_factor is not None and aug_expansion_factor != 1:
        aug_expansion_sz = (self.img_sample_sz *
                            aug_expansion_factor).astype('long')
        # Keep the expansion difference even so crops stay centered.
        aug_expansion_sz += (aug_expansion_sz -
                             self.img_sample_sz.astype('long')) % 2
        aug_expansion_sz = aug_expansion_sz.astype('float32')
        aug_output_sz = self.img_sample_sz.astype('long').tolist()

    # Random shift operator; defaults to "no shift" (returns None).
    get_rand_shift = lambda: None
    random_shift_factor = getattr(self.params, 'random_shift_factor', 0)
    if random_shift_factor > 0:
        get_rand_shift = lambda: (
            (np.random.uniform(size=[2]) - 0.5) * self.img_sample_sz *
            random_shift_factor).astype('long').tolist()

    # Create transformations; Identity always comes first.
    self.transforms = [augmentation.Identity(aug_output_sz)]
    if 'shift' in self.params.augmentation:
        self.transforms.extend([
            augmentation.Translation(shift, aug_output_sz)
            for shift in self.params.augmentation['shift']
        ])
    if 'relativeshift' in self.params.augmentation:
        # Relative shifts are fractions of half the sample size.
        get_absolute = lambda shift: (np.array(shift, 'float32') * self.
                                      img_sample_sz / 2).astype('long'
                                                               ).tolist()
        self.transforms.extend([
            augmentation.Translation(get_absolute(shift), aug_output_sz)
            for shift in self.params.augmentation['relativeshift']
        ])
    if 'fliplr' in self.params.augmentation and self.params.augmentation[
            'fliplr']:
        self.transforms.append(
            augmentation.FlipHorizontal(aug_output_sz, get_rand_shift()))
    if 'blur' in self.params.augmentation:
        self.transforms.extend([
            augmentation.Blur(sigma, aug_output_sz, get_rand_shift())
            for sigma in self.params.augmentation['blur']
        ])
    if 'scale' in self.params.augmentation:
        self.transforms.extend([
            augmentation.Scale(scale_factor, aug_output_sz,
                               get_rand_shift())
            for scale_factor in self.params.augmentation['scale']
        ])
    if 'rotate' in self.params.augmentation:
        self.transforms.extend([
            augmentation.Rotate(angle, aug_output_sz, get_rand_shift())
            for angle in self.params.augmentation['rotate']
        ])

    # Generate initial samples
    init_samples = self.params.features.extract_transformed(
        im, self.pos, self.target_scale, aug_expansion_sz, self.transforms)

    # Remove augmented samples for features that shall not have them;
    # keep only the Identity sample in that case.
    for i, use_aug in enumerate(
            self.fparams.attribute('use_augmentation')):
        if not use_aug:
            init_samples[i] = init_samples[i][0:1]

    # Add dropout samples: `num` copies of the first sample, each with
    # dropout applied, appended per feature that uses augmentation.
    if 'dropout' in self.params.augmentation:
        num, prob = self.params.augmentation['dropout']
        # Record matching (identity) transforms for the new samples.
        self.transforms.extend(self.transforms[:1] * num)
        with fluid.dygraph.guard():
            for i, use_aug in enumerate(
                    self.fparams.attribute('use_augmentation')):
                if use_aug:
                    init_samples[i] = np.concatenate([
                        init_samples[i],
                        dropout2d(layers.expand(n2p(init_samples[i][0:1]),
                                                (num, 1, 1, 1)),
                                  prob,
                                  is_train=True).numpy()
                    ])

    return init_samples